Merge tag 'metag-v3.9-rc1-v4' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/metag

Pull new ImgTec Meta architecture from James Hogan:
 "This adds core architecture support for Imagination's Meta processor
  cores, followed by some later miscellaneous arch/metag cleanups and
  fixes which I kept separate to ease review:

   - Support for basic Meta 1 (ATP) and Meta 2 (HTP) core architecture
   - A few fixes all over, particularly for symbol prefixes
   - A few privilege protection fixes
   - Several cleanups (setup.c includes, split out a lot of
     metag_ksyms.c)
   - Fix some missing exports
   - Convert hugetlb to use vm_unmapped_area()
   - Copy device tree to non-init memory
   - Provide dma_get_sgtable()"

* tag 'metag-v3.9-rc1-v4' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/metag: (61 commits)
  metag: Provide dma_get_sgtable()
  metag: prom.h: remove declaration of metag_dt_memblock_reserve()
  metag: copy devicetree to non-init memory
  metag: cleanup metag_ksyms.c includes
  metag: move mm/init.c exports out of metag_ksyms.c
  metag: move usercopy.c exports out of metag_ksyms.c
  metag: move setup.c exports out of metag_ksyms.c
  metag: move kick.c exports out of metag_ksyms.c
  metag: move traps.c exports out of metag_ksyms.c
  metag: move irq enable out of irqflags.h on SMP
  genksyms: fix metag symbol prefix on crc symbols
  metag: hugetlb: convert to vm_unmapped_area()
  metag: export clear_page and copy_page
  metag: export metag_code_cache_flush_all
  metag: protect more non-MMU memory regions
  metag: make TXPRIVEXT bits explicit
  metag: kernel/setup.c: sort includes
  perf: Enable building perf tools for Meta
  metag: add boot time LNKGET/LNKSET check
  metag: add __init to metag_cache_probe()
  ...
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 0f3e8bb..45b3df9 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -299,6 +299,8 @@
 	- Hotpluggable memory support, how to use and current status.
 memory.txt
 	- info on typical Linux memory problems.
+metag/
+	- directory with info about Linux on Meta architecture.
 mips/
 	- directory with info about Linux on MIPS architecture.
 misc-devices/
diff --git a/Documentation/devicetree/bindings/metag/meta-intc.txt b/Documentation/devicetree/bindings/metag/meta-intc.txt
new file mode 100644
index 0000000..8c47dcb
--- /dev/null
+++ b/Documentation/devicetree/bindings/metag/meta-intc.txt
@@ -0,0 +1,82 @@
+* Meta External Trigger Controller Binding
+
+This binding specifies what properties must be available in the device tree
+representation of a Meta external trigger controller.
+
+Required properties:
+
+    - compatible: Specifies the compatibility list for the interrupt controller.
+      The type shall be <string> and the value shall include "img,meta-intc".
+
+    - num-banks: Specifies the number of interrupt banks (each of which can
+      handle 32 interrupt sources).
+
+    - interrupt-controller: The presence of this property identifies the node
+      as an interupt controller. No property value shall be defined.
+
+    - #interrupt-cells: Specifies the number of cells needed to encode an
+      interrupt source. The type shall be a <u32> and the value shall be 2.
+
+    - #address-cells: Specifies the number of cells needed to encode an
+      address. The type shall be <u32> and the value shall be 0. As such,
+      'interrupt-map' nodes do not have to specify a parent unit address.
+
+Optional properties:
+
+    - no-mask: The controller doesn't have any mask registers.
+
+* Interrupt Specifier Definition
+
+  Interrupt specifiers consists of 2 cells encoded as follows:
+
+    - <1st-cell>: The interrupt-number that identifies the interrupt source.
+
+    - <2nd-cell>: The Linux interrupt flags containing level-sense information,
+                  encoded as follows:
+                    1 = edge triggered
+                    4 = level-sensitive
+
+* Examples
+
+Example 1:
+
+	/*
+	 * Meta external trigger block
+	 */
+	intc: intc {
+		// This is an interrupt controller node.
+		interrupt-controller;
+
+		// No address cells so that 'interrupt-map' nodes which
+		// reference this interrupt controller node do not need a parent
+		// address specifier.
+		#address-cells = <0>;
+
+		// Two cells to encode interrupt sources.
+		#interrupt-cells = <2>;
+
+		// Number of interrupt banks
+		num-banks = <2>;
+
+		// No HWMASKEXT is available (specify on Chorus2 and Comet ES1)
+		no-mask;
+
+		// Compatible with Meta hardware trigger block.
+		compatible = "img,meta-intc";
+	};
+
+Example 2:
+
+	/*
+	 * An interrupt generating device that is wired to a Meta external
+	 * trigger block.
+	 */
+	uart1: uart@0x02004c00 {
+		// Interrupt source '5' that is level-sensitive.
+		// Note that there are only two cells as specified in the
+		// interrupt parent's '#interrupt-cells' property.
+		interrupts = <5 4 /* level */>;
+
+		// The interrupt controller that this device is wired to.
+		interrupt-parent = <&intc>;
+	};
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 3a54fca..4609e81 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -978,6 +978,10 @@
 			       If specified, z/VM IUCV HVC accepts connections
 			       from listed z/VM user IDs only.
 
+	hwthread_map=	[METAG] Comma-separated list of Linux cpu id to
+			        hardware thread id mappings.
+				Format: <cpu>:<hwthread>
+
 	keep_bootcon	[KNL]
 			Do not unregister boot console at start. This is only
 			useful for debugging when something happens in the window
diff --git a/Documentation/metag/00-INDEX b/Documentation/metag/00-INDEX
new file mode 100644
index 0000000..db11c51
--- /dev/null
+++ b/Documentation/metag/00-INDEX
@@ -0,0 +1,4 @@
+00-INDEX
+	- this file
+kernel-ABI.txt
+	- Documents metag ABI details
diff --git a/Documentation/metag/kernel-ABI.txt b/Documentation/metag/kernel-ABI.txt
new file mode 100644
index 0000000..7b8dee8
--- /dev/null
+++ b/Documentation/metag/kernel-ABI.txt
@@ -0,0 +1,256 @@
+			==========================
+			KERNEL ABIS FOR METAG ARCH
+			==========================
+
+This document describes the Linux ABIs for the metag architecture, and has the
+following sections:
+
+ (*) Outline of registers
+ (*) Userland registers
+ (*) Kernel registers
+ (*) System call ABI
+ (*) Calling conventions
+
+
+====================
+OUTLINE OF REGISTERS
+====================
+
+The main Meta core registers are arranged in units:
+
+	UNIT	Type	DESCRIPTION	GP	EXT	PRIV	GLOBAL
+	=======	=======	===============	=======	=======	=======	=======
+	CT	Special	Control unit
+	D0	General	Data unit 0	0-7	8-15	16-31	16-31
+	D1	General	Data unit 1	0-7	8-15	16-31	16-31
+	A0	General	Address unit 0	0-3	4-7	 8-15	 8-15
+	A1	General	Address unit 1	0-3	4-7	 8-15	 8-15
+	PC	Special	PC unit		0		 1
+	PORT	Special	Ports
+	TR	Special	Trigger unit			 0-7
+	TT	Special	Trace unit			 0-5
+	FX	General	FP unit			0-15
+
+GP registers form part of the main context.
+
+Extended context registers (EXT) may not be present on all hardware threads and
+can be context switched if support is enabled and the appropriate bits are set
+in e.g. the D0.8 register to indicate what extended state to preserve.
+
+Global registers are shared between threads and are privilege protected.
+
+See arch/metag/include/asm/metag_regs.h for definitions relating to core
+registers and the fields and bits they contain. See the TRMs for further details
+about special registers.
+
+Several special registers are preserved in the main context, these are the
+interesting ones:
+
+	REG	(ALIAS)		PURPOSE
+	=======================	===============================================
+	CT.1	(TXMODE)	Processor mode bits (particularly for DSP)
+	CT.2	(TXSTATUS)	Condition flags and LSM_STEP (MGET/MSET step)
+	CT.3	(TXRPT)		Branch repeat counter
+	PC.0	(PC)		Program counter
+
+Some of the general registers have special purposes in the ABI and therefore
+have aliases:
+
+	D0 REG	(ALIAS)	PURPOSE		D1 REG	(ALIAS)	PURPOSE
+	===============	===============	===============	=======================
+	D0.0	(D0Re0)	32bit result	D1.0	(D1Re0)	Top half of 64bit result
+	D0.1	(D0Ar6)	Argument 6	D1.1	(D1Ar5)	Argument 5
+	D0.2	(D0Ar4)	Argument 4	D1.2	(D1Ar3)	Argument 3
+	D0.3	(D0Ar2)	Argument 2	D1.3	(D1Ar1)	Argument 1
+	D0.4	(D0FrT)	Frame temp	D1.4	(D1RtP)	Return pointer
+	D0.5		Call preserved	D1.5		Call preserved
+	D0.6		Call preserved	D1.6		Call preserved
+	D0.7		Call preserved	D1.7		Call preserved
+
+	A0 REG	(ALIAS)	PURPOSE		A1 REG	(ALIAS)	PURPOSE
+	===============	===============	===============	=======================
+	A0.0	(A0StP)	Stack pointer	A1.0	(A1GbP)	Global base pointer
+	A0.1	(A0FrP)	Frame pointer	A1.1	(A1LbP)	Local base pointer
+	A0.2				A1.2
+	A0.3				A1.3
+
+
+==================
+USERLAND REGISTERS
+==================
+
+All the general purpose D0, D1, A0, A1 registers are preserved when entering the
+kernel (including asynchronous events such as interrupts and timer ticks) except
+the following which have special purposes in the ABI:
+
+	REGISTERS	WHEN	STATUS		PURPOSE
+	===============	=======	===============	===============================
+	D0.8		DSP	Preserved	ECH, determines what extended
+						DSP state to preserve.
+	A0.0	(A0StP)	ALWAYS	Preserved	Stack >= A0StP may be clobbered
+						at any time by the creation of a
+						signal frame.
+	A1.0	(A1GbP)	SMP	Clobbered	Used as temporary for loading
+						kernel stack pointer and saving
+						core context.
+	A0.15		!SMP	Protected	Stores kernel stack pointer.
+	A1.15		ALWAYS	Protected	Stores kernel base pointer.
+
+On UP A0.15 is used to store the kernel stack pointer for storing the userland
+context. A0.15 is global between hardware threads though which means it cannot
+be used on SMP for this purpose. Since no protected local registers are
+available A1GbP is reserved for use as a temporary to allow a percpu stack
+pointer to be loaded for storing the rest of the context.
+
+
+================
+KERNEL REGISTERS
+================
+
+When in the kernel the following registers have special purposes in the ABI:
+
+	REGISTERS	WHEN	STATUS		PURPOSE
+	===============	=======	===============	===============================
+	A0.0	(A0StP)	ALWAYS	Preserved	Stack >= A0StP may be clobbered
+						at any time by the creation of
+						an irq signal frame.
+	A1.0	(A1GbP)	ALWAYS	Preserved	Reserved (kernel base pointer).
+
+
+===============
+SYSTEM CALL ABI
+===============
+
+When a system call is made, the following registers are effective:
+
+	REGISTERS	CALL			RETURN
+	===============	=======================	===============================
+	D0.0	(D0Re0)				Return value (or -errno)
+	D1.0	(D1Re0)	System call number	Clobbered
+	D0.1	(D0Ar6)	Syscall arg #6		Preserved
+	D1.1	(D1Ar5)	Syscall arg #5		Preserved
+	D0.2	(D0Ar4)	Syscall arg #4		Preserved
+	D1.2	(D1Ar3)	Syscall arg #3		Preserved
+	D0.3	(D0Ar2)	Syscall arg #2		Preserved
+	D1.3	(D1Ar1)	Syscall arg #1		Preserved
+
+Due to the limited number of argument registers and some system calls with badly
+aligned 64-bit arguments, 64-bit values are always packed in consecutive
+arguments, even if this is contrary to the normal calling conventions (where the
+two halves would go in a matching pair of data registers).
+
+For example fadvise64_64 usually has the signature:
+
+	long sys_fadvise64_64(i32 fd, i64 offs, i64 len, i32 advice);
+
+But for metag fadvise64_64 is wrapped so that the 64-bit arguments are packed:
+
+	long sys_fadvise64_64_metag(i32 fd,      i32 offs_lo,
+				    i32 offs_hi, i32 len_lo,
+				    i32 len_hi,  i32 advice)
+
+So the arguments are packed in the registers like this:
+
+	D0 REG	(ALIAS)	VALUE		D1 REG	(ALIAS)	VALUE
+	===============	===============	===============	=======================
+	D0.1	(D0Ar6)	advice		D1.1	(D1Ar5)	hi(len)
+	D0.2	(D0Ar4)	lo(len)		D1.2	(D1Ar3)	hi(offs)
+	D0.3	(D0Ar2)	lo(offs)	D1.3	(D1Ar1)	fd
+
+
+===================
+CALLING CONVENTIONS
+===================
+
+These calling conventions apply to both user and kernel code. The stack grows
+from low addresses to high addresses in the metag ABI. The stack pointer (A0StP)
+should always point to the next free address on the stack and should at all
+times be 64-bit aligned. The following registers are effective at the point of a
+call:
+
+	REGISTERS	CALL			RETURN
+	===============	=======================	===============================
+	D0.0	(D0Re0)				32bit return value
+	D1.0	(D1Re0)				Upper half of 64bit return value
+	D0.1	(D0Ar6)	32bit argument #6	Clobbered
+	D1.1	(D1Ar5)	32bit argument #5	Clobbered
+	D0.2	(D0Ar4)	32bit argument #4	Clobbered
+	D1.2	(D1Ar3)	32bit argument #3	Clobbered
+	D0.3	(D0Ar2)	32bit argument #2	Clobbered
+	D1.3	(D1Ar1)	32bit argument #1	Clobbered
+	D0.4	(D0FrT)				Clobbered
+	D1.4	(D1RtP)	Return pointer		Clobbered
+	D{0-1}.{5-7}				Preserved
+	A0.0	(A0StP)	Stack pointer		Preserved
+	A1.0	(A0GbP)				Preserved
+	A0.1	(A0FrP)	Frame pointer		Preserved
+	A1.1	(A0LbP)				Preserved
+	A{0-1},{2-3}				Clobbered
+
+64-bit arguments are placed in matching pairs of registers (i.e. the same
+register number in both D0 and D1 units), with the least significant half in D0
+and the most significant half in D1, leaving a gap where necessary. Futher
+arguments are stored on the stack in reverse order (earlier arguments at higher
+addresses):
+
+	ADDRESS		0     1     2     3	4     5     6     7
+	===============	===== ===== ===== =====	===== ===== ===== =====
+	A0StP       -->
+	A0StP-0x08	32bit argument #8	32bit argument #7
+	A0StP-0x10	32bit argument #10	32bit argument #9
+
+Function prologues tend to look a bit like this:
+
+	/* If frame pointer in use, move it to frame temp register so it can be
+	   easily pushed onto stack */
+	MOV	D0FrT,A0FrP
+
+	/* If frame pointer in use, set it to stack pointer */
+	ADD	A0FrP,A0StP,#0
+
+	/* Preserve D0FrT, D1RtP, D{0-1}.{5-7} on stack, incrementing A0StP */
+	MSETL	[A0StP++],D0FrT,D0.5,D0.6,D0.7
+
+	/* Allocate some stack space for local variables */
+	ADD	A0StP,A0StP,#0x10
+
+At this point the stack would look like this:
+
+	ADDRESS		0     1     2     3	4     5     6     7
+	===============	===== ===== ===== =====	===== ===== ===== =====
+	A0StP       -->
+	A0StP-0x08
+	A0StP-0x10
+	A0StP-0x18	Old D0.7		Old D1.7
+	A0StP-0x20	Old D0.6		Old D1.6
+	A0StP-0x28	Old D0.5		Old D1.5
+	A0FrP       -->	Old A0FrP (frame ptr)	Old D1RtP (return ptr)
+	A0FrP-0x08	32bit argument #8	32bit argument #7
+	A0FrP-0x10	32bit argument #10	32bit argument #9
+
+Function epilogues tend to differ depending on the use of a frame pointer. An
+example of a frame pointer epilogue:
+
+	/* Restore D0FrT, D1RtP, D{0-1}.{5-7} from stack, incrementing A0FrP */
+	MGETL	D0FrT,D0.5,D0.6,D0.7,[A0FrP++]
+	/* Restore stack pointer to where frame pointer was before increment */
+	SUB	A0StP,A0FrP,#0x20
+	/* Restore frame pointer from frame temp */
+	MOV	A0FrP,D0FrT
+	/* Return to caller via restored return pointer */
+	MOV	PC,D1RtP
+
+If the function hasn't touched the frame pointer, MGETL cannot be safely used
+with A0StP as it always increments and that would expose the stack to clobbering
+by interrupts (kernel) or signals (user). Therefore it's common to see the MGETL
+split into separate GETL instructions:
+
+	/* Restore D0FrT, D1RtP, D{0-1}.{5-7} from stack */
+	GETL	D0FrT,D1RtP,[A0StP+#-0x30]
+	GETL	D0.5,D1.5,[A0StP+#-0x28]
+	GETL	D0.6,D1.6,[A0StP+#-0x20]
+	GETL	D0.7,D1.7,[A0StP+#-0x18]
+	/* Restore stack pointer */
+	SUB	A0StP,A0StP,#0x30
+	/* Return to caller via restored return pointer */
+	MOV	PC,D1RtP
diff --git a/MAINTAINERS b/MAINTAINERS
index aea0adf..e95b1e9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5204,6 +5204,18 @@
 F:	include/linux/mtd/
 F:	include/uapi/mtd/
 
+METAG ARCHITECTURE
+M:	James Hogan <james.hogan@imgtec.com>
+S:	Supported
+F:	arch/metag/
+F:	Documentation/metag/
+F:	Documentation/devicetree/bindings/metag/
+F:	drivers/clocksource/metag_generic.c
+F:	drivers/irqchip/irq-metag.c
+F:	drivers/irqchip/irq-metag-ext.c
+F:	drivers/tty/metag_da.c
+F:	fs/imgdafs/
+
 MICROBLAZE ARCHITECTURE
 M:	Michal Simek <monstr@monstr.eu>
 L:	microblaze-uclinux@itee.uq.edu.au (moderated for non-subscribers)
diff --git a/arch/Kconfig b/arch/Kconfig
index dcd91a8..5a1779c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -103,6 +103,22 @@
 
 	  If in doubt, say "N".
 
+config HAVE_64BIT_ALIGNED_ACCESS
+	def_bool 64BIT && !HAVE_EFFICIENT_UNALIGNED_ACCESS
+	help
+	  Some architectures require 64 bit accesses to be 64 bit
+	  aligned, which also requires structs containing 64 bit values
+	  to be 64 bit aligned too. This includes some 32 bit
+	  architectures which can do 64 bit accesses, as well as 64 bit
+	  architectures without unaligned access.
+
+	  This symbol should be selected by an architecture if 64 bit
+	  accesses are required to be 64 bit aligned in this way even
+	  though it is not a 64 bit architecture.
+
+	  See Documentation/unaligned-memory-access.txt for more
+	  information on the topic of unaligned memory accesses.
+
 config HAVE_EFFICIENT_UNALIGNED_ACCESS
 	bool
 	help
diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig
new file mode 100644
index 0000000..afc8973
--- /dev/null
+++ b/arch/metag/Kconfig
@@ -0,0 +1,290 @@
+config SYMBOL_PREFIX
+	string
+	default "_"
+
+config METAG
+	def_bool y
+	select EMBEDDED
+	select GENERIC_ATOMIC64
+	select GENERIC_CLOCKEVENTS
+	select GENERIC_IRQ_SHOW
+	select GENERIC_SMP_IDLE_THREAD
+	select HAVE_64BIT_ALIGNED_ACCESS
+	select HAVE_ARCH_TRACEHOOK
+	select HAVE_C_RECORDMCOUNT
+	select HAVE_DEBUG_KMEMLEAK
+	select HAVE_DYNAMIC_FTRACE
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_FUNCTION_TRACER
+	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+	select HAVE_GENERIC_HARDIRQS
+	select HAVE_KERNEL_BZIP2
+	select HAVE_KERNEL_GZIP
+	select HAVE_KERNEL_LZO
+	select HAVE_KERNEL_XZ
+	select HAVE_MEMBLOCK
+	select HAVE_MEMBLOCK_NODE_MAP
+	select HAVE_MOD_ARCH_SPECIFIC
+	select HAVE_PERF_EVENTS
+	select HAVE_SYSCALL_TRACEPOINTS
+	select IRQ_DOMAIN
+	select MODULES_USE_ELF_RELA
+	select OF
+	select OF_EARLY_FLATTREE
+	select SPARSE_IRQ
+
+config STACKTRACE_SUPPORT
+	def_bool y
+
+config LOCKDEP_SUPPORT
+	def_bool y
+
+config HAVE_LATENCYTOP_SUPPORT
+	def_bool y
+
+config RWSEM_GENERIC_SPINLOCK
+	def_bool y
+
+config RWSEM_XCHGADD_ALGORITHM
+	bool
+
+config GENERIC_HWEIGHT
+	def_bool y
+
+config GENERIC_CALIBRATE_DELAY
+	def_bool y
+
+config GENERIC_GPIO
+	def_bool n
+
+config NO_IOPORT
+	def_bool y
+
+source "init/Kconfig"
+
+source "kernel/Kconfig.freezer"
+
+menu "Processor type and features"
+
+config MMU
+	def_bool y
+
+config STACK_GROWSUP
+	def_bool y
+
+config HOTPLUG_CPU
+	bool "Enable CPU hotplug support"
+	depends on SMP
+	help
+	  Say Y here to allow turning CPUs off and on. CPUs can be
+	  controlled through /sys/devices/system/cpu.
+
+	  Say N if you want to disable CPU hotplug.
+
+config HIGHMEM
+	bool "High Memory Support"
+	help
+	  The address space of Meta processors is only 4 Gigabytes large
+	  and it has to accommodate user address space, kernel address
+	  space as well as some memory mapped IO. That means that, if you
+	  have a large amount of physical memory and/or IO, not all of the
+	  memory can be "permanently mapped" by the kernel. The physical
+	  memory that is not permanently mapped is called "high memory".
+
+	  Depending on the selected kernel/user memory split, minimum
+	  vmalloc space and actual amount of RAM, you may not need this
+	  option which should result in a slightly faster kernel.
+
+	  If unsure, say n.
+
+source "arch/metag/mm/Kconfig"
+
+source "arch/metag/Kconfig.soc"
+
+config METAG_META12
+	bool
+	help
+	  Select this from the SoC config symbol to indicate that it contains a
+	  Meta 1.2 core.
+
+config METAG_META21
+	bool
+	help
+	  Select this from the SoC config symbol to indicate that it contains a
+	  Meta 2.1 core.
+
+config SMP
+	bool "Symmetric multi-processing support"
+	depends on METAG_META21 && METAG_META21_MMU
+	select USE_GENERIC_SMP_HELPERS
+	help
+	  This enables support for systems with more than one thread running
+	  Linux. If you have a system with only one thread running Linux,
+	  say N. Otherwise, say Y.
+
+config NR_CPUS
+	int "Maximum number of CPUs (2-4)" if SMP
+	range 2 4 if SMP
+	default "1" if !SMP
+	default "4" if SMP
+
+config METAG_SMP_WRITE_REORDERING
+	bool
+	help
+	  This attempts to prevent cache-memory incoherence due to external
+	  reordering of writes from different hardware threads when SMP is
+	  enabled. It adds fences (system event 0) to smp_mb and smp_rmb in an
+	  attempt to catch some of the cases, and also before writes to shared
+	  memory in LOCK1 protected atomics and spinlocks.
+	  This will not completely prevent cache incoherency on affected cores.
+
+config METAG_LNKGET_AROUND_CACHE
+	bool
+	depends on METAG_META21
+	help
+	  This indicates that the LNKGET/LNKSET instructions go around the
+	  cache, which requires some extra cache flushes when the memory needs
+	  to be accessed by normal GET/SET instructions too.
+
+choice
+	prompt "Atomicity primitive"
+	default METAG_ATOMICITY_LNKGET
+	help
+	  This option selects the mechanism for performing atomic operations.
+
+config METAG_ATOMICITY_IRQSOFF
+	depends on !SMP
+	bool "irqsoff"
+	help
+	  This option disables interrupts to achieve atomicity. This mechanism
+	  is not SMP-safe.
+
+config METAG_ATOMICITY_LNKGET
+	depends on METAG_META21
+	bool "lnkget/lnkset"
+	help
+	  This option uses the LNKGET and LNKSET instructions to achieve
+	  atomicity. LNKGET/LNKSET are load-link/store-conditional instructions.
+	  Choose this option if your system requires low latency.
+
+config METAG_ATOMICITY_LOCK1
+	depends on SMP
+	bool "lock1"
+	help
+	  This option uses the LOCK1 instruction for atomicity. This is mainly
+	  provided as a debugging aid if the lnkget/lnkset atomicity primitive
+	  isn't working properly.
+
+endchoice
+
+config METAG_FPU
+	bool "FPU Support"
+	depends on METAG_META21
+	default y
+	help
+	  This option allows processes to use FPU hardware available with this
+	  CPU. If this option is not enabled FPU registers will not be saved
+	  and restored on context-switch.
+
+	  If you plan on running programs which are compiled to use hard floats
+	  say Y here.
+
+config METAG_DSP
+	bool "DSP Support"
+	help
+	  This option allows processes to use DSP hardware available
+	  with this CPU. If this option is not enabled DSP registers
+	  will not be saved and restored on context-switch.
+
+	  If you plan on running DSP programs say Y here.
+
+config METAG_PERFCOUNTER_IRQS
+	bool "PerfCounters interrupt support"
+	depends on METAG_META21
+	help
+	  This option enables using interrupts to collect information from
+	  Performance Counters. This option is supported in new META21
+	  (starting from HTP265).
+
+	  When disabled, Performance Counters information will be collected
+	  based on Timer Interrupt.
+
+config METAG_DA
+	bool "DA support"
+	help
+	  Say Y if you plan to use a DA debug adapter with Linux. The presence
+	  of the DA will be detected automatically at boot, so it is safe to say
+	  Y to this option even when booting without a DA.
+
+	  This enables support for services provided by DA JTAG debug adapters,
+	  such as:
+	  - communication over DA channels (such as the console driver).
+	  - use of the DA filesystem.
+
+menu "Boot options"
+
+config METAG_BUILTIN_DTB
+	bool "Embed DTB in kernel image"
+	default y
+	help
+	  Embeds a device tree binary in the kernel image.
+
+config METAG_BUILTIN_DTB_NAME
+	string "Built in DTB"
+	depends on METAG_BUILTIN_DTB
+	help
+	  Set the name of the DTB to embed (leave blank to pick one
+	  automatically based on kernel configuration).
+
+config CMDLINE_BOOL
+	bool "Default bootloader kernel arguments"
+
+config CMDLINE
+	string "Kernel command line"
+	depends on CMDLINE_BOOL
+	help
+	  On some architectures there is currently no way for the boot loader
+	  to pass arguments to the kernel. For these architectures, you should
+	  supply some command-line options at build time by entering them
+	  here.
+
+config CMDLINE_FORCE
+	bool "Force default kernel command string"
+	depends on CMDLINE_BOOL
+	help
+	  Set this to have arguments from the default kernel command string
+	  override those passed by the boot loader.
+
+endmenu
+
+source "kernel/Kconfig.preempt"
+
+source kernel/Kconfig.hz
+
+endmenu
+
+menu "Power management options"
+
+source kernel/power/Kconfig
+
+endmenu
+
+menu "Executable file formats"
+
+source "fs/Kconfig.binfmt"
+
+endmenu
+
+source "net/Kconfig"
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/metag/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
diff --git a/arch/metag/Kconfig.debug b/arch/metag/Kconfig.debug
new file mode 100644
index 0000000..e45bbf6
--- /dev/null
+++ b/arch/metag/Kconfig.debug
@@ -0,0 +1,40 @@
+menu "Kernel hacking"
+
+config TRACE_IRQFLAGS_SUPPORT
+	bool
+	default y
+
+source "lib/Kconfig.debug"
+
+config DEBUG_STACKOVERFLOW
+	bool "Check for stack overflows"
+	depends on DEBUG_KERNEL
+	help
+	  This option will cause messages to be printed if free stack space
+	  drops below a certain limit.
+
+config 4KSTACKS
+	bool "Use 4Kb for kernel stacks instead of 8Kb"
+	depends on DEBUG_KERNEL
+	help
+	  If you say Y here the kernel will use a 4Kb stacksize for the
+	  kernel stack attached to each process/thread. This facilitates
+	  running more threads on a system and also reduces the pressure
+	  on the VM subsystem for higher order allocations. This option
+	  will also use IRQ stacks to compensate for the reduced stackspace.
+
+config METAG_FUNCTION_TRACE
+	bool "Output Meta real-time trace data for function entry/exit"
+	help
+	  If you say Y here the kernel will use the Meta hardware trace
+	  unit to output information about function entry and exit that
+	  can be used by a debugger for profiling and call-graphs.
+
+config METAG_POISON_CATCH_BUFFERS
+	bool "Poison catch buffer contents on kernel entry"
+	help
+	  If you say Y here the kernel will write poison data to the
+	  catch buffer registers on kernel entry. This will make any
+	  problem with catch buffer handling much more apparent.
+
+endmenu
diff --git a/arch/metag/Kconfig.soc b/arch/metag/Kconfig.soc
new file mode 100644
index 0000000..ec079cf
--- /dev/null
+++ b/arch/metag/Kconfig.soc
@@ -0,0 +1,55 @@
+choice
+	prompt "SoC Type"
+	default META21_FPGA
+
+config META12_FPGA
+	bool "Meta 1.2 FPGA"
+	select METAG_META12
+	help
+	  This is a Meta 1.2 FPGA bitstream, just a bare CPU.
+
+config META21_FPGA
+	bool "Meta 2.1 FPGA"
+	select METAG_META21
+	help
+	  This is a Meta 2.1 FPGA bitstream, just a bare CPU.
+
+endchoice
+
+menu "SoC configuration"
+
+if METAG_META21
+
+# Meta 2.x specific options
+
+config METAG_META21_MMU
+	bool "Meta 2.x MMU mode"
+	default y
+	help
+	  Use the Meta 2.x MMU in extended mode.
+
+config METAG_UNALIGNED
+	bool "Meta 2.x unaligned access checking"
+	default y
+	help
+	  All memory accesses will be checked for alignment and an exception
+	  raised on unaligned accesses. This feature does cost performance
+	  but without it there will be no notification of this type of error.
+
+config METAG_USER_TCM
+	bool "Meta on-chip memory support for userland"
+	select GENERIC_ALLOCATOR
+	default y
+	help
+	  Allow the on-chip memories of Meta SoCs to be used by user
+	  applications.
+
+endif
+
+config METAG_HALT_ON_PANIC
+	bool "Halt the core on panic"
+	help
+	  Halt the core when a panic occurs. This is useful when running
+	  pre-production silicon or in an FPGA environment.
+
+endmenu
diff --git a/arch/metag/Makefile b/arch/metag/Makefile
new file mode 100644
index 0000000..81bd6a1
--- /dev/null
+++ b/arch/metag/Makefile
@@ -0,0 +1,87 @@
+#
+# metag/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to do have actions
+# for "archclean" cleaning up for this architecture.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+#               2007,2008,2012 by Imagination Technologies Ltd.
+#
+
+LDFLAGS					:=
+OBJCOPYFLAGS				:= -O binary -R .note -R .comment -S
+
+checkflags-$(CONFIG_METAG_META12)	+= -DMETAC_1_2
+checkflags-$(CONFIG_METAG_META21)	+= -DMETAC_2_1
+CHECKFLAGS				+= -D__metag__ $(checkflags-y)
+
+KBUILD_DEFCONFIG			:= meta2_defconfig
+
+sflags-$(CONFIG_METAG_META12)		+= -mmetac=1.2
+ifeq ($(CONFIG_METAG_META12),y)
+# Only use TBI API 1.4 if DSP is enabled for META12 cores
+sflags-$(CONFIG_METAG_DSP)		+= -DTBI_1_4
+endif
+sflags-$(CONFIG_METAG_META21)		+= -mmetac=2.1 -DTBI_1_4
+
+cflags-$(CONFIG_METAG_FUNCTION_TRACE)	+= -mhwtrace-leaf -mhwtrace-retpc
+cflags-$(CONFIG_METAG_META21)		+= -mextensions=bex
+
+KBUILD_CFLAGS				+= -pipe
+KBUILD_CFLAGS				+= -ffunction-sections
+
+KBUILD_CFLAGS				+= $(sflags-y) $(cflags-y)
+KBUILD_AFLAGS				+= $(sflags-y)
+
+LDFLAGS_vmlinux				:= $(ldflags-y)
+
+head-y					:= arch/metag/kernel/head.o
+
+core-y					+= arch/metag/boot/dts/
+core-y					+= arch/metag/kernel/
+core-y					+= arch/metag/mm/
+
+libs-y					+= arch/metag/lib/
+libs-y					+= arch/metag/tbx/
+
+boot					:= arch/metag/boot
+
+boot_targets				+= uImage
+boot_targets				+= uImage.gz
+boot_targets				+= uImage.bz2
+boot_targets				+= uImage.xz
+boot_targets				+= uImage.lzo
+boot_targets				+= uImage.bin
+boot_targets				+= vmlinux.bin
+
+PHONY					+= $(boot_targets)
+
+all: vmlinux.bin
+
+$(boot_targets): vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+%.dtb %.dtb.S %.dtb.o: scripts
+	$(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
+
+dtbs: scripts
+	$(Q)$(MAKE) $(build)=$(boot)/dts dtbs
+
+archclean:
+	$(Q)$(MAKE) $(clean)=$(boot)
+
+define archhelp
+  echo  '* vmlinux.bin	- Binary kernel image (arch/$(ARCH)/boot/vmlinux.bin)'
+  @echo '  uImage  	- Alias to bootable U-Boot image'
+  @echo '  uImage.bin	- Kernel-only image for U-Boot (bin)'
+  @echo '  uImage.gz	- Kernel-only image for U-Boot (gzip)'
+  @echo '  uImage.bz2	- Kernel-only image for U-Boot (bzip2)'
+  @echo '  uImage.xz	- Kernel-only image for U-Boot (xz)'
+  @echo '  uImage.lzo	- Kernel-only image for U-Boot (lzo)'
+  @echo '  dtbs		- Build device tree blobs for enabled boards'
+endef
diff --git a/arch/metag/boot/.gitignore b/arch/metag/boot/.gitignore
new file mode 100644
index 0000000..a021da2
--- /dev/null
+++ b/arch/metag/boot/.gitignore
@@ -0,0 +1,4 @@
+vmlinux*
+uImage*
+ramdisk.*
+*.dtb
diff --git a/arch/metag/boot/Makefile b/arch/metag/boot/Makefile
new file mode 100644
index 0000000..5a1f88c
--- /dev/null
+++ b/arch/metag/boot/Makefile
@@ -0,0 +1,68 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2007,2012  Imagination Technologies Ltd.
+#
+
+suffix-y := bin
+suffix-$(CONFIG_KERNEL_GZIP)	:= gz
+suffix-$(CONFIG_KERNEL_BZIP2)	:= bz2
+suffix-$(CONFIG_KERNEL_XZ)	:= xz
+suffix-$(CONFIG_KERNEL_LZO)	:= lzo
+
+targets += vmlinux.bin
+targets += uImage
+targets += uImage.gz
+targets += uImage.bz2
+targets += uImage.xz
+targets += uImage.lzo
+targets += uImage.bin
+
+extra-y += vmlinux.bin
+extra-y += vmlinux.bin.gz
+extra-y += vmlinux.bin.bz2
+extra-y += vmlinux.bin.xz
+extra-y += vmlinux.bin.lzo
+
+UIMAGE_LOADADDR = $(CONFIG_PAGE_OFFSET)
+
+ifeq ($(CONFIG_FUNCTION_TRACER),y)
+orig_cflags := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst -pg, , $(orig_cflags))
+endif
+
+$(obj)/vmlinux.bin: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,gzip)
+
+$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,bzip2)
+
+$(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,xzkern)
+
+$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,lzo)
+
+$(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE
+	$(call if_changed,uimage,gzip)
+
+$(obj)/uImage.bz2: $(obj)/vmlinux.bin.bz2 FORCE
+	$(call if_changed,uimage,bzip2)
+
+$(obj)/uImage.xz: $(obj)/vmlinux.bin.xz FORCE
+	$(call if_changed,uimage,xz)
+
+$(obj)/uImage.lzo: $(obj)/vmlinux.bin.lzo FORCE
+	$(call if_changed,uimage,lzo)
+
+$(obj)/uImage.bin: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,uimage,none)
+
+$(obj)/uImage: $(obj)/uImage.$(suffix-y)
+	@ln -sf $(notdir $<) $@
+	@echo '  Image $@ is ready'
diff --git a/arch/metag/boot/dts/Makefile b/arch/metag/boot/dts/Makefile
new file mode 100644
index 0000000..e0b5afd
--- /dev/null
+++ b/arch/metag/boot/dts/Makefile
@@ -0,0 +1,16 @@
+dtb-y	+= skeleton.dtb
+
+# Built-in dtb
+builtindtb-y				:= skeleton
+
+ifneq ($(CONFIG_METAG_BUILTIN_DTB_NAME),"")
+	builtindtb-y			:= $(CONFIG_METAG_BUILTIN_DTB_NAME)
+endif
+obj-$(CONFIG_METAG_BUILTIN_DTB)	+= $(patsubst "%",%,$(builtindtb-y)).dtb.o
+
+targets	+= dtbs
+targets	+= $(dtb-y)
+
+dtbs: $(addprefix $(obj)/, $(dtb-y))
+
+clean-files += *.dtb
diff --git a/arch/metag/boot/dts/skeleton.dts b/arch/metag/boot/dts/skeleton.dts
new file mode 100644
index 0000000..7244d1f
--- /dev/null
+++ b/arch/metag/boot/dts/skeleton.dts
@@ -0,0 +1,10 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
diff --git a/arch/metag/boot/dts/skeleton.dtsi b/arch/metag/boot/dts/skeleton.dtsi
new file mode 100644
index 0000000..78229ea
--- /dev/null
+++ b/arch/metag/boot/dts/skeleton.dtsi
@@ -0,0 +1,14 @@
+/*
+ * Skeleton device tree; the bare minimum needed to boot; just include and
+ * add a compatible value.  The bootloader will typically populate the memory
+ * node.
+ */
+
+/ {
+	compatible = "img,meta";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	chosen { };
+	aliases { };
+	memory { device_type = "memory"; reg = <0 0>; };
+};
diff --git a/arch/metag/configs/meta1_defconfig b/arch/metag/configs/meta1_defconfig
new file mode 100644
index 0000000..c35a75e
--- /dev/null
+++ b/arch/metag/configs/meta1_defconfig
@@ -0,0 +1,40 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_LOG_BUF_SHIFT=13
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_ELF_CORE is not set
+CONFIG_SLAB=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_FLATMEM_MANUAL=y
+CONFIG_META12_FPGA=y
+CONFIG_METAG_DA=y
+CONFIG_HZ_100=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=1
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_DA_TTY=y
+CONFIG_DA_CONSOLE=y
+# CONFIG_DEVKMEM is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_DEBUG_INFO=y
diff --git a/arch/metag/configs/meta2_defconfig b/arch/metag/configs/meta2_defconfig
new file mode 100644
index 0000000..fb31484
--- /dev/null
+++ b/arch/metag/configs/meta2_defconfig
@@ -0,0 +1,41 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=13
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_ELF_CORE is not set
+CONFIG_SLAB=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_METAG_L2C=y
+CONFIG_FLATMEM_MANUAL=y
+CONFIG_METAG_HALT_ON_PANIC=y
+CONFIG_METAG_DA=y
+CONFIG_HZ_100=y
+CONFIG_DEVTMPFS=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=1
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_DA_TTY=y
+CONFIG_DA_CONSOLE=y
+# CONFIG_DEVKMEM is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_DEBUG_INFO=y
diff --git a/arch/metag/configs/meta2_smp_defconfig b/arch/metag/configs/meta2_smp_defconfig
new file mode 100644
index 0000000..6c7b777
--- /dev/null
+++ b/arch/metag/configs/meta2_smp_defconfig
@@ -0,0 +1,42 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=13
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_ELF_CORE is not set
+CONFIG_SLAB=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_METAG_L2C=y
+CONFIG_FLATMEM_MANUAL=y
+CONFIG_METAG_HALT_ON_PANIC=y
+CONFIG_SMP=y
+CONFIG_METAG_DA=y
+CONFIG_HZ_100=y
+CONFIG_DEVTMPFS=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=1
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_DA_TTY=y
+CONFIG_DA_CONSOLE=y
+# CONFIG_DEVKMEM is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_DEBUG_INFO=y
diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild
new file mode 100644
index 0000000..6ae0ccb
--- /dev/null
+++ b/arch/metag/include/asm/Kbuild
@@ -0,0 +1,54 @@
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += bugs.h
+generic-y += clkdev.h
+generic-y += cputime.h
+generic-y += current.h
+generic-y += device.h
+generic-y += dma.h
+generic-y += emergency-restart.h
+generic-y += errno.h
+generic-y += exec.h
+generic-y += fb.h
+generic-y += fcntl.h
+generic-y += futex.h
+generic-y += hardirq.h
+generic-y += hw_irq.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += irq_regs.h
+generic-y += kdebug.h
+generic-y += kmap_types.h
+generic-y += kvm_para.h
+generic-y += local.h
+generic-y += local64.h
+generic-y += msgbuf.h
+generic-y += mutex.h
+generic-y += param.h
+generic-y += pci.h
+generic-y += percpu.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += scatterlist.h
+generic-y += sections.h
+generic-y += sembuf.h
+generic-y += serial.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += signal.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += switch_to.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += timex.h
+generic-y += trace_clock.h
+generic-y += types.h
+generic-y += ucontext.h
+generic-y += unaligned.h
+generic-y += user.h
+generic-y += vga.h
+generic-y += xor.h
diff --git a/arch/metag/include/asm/atomic.h b/arch/metag/include/asm/atomic.h
new file mode 100644
index 0000000..307ecd2
--- /dev/null
+++ b/arch/metag/include/asm/atomic.h
@@ -0,0 +1,53 @@
+#ifndef __ASM_METAG_ATOMIC_H
+#define __ASM_METAG_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/cmpxchg.h>
+
+#if defined(CONFIG_METAG_ATOMICITY_IRQSOFF)
+/* The simple UP case. */
+#include <asm-generic/atomic.h>
+#else
+
+#if defined(CONFIG_METAG_ATOMICITY_LOCK1)
+#include <asm/atomic_lock1.h>
+#else
+#include <asm/atomic_lnkget.h>
+#endif
+
+#define atomic_add_negative(a, v)       (atomic_add_return((a), (v)) < 0)
+
+#define atomic_dec_return(v) atomic_sub_return(1, (v))
+#define atomic_inc_return(v) atomic_add_return(1, (v))
+
+/*
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
+
+#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
+#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
+
+#define atomic_inc(v) atomic_add(1, (v))
+#define atomic_dec(v) atomic_sub(1, (v))
+
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
+#define smp_mb__before_atomic_dec()	barrier()
+#define smp_mb__after_atomic_dec()	barrier()
+#define smp_mb__before_atomic_inc()	barrier()
+#define smp_mb__after_atomic_inc()	barrier()
+
+#endif
+
+#define atomic_dec_if_positive(v)       atomic_sub_if_positive(1, v)
+
+#include <asm-generic/atomic64.h>
+
+#endif /* __ASM_METAG_ATOMIC_H */
diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h
new file mode 100644
index 0000000..d2e60a1
--- /dev/null
+++ b/arch/metag/include/asm/atomic_lnkget.h
@@ -0,0 +1,234 @@
+#ifndef __ASM_METAG_ATOMIC_LNKGET_H
+#define __ASM_METAG_ATOMIC_LNKGET_H
+
+#define ATOMIC_INIT(i)	{ (i) }
+
+#define atomic_set(v, i)		((v)->counter = (i))
+
+#include <linux/compiler.h>
+
+#include <asm/barrier.h>
+
+/*
+ * None of these asm statements clobber memory as LNKSET writes around
+ * the cache so the memory it modifies cannot safely be read by any means
+ * other than these accessors.
+ */
+
+static inline int atomic_read(const atomic_t *v)
+{
+	int temp;
+
+	asm volatile (
+		"LNKGETD %0, [%1]\n"
+		: "=da" (temp)
+		: "da" (&v->counter));
+
+	return temp;
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+	int temp;
+
+	asm volatile (
+		"1:	LNKGETD %0, [%1]\n"
+		"	ADD	%0, %0, %2\n"
+		"	LNKSETD [%1], %0\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		: "=&d" (temp)
+		: "da" (&v->counter), "bd" (i)
+		: "cc");
+}
+
+static inline void atomic_sub(int i, atomic_t *v)
+{
+	int temp;
+
+	asm volatile (
+		"1:	LNKGETD %0, [%1]\n"
+		"	SUB	%0, %0, %2\n"
+		"	LNKSETD [%1], %0\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ 1b\n"
+		: "=&d" (temp)
+		: "da" (&v->counter), "bd" (i)
+		: "cc");
+}
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	int result, temp;
+
+	smp_mb();
+
+	asm volatile (
+		"1:	LNKGETD %1, [%2]\n"
+		"	ADD	%1, %1, %3\n"
+		"	LNKSETD [%2], %1\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ 1b\n"
+		: "=&d" (temp), "=&da" (result)
+		: "da" (&v->counter), "bd" (i)
+		: "cc");
+
+	smp_mb();
+
+	return result;
+}
+
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+	int result, temp;
+
+	smp_mb();
+
+	asm volatile (
+		"1:	LNKGETD %1, [%2]\n"
+		"	SUB	%1, %1, %3\n"
+		"	LNKSETD [%2], %1\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		: "=&d" (temp), "=&da" (result)
+		: "da" (&v->counter), "bd" (i)
+		: "cc");
+
+	smp_mb();
+
+	return result;
+}
+
+static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	int temp;
+
+	asm volatile (
+		"1:	LNKGETD %0, [%1]\n"
+		"	AND	%0, %0, %2\n"
+		"	LNKSETD	[%1] %0\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		: "=&d" (temp)
+		: "da" (&v->counter), "bd" (~mask)
+		: "cc");
+}
+
+static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	int temp;
+
+	asm volatile (
+		"1:	LNKGETD %0, [%1]\n"
+		"	OR	%0, %0, %2\n"
+		"	LNKSETD	[%1], %0\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		: "=&d" (temp)
+		: "da" (&v->counter), "bd" (mask)
+		: "cc");
+}
+
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	int result, temp;
+
+	smp_mb();
+
+	asm volatile (
+		"1:	LNKGETD	%1, [%2]\n"
+		"	CMP	%1, %3\n"
+		"	LNKSETDEQ [%2], %4\n"
+		"	BNE	2f\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		"2:\n"
+		: "=&d" (temp), "=&d" (result)
+		: "da" (&v->counter), "bd" (old), "da" (new)
+		: "cc");
+
+	smp_mb();
+
+	return result;
+}
+
+static inline int atomic_xchg(atomic_t *v, int new)
+{
+	int temp, old;
+
+	asm volatile (
+		"1:	LNKGETD %1, [%2]\n"
+		"	LNKSETD	[%2], %3\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		: "=&d" (temp), "=&d" (old)
+		: "da" (&v->counter), "da" (new)
+		: "cc");
+
+	return old;
+}
+
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int result, temp;
+
+	smp_mb();
+
+	asm volatile (
+		"1:	LNKGETD %1, [%2]\n"
+		"	CMP	%1, %3\n"
+		"	ADD	%0, %1, %4\n"
+		"	LNKSETDNE [%2], %0\n"
+		"	BEQ	2f\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		"2:\n"
+		: "=&d" (temp), "=&d" (result)
+		: "da" (&v->counter), "bd" (u), "bd" (a)
+		: "cc");
+
+	smp_mb();
+
+	return result;
+}
+
+static inline int atomic_sub_if_positive(int i, atomic_t *v)
+{
+	int result, temp;
+
+	asm volatile (
+		"1:	LNKGETD %1, [%2]\n"
+		"	SUBS	%1, %1, %3\n"
+		"	LNKSETDGE [%2], %1\n"
+		"	BLT	2f\n"
+		"	DEFR	%0, TXSTAT\n"
+		"	ANDT	%0, %0, #HI(0x3f000000)\n"
+		"	CMPT	%0, #HI(0x02000000)\n"
+		"	BNZ	1b\n"
+		"2:\n"
+		: "=&d" (temp), "=&da" (result)
+		: "da" (&v->counter), "bd" (i)
+		: "cc");
+
+	return result;
+}
+
+#endif /* __ASM_METAG_ATOMIC_LNKGET_H */
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h
new file mode 100644
index 0000000..e578955
--- /dev/null
+++ b/arch/metag/include/asm/atomic_lock1.h
@@ -0,0 +1,160 @@
+#ifndef __ASM_METAG_ATOMIC_LOCK1_H
+#define __ASM_METAG_ATOMIC_LOCK1_H
+
+#define ATOMIC_INIT(i)	{ (i) }
+
+#include <linux/compiler.h>
+
+#include <asm/barrier.h>
+#include <asm/global_lock.h>
+
+static inline int atomic_read(const atomic_t *v)
+{
+	return (v)->counter;
+}
+
+/*
+ * atomic_set needs to be take the lock to protect atomic_add_unless from a
+ * possible race, as it reads the counter twice:
+ *
+ *  CPU0                               CPU1
+ *  atomic_add_unless(1, 0)
+ *    ret = v->counter (non-zero)
+ *    if (ret != u)                    v->counter = 0
+ *      v->counter += 1 (counter set to 1)
+ *
+ * Making atomic_set take the lock ensures that ordering and logical
+ * consistency is preserved.
+ */
+static inline int atomic_set(atomic_t *v, int i)
+{
+	unsigned long flags;
+
+	__global_lock1(flags);
+	fence();
+	v->counter = i;
+	__global_unlock1(flags);
+	return i;
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+	unsigned long flags;
+
+	__global_lock1(flags);
+	fence();
+	v->counter += i;
+	__global_unlock1(flags);
+}
+
+static inline void atomic_sub(int i, atomic_t *v)
+{
+	unsigned long flags;
+
+	__global_lock1(flags);
+	fence();
+	v->counter -= i;
+	__global_unlock1(flags);
+}
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	unsigned long result;
+	unsigned long flags;
+
+	__global_lock1(flags);
+	result = v->counter;
+	result += i;
+	fence();
+	v->counter = result;
+	__global_unlock1(flags);
+
+	return result;
+}
+
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+	unsigned long result;
+	unsigned long flags;
+
+	__global_lock1(flags);
+	result = v->counter;
+	result -= i;
+	fence();
+	v->counter = result;
+	__global_unlock1(flags);
+
+	return result;
+}
+
+static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	unsigned long flags;
+
+	__global_lock1(flags);
+	fence();
+	v->counter &= ~mask;
+	__global_unlock1(flags);
+}
+
+static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	unsigned long flags;
+
+	__global_lock1(flags);
+	fence();
+	v->counter |= mask;
+	__global_unlock1(flags);
+}
+
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	int ret;
+	unsigned long flags;
+
+	__global_lock1(flags);
+	ret = v->counter;
+	if (ret == old) {
+		fence();
+		v->counter = new;
+	}
+	__global_unlock1(flags);
+
+	return ret;
+}
+
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int ret;
+	unsigned long flags;
+
+	__global_lock1(flags);
+	ret = v->counter;
+	if (ret != u) {
+		fence();
+		v->counter += a;
+	}
+	__global_unlock1(flags);
+
+	return ret;
+}
+
+static inline int atomic_sub_if_positive(int i, atomic_t *v)
+{
+	int ret;
+	unsigned long flags;
+
+	__global_lock1(flags);
+	ret = v->counter - 1;
+	if (ret >= 0) {
+		fence();
+		v->counter = ret;
+	}
+	__global_unlock1(flags);
+
+	return ret;
+}
+
+#endif /* __ASM_METAG_ATOMIC_LOCK1_H */
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
new file mode 100644
index 0000000..c90bfc6
--- /dev/null
+++ b/arch/metag/include/asm/barrier.h
@@ -0,0 +1,85 @@
+#ifndef _ASM_METAG_BARRIER_H
+#define _ASM_METAG_BARRIER_H
+
+#include <asm/metag_mem.h>
+
+#define nop()		asm volatile ("NOP")
+#define mb()		wmb()
+#define rmb()		barrier()
+
+#ifdef CONFIG_METAG_META21
+
+/* HTP and above have a system event to fence writes */
+static inline void wr_fence(void)
+{
+	volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_FENCE;
+	barrier();
+	*flushptr = 0;
+}
+
+#else /* CONFIG_METAG_META21 */
+
+/*
+ * ATP doesn't have system event to fence writes, so it is necessary to flush
+ * the processor write queues as well as possibly the write combiner (depending
+ * on the page being written).
+ * To ensure the write queues are flushed we do 4 writes to a system event
+ * register (in this case write combiner flush) which will also flush the write
+ * combiner.
+ */
+static inline void wr_fence(void)
+{
+	volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_COMBINE_FLUSH;
+	barrier();
+	*flushptr = 0;
+	*flushptr = 0;
+	*flushptr = 0;
+	*flushptr = 0;
+}
+
+#endif /* !CONFIG_METAG_META21 */
+
+static inline void wmb(void)
+{
+	/* flush writes through the write combiner */
+	wr_fence();
+}
+
+#define read_barrier_depends()  do { } while (0)
+
+#ifndef CONFIG_SMP
+#define fence()		do { } while (0)
+#define smp_mb()        barrier()
+#define smp_rmb()       barrier()
+#define smp_wmb()       barrier()
+#else
+
+#ifdef CONFIG_METAG_SMP_WRITE_REORDERING
+/*
+ * Write to the atomic memory unlock system event register (command 0). This is
+ * needed before a write to shared memory in a critical section, to prevent
+ * external reordering of writes before the fence on other threads with writes
+ * after the fence on this thread (and to prevent the ensuing cache-memory
+ * incoherence). It is therefore ineffective if used after and on the same
+ * thread as a write.
+ */
+static inline void fence(void)
+{
+	volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_ATOMIC_UNLOCK;
+	barrier();
+	*flushptr = 0;
+}
+#define smp_mb()        fence()
+#define smp_rmb()       fence()
+#define smp_wmb()       barrier()
+#else
+#define fence()		do { } while (0)
+#define smp_mb()        barrier()
+#define smp_rmb()       barrier()
+#define smp_wmb()       barrier()
+#endif
+#endif
+#define smp_read_barrier_depends()     do { } while (0)
+#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+
+#endif /* _ASM_METAG_BARRIER_H */
diff --git a/arch/metag/include/asm/bitops.h b/arch/metag/include/asm/bitops.h
new file mode 100644
index 0000000..c0d0df0
--- /dev/null
+++ b/arch/metag/include/asm/bitops.h
@@ -0,0 +1,132 @@
+#ifndef __ASM_METAG_BITOPS_H
+#define __ASM_METAG_BITOPS_H
+
+#include <linux/compiler.h>
+#include <asm/barrier.h>
+#include <asm/global_lock.h>
+
+/*
+ * clear_bit() doesn't provide any barrier for the compiler.
+ */
+#define smp_mb__before_clear_bit()	barrier()
+#define smp_mb__after_clear_bit()	barrier()
+
+#ifdef CONFIG_SMP
+/*
+ * These functions are the basis of our bit ops.
+ */
+static inline void set_bit(unsigned int bit, volatile unsigned long *p)
+{
+	unsigned long flags;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__global_lock1(flags);
+	fence();
+	*p |= mask;
+	__global_unlock1(flags);
+}
+
+static inline void clear_bit(unsigned int bit, volatile unsigned long *p)
+{
+	unsigned long flags;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__global_lock1(flags);
+	fence();
+	*p &= ~mask;
+	__global_unlock1(flags);
+}
+
+static inline void change_bit(unsigned int bit, volatile unsigned long *p)
+{
+	unsigned long flags;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__global_lock1(flags);
+	fence();
+	*p ^= mask;
+	__global_unlock1(flags);
+}
+
+static inline int test_and_set_bit(unsigned int bit, volatile unsigned long *p)
+{
+	unsigned long flags;
+	unsigned long old;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__global_lock1(flags);
+	old = *p;
+	if (!(old & mask)) {
+		fence();
+		*p = old | mask;
+	}
+	__global_unlock1(flags);
+
+	return (old & mask) != 0;
+}
+
+static inline int test_and_clear_bit(unsigned int bit,
+				     volatile unsigned long *p)
+{
+	unsigned long flags;
+	unsigned long old;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__global_lock1(flags);
+	old = *p;
+	if (old & mask) {
+		fence();
+		*p = old & ~mask;
+	}
+	__global_unlock1(flags);
+
+	return (old & mask) != 0;
+}
+
+static inline int test_and_change_bit(unsigned int bit,
+				      volatile unsigned long *p)
+{
+	unsigned long flags;
+	unsigned long old;
+	unsigned long mask = 1UL << (bit & 31);
+
+	p += bit >> 5;
+
+	__global_lock1(flags);
+	fence();
+	old = *p;
+	*p = old ^ mask;
+	__global_unlock1(flags);
+
+	return (old & mask) != 0;
+}
+
+#else
+#include <asm-generic/bitops/atomic.h>
+#endif /* CONFIG_SMP */
+
+#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/find.h>
+#include <asm-generic/bitops/ffs.h>
+#include <asm-generic/bitops/__ffs.h>
+#include <asm-generic/bitops/ffz.h>
+#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/le.h>
+#include <asm-generic/bitops/ext2-atomic.h>
+
+#endif /* __ASM_METAG_BITOPS_H */
diff --git a/arch/metag/include/asm/bug.h b/arch/metag/include/asm/bug.h
new file mode 100644
index 0000000..d04b48c
--- /dev/null
+++ b/arch/metag/include/asm/bug.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_METAG_BUG_H
+#define _ASM_METAG_BUG_H
+
+#include <asm-generic/bug.h>
+
+struct pt_regs;
+
+extern const char *trap_name(int trapno);
+extern void die(const char *str, struct pt_regs *regs, long err,
+		unsigned long addr) __attribute__ ((noreturn));
+
+#endif
diff --git a/arch/metag/include/asm/cache.h b/arch/metag/include/asm/cache.h
new file mode 100644
index 0000000..a43b650
--- /dev/null
+++ b/arch/metag/include/asm/cache.h
@@ -0,0 +1,23 @@
+#ifndef __ASM_METAG_CACHE_H
+#define __ASM_METAG_CACHE_H
+
+/* L1 cache line size (64 bytes) */
+#define L1_CACHE_SHIFT		6
+#define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
+
+/* Meta requires large data items to be 8 byte aligned. */
+#define ARCH_SLAB_MINALIGN	8
+
+/*
+ * With an L2 cache, we may invalidate dirty lines, so we need to ensure DMA
+ * buffers have cache line alignment.
+ */
+#ifdef CONFIG_METAG_L2C
+#define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
+#else
+#define ARCH_DMA_MINALIGN	8
+#endif
+
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
+#endif
diff --git a/arch/metag/include/asm/cacheflush.h b/arch/metag/include/asm/cacheflush.h
new file mode 100644
index 0000000..7787ec5
--- /dev/null
+++ b/arch/metag/include/asm/cacheflush.h
@@ -0,0 +1,250 @@
+#ifndef _METAG_CACHEFLUSH_H
+#define _METAG_CACHEFLUSH_H
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/io.h>
+
+#include <asm/l2cache.h>
+#include <asm/metag_isa.h>
+#include <asm/metag_mem.h>
+
+void metag_cache_probe(void);
+
+void metag_data_cache_flush_all(const void *start);
+void metag_code_cache_flush_all(const void *start);
+
+/*
+ * Routines to flush physical cache lines that may be used to cache data or code
+ * normally accessed via the linear address range supplied. The region flushed
+ * must either lie in local or global address space determined by the top bit of
+ * the pStart address. If Bytes is >= 4K then the whole of the related cache
+ * state will be flushed rather than a limited range.
+ */
+void metag_data_cache_flush(const void *start, int bytes);
+void metag_code_cache_flush(const void *start, int bytes);
+
+#ifdef CONFIG_METAG_META12
+
+/* Write through, virtually tagged, split I/D cache. */
+
+static inline void __flush_cache_all(void)
+{
+	metag_code_cache_flush_all((void *) PAGE_OFFSET);
+	metag_data_cache_flush_all((void *) PAGE_OFFSET);
+}
+
+#define flush_cache_all() __flush_cache_all()
+
+/* flush the entire user address space referenced in this mm structure */
+static inline void flush_cache_mm(struct mm_struct *mm)
+{
+	if (mm == current->mm)
+		__flush_cache_all();
+}
+
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
+
+/* flush a range of addresses from this mm */
+static inline void flush_cache_range(struct vm_area_struct *vma,
+				     unsigned long start, unsigned long end)
+{
+	flush_cache_mm(vma->vm_mm);
+}
+
+static inline void flush_cache_page(struct vm_area_struct *vma,
+				    unsigned long vmaddr, unsigned long pfn)
+{
+	flush_cache_mm(vma->vm_mm);
+}
+
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE	1
+static inline void flush_dcache_page(struct page *page)
+{
+	metag_data_cache_flush_all((void *) PAGE_OFFSET);
+}
+
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+
+static inline void flush_icache_page(struct vm_area_struct *vma,
+				     struct page *page)
+{
+	metag_code_cache_flush(page_to_virt(page), PAGE_SIZE);
+}
+
+static inline void flush_cache_vmap(unsigned long start, unsigned long end)
+{
+	metag_data_cache_flush_all((void *) PAGE_OFFSET);
+}
+
+static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
+{
+	metag_data_cache_flush_all((void *) PAGE_OFFSET);
+}
+
+#else
+
+/* Write through, physically tagged, split I/D cache. */
+
+#define flush_cache_all()			do { } while (0)
+#define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
+#define flush_cache_range(vma, start, end)	do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+#define flush_icache_page(vma, pg)		do { } while (0)
+#define flush_cache_vmap(start, end)		do { } while (0)
+#define flush_cache_vunmap(start, end)		do { } while (0)
+
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE	1
+static inline void flush_dcache_page(struct page *page)
+{
+	/* FIXME: We can do better than this. All we are trying to do is
+	 * make the i-cache coherent, we should use the PG_arch_1 bit like
+	 * e.g. powerpc.
+	 */
+#ifdef CONFIG_SMP
+	metag_out32(1, SYSC_ICACHE_FLUSH);
+#else
+	metag_code_cache_flush_all((void *) PAGE_OFFSET);
+#endif
+}
+
+#endif
+
+/* Push n pages at kernel virtual address and clear the icache */
+static inline void flush_icache_range(unsigned long address,
+				      unsigned long endaddr)
+{
+#ifdef CONFIG_SMP
+	metag_out32(1, SYSC_ICACHE_FLUSH);
+#else
+	metag_code_cache_flush((void *) address, endaddr - address);
+#endif
+}
+
+static inline void flush_cache_sigtramp(unsigned long addr, int size)
+{
+	/*
+	 * Flush the icache in case there was previously some code
+	 * fetched from this address, perhaps a previous sigtramp.
+	 *
+	 * We don't need to flush the dcache, it's write through and
+	 * we just wrote the sigtramp code through it.
+	 */
+#ifdef CONFIG_SMP
+	metag_out32(1, SYSC_ICACHE_FLUSH);
+#else
+	metag_code_cache_flush((void *) addr, size);
+#endif
+}
+
+#ifdef CONFIG_METAG_L2C
+
+/*
+ * Perform a single specific CACHEWD operation on an address, masking lower bits
+ * of address first.
+ */
+static inline void cachewd_line(void *addr, unsigned int data)
+{
+	unsigned long masked = (unsigned long)addr & -0x40;
+	__builtin_meta2_cachewd((void *)masked, data);
+}
+
+/* Perform a certain CACHEW op on each cache line in a range */
+static inline void cachew_region_op(void *start, unsigned long size,
+				    unsigned int op)
+{
+	unsigned long offset = (unsigned long)start & 0x3f;
+	int i;
+	if (offset) {
+		size += offset;
+		start -= offset;
+	}
+	i = (size - 1) >> 6;
+	do {
+		__builtin_meta2_cachewd(start, op);
+		start += 0x40;
+	} while (i--);
+}
+
+/* prevent write fence and flushbacks being reordered in L2 */
+static inline void l2c_fence_flush(void *addr)
+{
+	/*
+	 * Synchronise by reading back and re-flushing.
+	 * It is assumed this access will miss, as the caller should have just
+	 * flushed the cache line.
+	 */
+	(void)(volatile u8 *)addr;
+	cachewd_line(addr, CACHEW_FLUSH_L1D_L2);
+}
+
+/* prevent write fence and writebacks being reordered in L2 */
+static inline void l2c_fence(void *addr)
+{
+	/*
+	 * A write back has occurred, but not necessarily an invalidate, so the
+	 * readback in l2c_fence_flush() would hit in the cache and have no
+	 * effect. Therefore fully flush the line first.
+	 */
+	cachewd_line(addr, CACHEW_FLUSH_L1D_L2);
+	l2c_fence_flush(addr);
+}
+
+/* Used to keep memory consistent when doing DMA. */
+static inline void flush_dcache_region(void *start, unsigned long size)
+{
+	/* metag_data_cache_flush won't flush L2 cache lines if size >= 4096 */
+	if (meta_l2c_is_enabled()) {
+		cachew_region_op(start, size, CACHEW_FLUSH_L1D_L2);
+		if (meta_l2c_is_writeback())
+			l2c_fence_flush(start + size - 1);
+	} else {
+		metag_data_cache_flush(start, size);
+	}
+}
+
+/* Write back dirty lines to memory (or do nothing if no writeback caches) */
+static inline void writeback_dcache_region(void *start, unsigned long size)
+{
+	if (meta_l2c_is_enabled() && meta_l2c_is_writeback()) {
+		cachew_region_op(start, size, CACHEW_WRITEBACK_L1D_L2);
+		l2c_fence(start + size - 1);
+	}
+}
+
+/* Invalidate (may also write back if necessary) */
+static inline void invalidate_dcache_region(void *start, unsigned long size)
+{
+	if (meta_l2c_is_enabled())
+		cachew_region_op(start, size, CACHEW_INVALIDATE_L1D_L2);
+	else
+		metag_data_cache_flush(start, size);
+}
+#else
+#define flush_dcache_region(s, l)	metag_data_cache_flush((s), (l))
+#define writeback_dcache_region(s, l)	do {} while (0)
+#define invalidate_dcache_region(s, l)	flush_dcache_region((s), (l))
+#endif
+
+static inline void copy_to_user_page(struct vm_area_struct *vma,
+				     struct page *page, unsigned long vaddr,
+				     void *dst, const void *src,
+				     unsigned long len)
+{
+	memcpy(dst, src, len);
+	flush_icache_range((unsigned long)dst, (unsigned long)dst + len);
+}
+
+static inline void copy_from_user_page(struct vm_area_struct *vma,
+				       struct page *page, unsigned long vaddr,
+				       void *dst, const void *src,
+				       unsigned long len)
+{
+	memcpy(dst, src, len);
+}
+
+#endif /* _METAG_CACHEFLUSH_H */
diff --git a/arch/metag/include/asm/cachepart.h b/arch/metag/include/asm/cachepart.h
new file mode 100644
index 0000000..cf6b44e
--- /dev/null
+++ b/arch/metag/include/asm/cachepart.h
@@ -0,0 +1,42 @@
+/*
+ * Meta cache partition manipulation.
+ *
+ * Copyright 2010 Imagination Technologies Ltd.
+ */
+
+#ifndef _METAG_CACHEPART_H_
+#define _METAG_CACHEPART_H_
+
+/**
+ * get_dcache_size() - Get size of data cache.
+ */
+unsigned int get_dcache_size(void);
+
+/**
+ * get_icache_size() - Get size of code cache.
+ */
+unsigned int get_icache_size(void);
+
+/**
+ * get_global_dcache_size() - Get the thread's global dcache.
+ *
+ * Returns the size of the current thread's global dcache partition.
+ */
+unsigned int get_global_dcache_size(void);
+
+/**
+ * get_global_icache_size() - Get the thread's global icache.
+ *
+ * Returns the size of the current thread's global icache partition.
+ */
+unsigned int get_global_icache_size(void);
+
+/**
+ * check_for_dache_aliasing() - Ensure that the bootloader has configured the
+ * dache and icache properly to avoid aliasing
+ * @thread_id: Hardware thread ID
+ *
+ */
+void check_for_cache_aliasing(int thread_id);
+
+#endif
diff --git a/arch/metag/include/asm/checksum.h b/arch/metag/include/asm/checksum.h
new file mode 100644
index 0000000..999bf76
--- /dev/null
+++ b/arch/metag/include/asm/checksum.h
@@ -0,0 +1,92 @@
+#ifndef _METAG_CHECKSUM_H
+#define _METAG_CHECKSUM_H
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+
+/*
+ * the same as csum_partial, but copies from src while it
+ * checksums
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+extern __wsum csum_partial_copy(const void *src, void *dst, int len,
+				__wsum sum);
+
+/*
+ * the same as csum_partial_copy, but copies from user space.
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+					int len, __wsum sum, int *csum_err);
+
+#define csum_partial_copy_nocheck(src, dst, len, sum)	\
+	csum_partial_copy((src), (dst), (len), (sum))
+
+/*
+ * Fold a partial checksum
+ */
+static inline __sum16 csum_fold(__wsum csum)
+{
+	u32 sum = (__force u32)csum;
+	sum = (sum & 0xffff) + (sum >> 16);
+	sum = (sum & 0xffff) + (sum >> 16);
+	return (__force __sum16)~sum;
+}
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ */
+extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+					unsigned short len,
+					unsigned short proto,
+					__wsum sum)
+{
+	unsigned long len_proto = (proto + len) << 8;
+	asm ("ADD    %0, %0, %1\n"
+	     "ADDS   %0, %0, %2\n"
+	     "ADDCS  %0, %0, #1\n"
+	     "ADDS   %0, %0, %3\n"
+	     "ADDCS  %0, %0, #1\n"
+	     : "=d" (sum)
+	     : "d" (daddr), "d" (saddr), "d" (len_proto),
+	       "0" (sum)
+	     : "cc");
+	return sum;
+}
+
+static inline __sum16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len,
+		  unsigned short proto, __wsum sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+extern __sum16 ip_compute_csum(const void *buff, int len);
+
+#endif /* _METAG_CHECKSUM_H */
diff --git a/arch/metag/include/asm/clock.h b/arch/metag/include/asm/clock.h
new file mode 100644
index 0000000..3e2915a
--- /dev/null
+++ b/arch/metag/include/asm/clock.h
@@ -0,0 +1,51 @@
+/*
+ * arch/metag/include/asm/clock.h
+ *
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _METAG_CLOCK_H_
+#define _METAG_CLOCK_H_
+
+#include <asm/mach/arch.h>
+
+/**
+ * struct meta_clock_desc - Meta Core clock callbacks.
+ * @get_core_freq:	Get the frequency of the Meta core. If this is NULL, the
+ *			core frequency will be determined like this:
+ *			Meta 1: based on loops_per_jiffy.
+ *			Meta 2: (EXPAND_TIMER_DIV + 1) MHz.
+ */
+struct meta_clock_desc {
+	unsigned long		(*get_core_freq)(void);
+};
+
+extern struct meta_clock_desc _meta_clock;
+
+/*
+ * Set up the default clock, ensuring all callbacks are valid - only accessible
+ * during boot.
+ */
+void setup_meta_clocks(struct meta_clock_desc *desc);
+
+/**
+ * get_coreclock() - Get the frequency of the Meta core clock.
+ *
+ * Returns:	The Meta core clock frequency in Hz.
+ */
+static inline unsigned long get_coreclock(void)
+{
+	/*
+	 * Use the current clock callback. If set correctly this will provide
+	 * the most accurate frequency as it can be calculated directly from the
+	 * PLL configuration. otherwise a default callback will have been set
+	 * instead.
+	 */
+	return _meta_clock.get_core_freq();
+}
+
+#endif /* _METAG_CLOCK_H_ */
diff --git a/arch/metag/include/asm/cmpxchg.h b/arch/metag/include/asm/cmpxchg.h
new file mode 100644
index 0000000..b1bc1be
--- /dev/null
+++ b/arch/metag/include/asm/cmpxchg.h
@@ -0,0 +1,65 @@
+#ifndef __ASM_METAG_CMPXCHG_H
+#define __ASM_METAG_CMPXCHG_H
+
+#include <asm/barrier.h>
+
+#if defined(CONFIG_METAG_ATOMICITY_IRQSOFF)
+#include <asm/cmpxchg_irq.h>
+#elif defined(CONFIG_METAG_ATOMICITY_LOCK1)
+#include <asm/cmpxchg_lock1.h>
+#elif defined(CONFIG_METAG_ATOMICITY_LNKGET)
+#include <asm/cmpxchg_lnkget.h>
+#endif
+
+extern void __xchg_called_with_bad_pointer(void);
+
+#define __xchg(ptr, x, size)				\
+({							\
+	unsigned long __xchg__res;			\
+	volatile void *__xchg_ptr = (ptr);		\
+	switch (size) {					\
+	case 4:						\
+		__xchg__res = xchg_u32(__xchg_ptr, x);	\
+		break;					\
+	case 1:						\
+		__xchg__res = xchg_u8(__xchg_ptr, x);	\
+		break;					\
+	default:					\
+		__xchg_called_with_bad_pointer();	\
+		__xchg__res = x;			\
+		break;					\
+	}						\
+							\
+	__xchg__res;					\
+})
+
+#define xchg(ptr, x)	\
+	((__typeof__(*(ptr)))__xchg((ptr), (unsigned long)(x), sizeof(*(ptr))))
+
+/* This function doesn't exist, so you'll get a linker error
+ * if something tries to do an invalid cmpxchg(). */
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+				      unsigned long new, int size)
+{
+	switch (size) {
+	case 4:
+		return __cmpxchg_u32(ptr, old, new);
+	}
+	__cmpxchg_called_with_bad_pointer();
+	return old;
+}
+
+#define __HAVE_ARCH_CMPXCHG 1
+
+#define cmpxchg(ptr, o, n)						\
+	({								\
+		__typeof__(*(ptr)) _o_ = (o);				\
+		__typeof__(*(ptr)) _n_ = (n);				\
+		(__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \
+					       (unsigned long)_n_,	\
+					       sizeof(*(ptr)));		\
+	})
+
+#endif /* __ASM_METAG_CMPXCHG_H */
diff --git a/arch/metag/include/asm/cmpxchg_irq.h b/arch/metag/include/asm/cmpxchg_irq.h
new file mode 100644
index 0000000..6495731
--- /dev/null
+++ b/arch/metag/include/asm/cmpxchg_irq.h
@@ -0,0 +1,42 @@
+#ifndef __ASM_METAG_CMPXCHG_IRQ_H
+#define __ASM_METAG_CMPXCHG_IRQ_H
+
+#include <linux/irqflags.h>
+
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+	unsigned long flags, retval;
+
+	local_irq_save(flags);
+	retval = *m;
+	*m = val;
+	local_irq_restore(flags);
+	return retval;
+}
+
+static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val)
+{
+	unsigned long flags, retval;
+
+	local_irq_save(flags);
+	retval = *m;
+	*m = val & 0xff;
+	local_irq_restore(flags);
+	return retval;
+}
+
+static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
+					  unsigned long new)
+{
+	__u32 retval;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	retval = *m;
+	if (retval == old)
+		*m = new;
+	local_irq_restore(flags);       /* implies memory barrier  */
+	return retval;
+}
+
+#endif /* __ASM_METAG_CMPXCHG_IRQ_H */
diff --git a/arch/metag/include/asm/cmpxchg_lnkget.h b/arch/metag/include/asm/cmpxchg_lnkget.h
new file mode 100644
index 0000000..0154e28
--- /dev/null
+++ b/arch/metag/include/asm/cmpxchg_lnkget.h
@@ -0,0 +1,86 @@
+#ifndef __ASM_METAG_CMPXCHG_LNKGET_H
+#define __ASM_METAG_CMPXCHG_LNKGET_H
+
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+	int temp, old;
+
+	smp_mb();
+
+	asm volatile (
+		      "1:	LNKGETD %1, [%2]\n"
+		      "	LNKSETD	[%2], %3\n"
+		      "	DEFR	%0, TXSTAT\n"
+		      "	ANDT	%0, %0, #HI(0x3f000000)\n"
+		      "	CMPT	%0, #HI(0x02000000)\n"
+		      "	BNZ	1b\n"
+#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE
+		      "	DCACHE	[%2], %0\n"
+#endif
+		      : "=&d" (temp), "=&d" (old)
+		      : "da" (m), "da" (val)
+		      : "cc"
+		      );
+
+	smp_mb();
+
+	return old;
+}
+
+static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val)
+{
+	int temp, old;
+
+	smp_mb();
+
+	asm volatile (
+		      "1:	LNKGETD %1, [%2]\n"
+		      "	LNKSETD	[%2], %3\n"
+		      "	DEFR	%0, TXSTAT\n"
+		      "	ANDT	%0, %0, #HI(0x3f000000)\n"
+		      "	CMPT	%0, #HI(0x02000000)\n"
+		      "	BNZ	1b\n"
+#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE
+		      "	DCACHE	[%2], %0\n"
+#endif
+		      : "=&d" (temp), "=&d" (old)
+		      : "da" (m), "da" (val & 0xff)
+		      : "cc"
+		      );
+
+	smp_mb();
+
+	return old;
+}
+
+static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
+					  unsigned long new)
+{
+	__u32 retval, temp;
+
+	smp_mb();
+
+	asm volatile (
+		      "1:	LNKGETD	%1, [%2]\n"
+		      "	CMP	%1, %3\n"
+		      "	LNKSETDEQ [%2], %4\n"
+		      "	BNE	2f\n"
+		      "	DEFR	%0, TXSTAT\n"
+		      "	ANDT	%0, %0, #HI(0x3f000000)\n"
+		      "	CMPT	%0, #HI(0x02000000)\n"
+		      "	BNZ	1b\n"
+#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE
+		      "	DCACHE	[%2], %0\n"
+#endif
+		      "2:\n"
+		      : "=&d" (temp), "=&da" (retval)
+		      : "da" (m), "bd" (old), "da" (new)
+		      : "cc"
+		      );
+
+	smp_mb();
+
+	return retval;
+}
+
+#endif /* __ASM_METAG_CMPXCHG_LNKGET_H */
diff --git a/arch/metag/include/asm/cmpxchg_lock1.h b/arch/metag/include/asm/cmpxchg_lock1.h
new file mode 100644
index 0000000..fd68504
--- /dev/null
+++ b/arch/metag/include/asm/cmpxchg_lock1.h
@@ -0,0 +1,48 @@
+#ifndef __ASM_METAG_CMPXCHG_LOCK1_H
+#define __ASM_METAG_CMPXCHG_LOCK1_H
+
+#include <asm/global_lock.h>
+
+/* Use LOCK2 as these have to be atomic w.r.t. ordinary accesses. */
+
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+	unsigned long flags, retval;
+
+	__global_lock2(flags);
+	fence();
+	retval = *m;
+	*m = val;
+	__global_unlock2(flags);
+	return retval;
+}
+
+static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val)
+{
+	unsigned long flags, retval;
+
+	__global_lock2(flags);
+	fence();
+	retval = *m;
+	*m = val & 0xff;
+	__global_unlock2(flags);
+	return retval;
+}
+
+static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
+					  unsigned long new)
+{
+	__u32 retval;
+	unsigned long flags;
+
+	__global_lock2(flags);
+	retval = *m;
+	if (retval == old) {
+		fence();
+		*m = new;
+	}
+	__global_unlock2(flags);
+	return retval;
+}
+
+#endif /* __ASM_METAG_CMPXCHG_LOCK1_H */
diff --git a/arch/metag/include/asm/core_reg.h b/arch/metag/include/asm/core_reg.h
new file mode 100644
index 0000000..bdbc3a5
--- /dev/null
+++ b/arch/metag/include/asm/core_reg.h
@@ -0,0 +1,35 @@
+#ifndef __ASM_METAG_CORE_REG_H_
+#define __ASM_METAG_CORE_REG_H_
+
+#include <asm/metag_regs.h>
+
+extern void core_reg_write(int unit, int reg, int thread, unsigned int val);
+extern unsigned int core_reg_read(int unit, int reg, int thread);
+
+/*
+ * These macros allow direct access from C to any register known to the
+ * assembler. Example candidates are TXTACTCYC, TXIDLECYC, and TXPRIVEXT.
+ */
+
+#define __core_reg_get(reg) ({						\
+	unsigned int __grvalue;						\
+	asm volatile("MOV	%0," #reg				\
+		     : "=r" (__grvalue));				\
+	__grvalue;							\
+})
+
+#define __core_reg_set(reg, value) do {					\
+	unsigned int __srvalue = (value);				\
+	asm volatile("MOV	" #reg ",%0"				\
+		     :							\
+		     : "r" (__srvalue));				\
+} while (0)
+
+#define __core_reg_swap(reg, value) do {				\
+	unsigned int __srvalue = (value);				\
+	asm volatile("SWAP	" #reg ",%0"				\
+		     : "+r" (__srvalue));				\
+	(value) = __srvalue;						\
+} while (0)
+
+#endif
diff --git a/arch/metag/include/asm/cpu.h b/arch/metag/include/asm/cpu.h
new file mode 100644
index 0000000..decf129
--- /dev/null
+++ b/arch/metag/include/asm/cpu.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_METAG_CPU_H
+#define _ASM_METAG_CPU_H
+
+#include <linux/percpu.h>
+
+struct cpuinfo_metag {
+	struct cpu cpu;
+#ifdef CONFIG_SMP
+	unsigned long loops_per_jiffy;
+#endif
+};
+
+DECLARE_PER_CPU(struct cpuinfo_metag, cpu_data);
+#endif /* _ASM_METAG_CPU_H */
diff --git a/arch/metag/include/asm/da.h b/arch/metag/include/asm/da.h
new file mode 100644
index 0000000..81bd521
--- /dev/null
+++ b/arch/metag/include/asm/da.h
@@ -0,0 +1,43 @@
+/*
+ * Meta DA JTAG debugger control.
+ *
+ * Copyright 2012 Imagination Technologies Ltd.
+ */
+
+#ifndef _METAG_DA_H_
+#define _METAG_DA_H_
+
+#ifdef CONFIG_METAG_DA
+
+#include <linux/init.h>
+#include <linux/types.h>
+
+extern bool _metag_da_present;
+
+/**
+ * metag_da_enabled() - Find whether a DA is currently enabled.
+ *
+ * Returns:	true if a DA was detected, false if not.
+ */
+static inline bool metag_da_enabled(void)
+{
+	return _metag_da_present;
+}
+
+/**
+ * metag_da_probe() - Try and detect a connected DA.
+ *
+ * This is used at start up to detect whether a DA is active.
+ *
+ * Returns:	0 on detection, -err otherwise.
+ */
+int __init metag_da_probe(void);
+
+#else /* !CONFIG_METAG_DA */
+
+#define metag_da_enabled() false
+#define metag_da_probe() do {} while (0)
+
+#endif
+
+#endif /* _METAG_DA_H_ */
diff --git a/arch/metag/include/asm/delay.h b/arch/metag/include/asm/delay.h
new file mode 100644
index 0000000..9c92f99
--- /dev/null
+++ b/arch/metag/include/asm/delay.h
@@ -0,0 +1,29 @@
+#ifndef _METAG_DELAY_H
+#define _METAG_DELAY_H
+
+/*
+ * Copyright (C) 1993 Linus Torvalds
+ *
+ * Delay routines calling functions in arch/metag/lib/delay.c
+ */
+
+/* Undefined functions to get compile-time errors */
+extern void __bad_udelay(void);
+extern void __bad_ndelay(void);
+
+extern void __udelay(unsigned long usecs);
+extern void __ndelay(unsigned long nsecs);
+extern void __const_udelay(unsigned long xloops);
+extern void __delay(unsigned long loops);
+
+/* 0x10c7 is 2**32 / 1000000 (rounded up) */
+#define udelay(n) (__builtin_constant_p(n) ? \
+	((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \
+	__udelay(n))
+
+/* 0x5 is 2**32 / 1000000000 (rounded up) */
+#define ndelay(n) (__builtin_constant_p(n) ? \
+	((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
+	__ndelay(n))
+
+#endif /* _METAG_DELAY_H */
diff --git a/arch/metag/include/asm/div64.h b/arch/metag/include/asm/div64.h
new file mode 100644
index 0000000..0fdd116
--- /dev/null
+++ b/arch/metag/include/asm/div64.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_DIV64_H__
+#define __ASM_DIV64_H__
+
+#include <asm-generic/div64.h>
+
+extern u64 div_u64(u64 dividend, u64 divisor);
+extern s64 div_s64(s64 dividend, s64 divisor);
+
+#define div_u64 div_u64
+#define div_s64 div_s64
+
+#endif
diff --git a/arch/metag/include/asm/dma-mapping.h b/arch/metag/include/asm/dma-mapping.h
new file mode 100644
index 0000000..14b23ef
--- /dev/null
+++ b/arch/metag/include/asm/dma-mapping.h
@@ -0,0 +1,190 @@
+#ifndef _ASM_METAG_DMA_MAPPING_H
+#define _ASM_METAG_DMA_MAPPING_H
+
+#include <linux/mm.h>
+
+#include <asm/cache.h>
+#include <asm/io.h>
+#include <linux/scatterlist.h>
+#include <asm/bug.h>
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+			 dma_addr_t *dma_handle, gfp_t flag);
+
+void dma_free_coherent(struct device *dev, size_t size,
+		       void *vaddr, dma_addr_t dma_handle);
+
+void dma_sync_for_device(void *vaddr, size_t size, int dma_direction);
+void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction);
+
+int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+		      void *cpu_addr, dma_addr_t dma_addr, size_t size);
+
+int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
+			  void *cpu_addr, dma_addr_t dma_addr, size_t size);
+
+static inline dma_addr_t
+dma_map_single(struct device *dev, void *ptr, size_t size,
+	       enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+	WARN_ON(size == 0);
+	dma_sync_for_device(ptr, size, direction);
+	return virt_to_phys(ptr);
+}
+
+static inline void
+dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+		 enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+	dma_sync_for_cpu(phys_to_virt(dma_addr), size, direction);
+}
+
+static inline int
+dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
+	   enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	int i;
+
+	BUG_ON(!valid_dma_direction(direction));
+	WARN_ON(nents == 0 || sglist[0].length == 0);
+
+	for_each_sg(sglist, sg, nents, i) {
+		BUG_ON(!sg_page(sg));
+
+		sg->dma_address = sg_phys(sg);
+		dma_sync_for_device(sg_virt(sg), sg->length, direction);
+	}
+
+	return nents;
+}
+
+static inline dma_addr_t
+dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+	     size_t size, enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+	dma_sync_for_device((void *)(page_to_phys(page) + offset), size,
+			    direction);
+	return page_to_phys(page) + offset;
+}
+
+static inline void
+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+	       enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+	dma_sync_for_cpu(phys_to_virt(dma_address), size, direction);
+}
+
+
+static inline void
+dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nhwentries,
+	     enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	int i;
+
+	BUG_ON(!valid_dma_direction(direction));
+	WARN_ON(nhwentries == 0 || sglist[0].length == 0);
+
+	for_each_sg(sglist, sg, nhwentries, i) {
+		BUG_ON(!sg_page(sg));
+
+		sg->dma_address = sg_phys(sg);
+		dma_sync_for_cpu(sg_virt(sg), sg->length, direction);
+	}
+}
+
+static inline void
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+			enum dma_data_direction direction)
+{
+	dma_sync_for_cpu(phys_to_virt(dma_handle), size, direction);
+}
+
+static inline void
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+			   size_t size, enum dma_data_direction direction)
+{
+	dma_sync_for_device(phys_to_virt(dma_handle), size, direction);
+}
+
+static inline void
+dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+			      unsigned long offset, size_t size,
+			      enum dma_data_direction direction)
+{
+	dma_sync_for_cpu(phys_to_virt(dma_handle)+offset, size,
+			 direction);
+}
+
+static inline void
+dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
+				 unsigned long offset, size_t size,
+				 enum dma_data_direction direction)
+{
+	dma_sync_for_device(phys_to_virt(dma_handle)+offset, size,
+			    direction);
+}
+
+static inline void
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+		    enum dma_data_direction direction)
+{
+	int i;
+	for (i = 0; i < nelems; i++, sg++)
+		dma_sync_for_cpu(sg_virt(sg), sg->length, direction);
+}
+
+static inline void
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+		       enum dma_data_direction direction)
+{
+	int i;
+	for (i = 0; i < nelems; i++, sg++)
+		dma_sync_for_device(sg_virt(sg), sg->length, direction);
+}
+
+static inline int
+dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return 0;
+}
+
+#define dma_supported(dev, mask)        (1)
+
+static inline int
+dma_set_mask(struct device *dev, u64 mask)
+{
+	if (!dev->dma_mask || !dma_supported(dev, mask))
+		return -EIO;
+
+	*dev->dma_mask = mask;
+
+	return 0;
+}
+
+/*
+ * dma_alloc_noncoherent() returns non-cacheable memory, so there's no need to
+ * do any flushing here.
+ */
+static inline void
+dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+	       enum dma_data_direction direction)
+{
+}
+
+/* drivers/base/dma-mapping.c */
+extern int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
+				  void *cpu_addr, dma_addr_t dma_addr,
+				  size_t size);
+
+#define dma_get_sgtable(d, t, v, h, s) dma_common_get_sgtable(d, t, v, h, s)
+
+#endif
diff --git a/arch/metag/include/asm/elf.h b/arch/metag/include/asm/elf.h
new file mode 100644
index 0000000..d63b9d0
--- /dev/null
+++ b/arch/metag/include/asm/elf.h
@@ -0,0 +1,128 @@
+#ifndef __ASM_METAG_ELF_H
+#define __ASM_METAG_ELF_H
+
+#define EM_METAG      174
+
+/* Meta relocations */
+#define R_METAG_HIADDR16                 0
+#define R_METAG_LOADDR16                 1
+#define R_METAG_ADDR32                   2
+#define R_METAG_NONE                     3
+#define R_METAG_RELBRANCH                4
+#define R_METAG_GETSETOFF                5
+
+/* Backward compatability */
+#define R_METAG_REG32OP1                 6
+#define R_METAG_REG32OP2                 7
+#define R_METAG_REG32OP3                 8
+#define R_METAG_REG16OP1                 9
+#define R_METAG_REG16OP2                10
+#define R_METAG_REG16OP3                11
+#define R_METAG_REG32OP4                12
+
+#define R_METAG_HIOG                    13
+#define R_METAG_LOOG                    14
+
+/* GNU */
+#define R_METAG_GNU_VTINHERIT           30
+#define R_METAG_GNU_VTENTRY             31
+
+/* PIC relocations */
+#define R_METAG_HI16_GOTOFF             32
+#define R_METAG_LO16_GOTOFF             33
+#define R_METAG_GETSET_GOTOFF           34
+#define R_METAG_GETSET_GOT              35
+#define R_METAG_HI16_GOTPC              36
+#define R_METAG_LO16_GOTPC              37
+#define R_METAG_HI16_PLT                38
+#define R_METAG_LO16_PLT                39
+#define R_METAG_RELBRANCH_PLT           40
+#define R_METAG_GOTOFF                  41
+#define R_METAG_PLT                     42
+#define R_METAG_COPY                    43
+#define R_METAG_JMP_SLOT                44
+#define R_METAG_RELATIVE                45
+#define R_METAG_GLOB_DAT                46
+
+/*
+ * ELF register definitions.
+ */
+
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/user.h>
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof(struct user_gp_regs) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef unsigned long elf_fpregset_t;
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) ((x)->e_machine == EM_METAG)
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS	ELFCLASS32
+#define ELF_DATA	ELFDATA2LSB
+#define ELF_ARCH	EM_METAG
+
+#define ELF_PLAT_INIT(_r, load_addr)	\
+	do { _r->ctx.AX[0].U0 = 0; } while (0)
+
+#define USE_ELF_CORE_DUMP
+#define CORE_DUMP_USE_REGSET
+#define ELF_EXEC_PAGESIZE	PAGE_SIZE
+
+/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+   use of this is to invoke "./ld.so someprog" to test out a new version of
+   the loader.  We need to make sure that it is out of the way of the program
+   that it will "exec", and that there is sufficient room for the brk.  */
+
+#define ELF_ET_DYN_BASE         0x08000000UL
+
+#define ELF_CORE_COPY_REGS(_dest, _regs)			\
+	memcpy((char *)&_dest, (char *)_regs, sizeof(struct pt_regs));
+
+/* This yields a mask that user programs can use to figure out what
+   instruction set this cpu supports.  */
+
+#define ELF_HWCAP	(0)
+
+/* This yields a string that ld.so will use to load implementation
+   specific libraries for optimization.  This is more specific in
+   intent than poking at uname or /proc/cpuinfo.  */
+
+#define ELF_PLATFORM  (NULL)
+
+#define SET_PERSONALITY(ex) \
+	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
+
+#define STACK_RND_MASK (0)
+
+#ifdef CONFIG_METAG_USER_TCM
+
+struct elf32_phdr;
+struct file;
+
+unsigned long __metag_elf_map(struct file *filep, unsigned long addr,
+			      struct elf32_phdr *eppnt, int prot, int type,
+			      unsigned long total_size);
+
+static inline unsigned long metag_elf_map(struct file *filep,
+					  unsigned long addr,
+					  struct elf32_phdr *eppnt, int prot,
+					  int type, unsigned long total_size)
+{
+	return __metag_elf_map(filep, addr, eppnt, prot, type, total_size);
+}
+#define elf_map metag_elf_map
+
+#endif
+
+#endif
diff --git a/arch/metag/include/asm/fixmap.h b/arch/metag/include/asm/fixmap.h
new file mode 100644
index 0000000..3331275
--- /dev/null
+++ b/arch/metag/include/asm/fixmap.h
@@ -0,0 +1,99 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#ifndef _ASM_FIXMAP_H
+#define _ASM_FIXMAP_H
+
+#include <asm/pgtable.h>
+#ifdef CONFIG_HIGHMEM
+#include <linux/threads.h>
+#include <asm/kmap_types.h>
+#endif
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special  addresses
+ * from the end of the consistent memory region backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * these 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages. (or larger if used with an increment
+ * higher than 1) use fixmap_set(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
+enum fixed_addresses {
+#define FIX_N_COLOURS 8
+#ifdef CONFIG_HIGHMEM
+	/* reserved pte's for temporary kernel mappings */
+	FIX_KMAP_BEGIN,
+	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+#endif
+	__end_of_fixed_addresses
+};
+
+#define FIXADDR_TOP     (CONSISTENT_START - PAGE_SIZE)
+#define FIXADDR_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START	((FIXADDR_TOP - FIXADDR_SIZE) & PMD_MASK)
+
+#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x)	((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
+extern void __this_fixmap_does_not_exist(void);
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without tranlation, we catch the bug with a NULL-deference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static inline unsigned long fix_to_virt(const unsigned int idx)
+{
+	/*
+	 * this branch gets completely eliminated after inlining,
+	 * except when someone tries to use fixaddr indices in an
+	 * illegal way. (such as mixing up address types or using
+	 * out-of-range indices).
+	 *
+	 * If it doesn't get removed, the linker will complain
+	 * loudly with a reasonably clear error message..
+	 */
+	if (idx >= __end_of_fixed_addresses)
+		__this_fixmap_does_not_exist();
+
+	return __fix_to_virt(idx);
+}
+
+static inline unsigned long virt_to_fix(const unsigned long vaddr)
+{
+	BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+	return __virt_to_fix(vaddr);
+}
+
+#define kmap_get_fixmap_pte(vaddr) \
+	pte_offset_kernel( \
+		pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), \
+		(vaddr) \
+	)
+
+/*
+ * Called from pgtable_init()
+ */
+extern void fixrange_init(unsigned long start, unsigned long end,
+	pgd_t *pgd_base);
+
+
+#endif
diff --git a/arch/metag/include/asm/ftrace.h b/arch/metag/include/asm/ftrace.h
new file mode 100644
index 0000000..2901f0f
--- /dev/null
+++ b/arch/metag/include/asm/ftrace.h
@@ -0,0 +1,23 @@
+#ifndef _ASM_METAG_FTRACE
+#define _ASM_METAG_FTRACE
+
+#ifdef CONFIG_FUNCTION_TRACER
+#define MCOUNT_INSN_SIZE	8 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void mcount_wrapper(void);
+#define MCOUNT_ADDR		((long)(mcount_wrapper))
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	return addr;
+}
+
+struct dyn_arch_ftrace {
+	/* No extra data needed on metag */
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#endif /* _ASM_METAG_FTRACE */
diff --git a/arch/metag/include/asm/global_lock.h b/arch/metag/include/asm/global_lock.h
new file mode 100644
index 0000000..fc831c8
--- /dev/null
+++ b/arch/metag/include/asm/global_lock.h
@@ -0,0 +1,100 @@
+#ifndef __ASM_METAG_GLOBAL_LOCK_H
+#define __ASM_METAG_GLOBAL_LOCK_H
+
+#include <asm/metag_mem.h>
+
+/**
+ * __global_lock1() - Acquire global voluntary lock (LOCK1).
+ * @flags:	Variable to store flags into.
+ *
+ * Acquires the Meta global voluntary lock (LOCK1), also taking care to disable
+ * all triggers so we cannot be interrupted, and to enforce a compiler barrier
+ * so that the compiler cannot reorder memory accesses across the lock.
+ *
+ * No other hardware thread will be able to acquire the voluntary or exclusive
+ * locks until the voluntary lock is released with @__global_unlock1, but they
+ * may continue to execute as long as they aren't trying to acquire either of
+ * the locks.
+ */
+#define __global_lock1(flags) do {					\
+	unsigned int __trval;						\
+	asm volatile("MOV	%0,#0\n\t"				\
+		     "SWAP	%0,TXMASKI\n\t"				\
+		     "LOCK1"						\
+		     : "=r" (__trval)					\
+		     :							\
+		     : "memory");					\
+	(flags) = __trval;						\
+} while (0)
+
+/**
+ * __global_unlock1() - Release global voluntary lock (LOCK1).
+ * @flags:	Variable to restore flags from.
+ *
+ * Releases the Meta global voluntary lock (LOCK1) acquired with
+ * @__global_lock1, also taking care to re-enable triggers, and to enforce a
+ * compiler barrier so that the compiler cannot reorder memory accesses across
+ * the unlock.
+ *
+ * This immediately allows another hardware thread to acquire the voluntary or
+ * exclusive locks.
+ */
+#define __global_unlock1(flags) do {					\
+	unsigned int __trval = (flags);					\
+	asm volatile("LOCK0\n\t"					\
+		     "MOV	TXMASKI,%0"				\
+		     :							\
+		     : "r" (__trval)					\
+		     : "memory");					\
+} while (0)
+
+/**
+ * __global_lock2() - Acquire global exclusive lock (LOCK2).
+ * @flags:	Variable to store flags into.
+ *
+ * Acquires the Meta global voluntary lock and global exclusive lock (LOCK2),
+ * also taking care to disable all triggers so we cannot be interrupted, to take
+ * the atomic lock (system event) and to enforce a compiler barrier so that the
+ * compiler cannot reorder memory accesses across the lock.
+ *
+ * No other hardware thread will be able to execute code until the locks are
+ * released with @__global_unlock2.
+ */
+#define __global_lock2(flags) do {					\
+	unsigned int __trval;						\
+	unsigned int __aloc_hi = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000; \
+	asm volatile("MOV	%0,#0\n\t"				\
+		     "SWAP	%0,TXMASKI\n\t"				\
+		     "LOCK2\n\t"					\
+		     "SETD	[%1+#0x40],D1RtP"			\
+		     : "=r&" (__trval)					\
+		     : "u" (__aloc_hi)					\
+		     : "memory");					\
+	(flags) = __trval;						\
+} while (0)
+
+/**
+ * __global_unlock2() - Release global exclusive lock (LOCK2).
+ * @flags:	Variable to restore flags from.
+ *
+ * Releases the Meta global exclusive lock (LOCK2) and global voluntary lock
+ * acquired with @__global_lock2, also taking care to release the atomic lock
+ * (system event), re-enable triggers, and to enforce a compiler barrier so that
+ * the compiler cannot reorder memory accesses across the unlock.
+ *
+ * This immediately allows other hardware threads to continue executing and one
+ * of them to acquire locks.
+ */
+#define __global_unlock2(flags) do {					\
+	unsigned int __trval = (flags);					\
+	unsigned int __alock_hi = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000; \
+	asm volatile("SETD	[%1+#0x00],D1RtP\n\t"			\
+		     "LOCK0\n\t"					\
+		     "MOV	TXMASKI,%0"				\
+		     :							\
+		     : "r" (__trval),					\
+		       "u" (__alock_hi)					\
+		     : "memory");					\
+} while (0)
+
+#endif /* __ASM_METAG_GLOBAL_LOCK_H */
diff --git a/arch/metag/include/asm/gpio.h b/arch/metag/include/asm/gpio.h
new file mode 100644
index 0000000..b3799d8
--- /dev/null
+++ b/arch/metag/include/asm/gpio.h
@@ -0,0 +1,4 @@
+#ifndef __LINUX_GPIO_H
+#warning Include linux/gpio.h instead of asm/gpio.h
+#include <linux/gpio.h>
+#endif
diff --git a/arch/metag/include/asm/highmem.h b/arch/metag/include/asm/highmem.h
new file mode 100644
index 0000000..6646a15
--- /dev/null
+++ b/arch/metag/include/asm/highmem.h
@@ -0,0 +1,62 @@
+#ifndef _ASM_HIGHMEM_H
+#define _ASM_HIGHMEM_H
+
+#include <asm/cacheflush.h>
+#include <asm/kmap_types.h>
+#include <asm/fixmap.h>
+
+/*
+ * Right now we initialize only a single pte table. It can be extended
+ * easily, subsequent pte tables have to be allocated in one physical
+ * chunk of RAM.
+ */
+/*
+ * Ordering is (from lower to higher memory addresses):
+ *
+ * high_memory
+ *			Persistent kmap area
+ * PKMAP_BASE
+ *			fixed_addresses
+ * FIXADDR_START
+ * FIXADDR_TOP
+ *			Vmalloc area
+ * VMALLOC_START
+ * VMALLOC_END
+ */
+#define PKMAP_BASE		(FIXADDR_START - PMD_SIZE)
+#define LAST_PKMAP		PTRS_PER_PTE
+#define LAST_PKMAP_MASK		(LAST_PKMAP - 1)
+#define PKMAP_NR(virt)		(((virt) - PKMAP_BASE) >> PAGE_SHIFT)
+#define PKMAP_ADDR(nr)		(PKMAP_BASE + ((nr) << PAGE_SHIFT))
+
+#define kmap_prot		PAGE_KERNEL
+
+static inline void flush_cache_kmaps(void)
+{
+	flush_cache_all();
+}
+
+/* declarations for highmem.c */
+extern unsigned long highstart_pfn, highend_pfn;
+
+extern pte_t *pkmap_page_table;
+
+extern void *kmap_high(struct page *page);
+extern void kunmap_high(struct page *page);
+
+extern void kmap_init(void);
+
+/*
+ * The following functions are already defined by <linux/highmem.h>
+ * when CONFIG_HIGHMEM is not set.
+ */
+#ifdef CONFIG_HIGHMEM
+extern void *kmap(struct page *page);
+extern void kunmap(struct page *page);
+extern void *kmap_atomic(struct page *page);
+extern void __kunmap_atomic(void *kvaddr);
+extern void *kmap_atomic_pfn(unsigned long pfn);
+extern struct page *kmap_atomic_to_page(void *ptr);
+#endif
+
+#endif
diff --git a/arch/metag/include/asm/hugetlb.h b/arch/metag/include/asm/hugetlb.h
new file mode 100644
index 0000000..f545477
--- /dev/null
+++ b/arch/metag/include/asm/hugetlb.h
@@ -0,0 +1,86 @@
+#ifndef _ASM_METAG_HUGETLB_H
+#define _ASM_METAG_HUGETLB_H
+
+#include <asm/page.h>
+
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr,
+					 unsigned long len) {
+	return 0;
+}
+
+int prepare_hugepage_range(struct file *file, unsigned long addr,
+						unsigned long len);
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+					  unsigned long addr, unsigned long end,
+					  unsigned long floor,
+					  unsigned long ceiling)
+{
+	free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pte)
+{
+	set_pte_at(mm, addr, ptep, pte);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return ptep_get_and_clear(mm, addr, ptep);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
+{
+	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	return *ptep;
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+	return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
+#endif /* _ASM_METAG_HUGETLB_H */
diff --git a/arch/metag/include/asm/hwthread.h b/arch/metag/include/asm/hwthread.h
new file mode 100644
index 0000000..8f97866
--- /dev/null
+++ b/arch/metag/include/asm/hwthread.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2008 Imagination Technologies
+ */
+#ifndef __METAG_HWTHREAD_H
+#define __METAG_HWTHREAD_H
+
+#include <linux/bug.h>
+#include <linux/io.h>
+
+#include <asm/metag_mem.h>
+
+#define BAD_HWTHREAD_ID		(0xFFU)
+#define BAD_CPU_ID		(0xFFU)
+
+extern u8 cpu_2_hwthread_id[];
+extern u8 hwthread_id_2_cpu[];
+
+/*
+ * Each hardware thread's Control Unit registers are memory-mapped
+ * and can therefore be accessed by any other hardware thread.
+ *
+ * This helper function returns the memory address where "thread"'s
+ * register "regnum" is mapped.
+ */
+static inline
+void __iomem *__CU_addr(unsigned int thread, unsigned int regnum)
+{
+	unsigned int base, thread_offset, thread_regnum;
+
+	WARN_ON(thread == BAD_HWTHREAD_ID);
+
+	base = T0UCTREG0;	/* Control unit base */
+
+	thread_offset = TnUCTRX_STRIDE * thread;
+	thread_regnum = TXUCTREGn_STRIDE * regnum;
+
+	return (void __iomem *)(base + thread_offset + thread_regnum);
+}
+
+#endif /* __METAG_HWTHREAD_H */
diff --git a/arch/metag/include/asm/io.h b/arch/metag/include/asm/io.h
new file mode 100644
index 0000000..9359e50
--- /dev/null
+++ b/arch/metag/include/asm/io.h
@@ -0,0 +1,165 @@
+#ifndef _ASM_METAG_IO_H
+#define _ASM_METAG_IO_H
+
+#include <linux/types.h>
+
+#define IO_SPACE_LIMIT  0
+
+#define page_to_bus page_to_phys
+#define bus_to_page phys_to_page
+
+/*
+ * Generic I/O
+ */
+
+#define __raw_readb __raw_readb
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+	u8 ret;
+	asm volatile("GETB %0,[%1]"
+		     : "=da" (ret)
+		     : "da" (addr)
+		     : "memory");
+	return ret;
+}
+
+#define __raw_readw __raw_readw
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+	u16 ret;
+	asm volatile("GETW %0,[%1]"
+		     : "=da" (ret)
+		     : "da" (addr)
+		     : "memory");
+	return ret;
+}
+
+#define __raw_readl __raw_readl
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+	u32 ret;
+	asm volatile("GETD %0,[%1]"
+		     : "=da" (ret)
+		     : "da" (addr)
+		     : "memory");
+	return ret;
+}
+
+#define __raw_readq __raw_readq
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+	u64 ret;
+	asm volatile("GETL %0,%t0,[%1]"
+		     : "=da" (ret)
+		     : "da" (addr)
+		     : "memory");
+	return ret;
+}
+
+#define __raw_writeb __raw_writeb
+static inline void __raw_writeb(u8 b, volatile void __iomem *addr)
+{
+	asm volatile("SETB [%0],%1"
+		     :
+		     : "da" (addr),
+		       "da" (b)
+		     : "memory");
+}
+
+#define __raw_writew __raw_writew
+static inline void __raw_writew(u16 b, volatile void __iomem *addr)
+{
+	asm volatile("SETW [%0],%1"
+		     :
+		     : "da" (addr),
+		       "da" (b)
+		     : "memory");
+}
+
+#define __raw_writel __raw_writel
+static inline void __raw_writel(u32 b, volatile void __iomem *addr)
+{
+	asm volatile("SETD [%0],%1"
+		     :
+		     : "da" (addr),
+		       "da" (b)
+		     : "memory");
+}
+
+#define __raw_writeq __raw_writeq
+static inline void __raw_writeq(u64 b, volatile void __iomem *addr)
+{
+	asm volatile("SETL [%0],%1,%t1"
+		     :
+		     : "da" (addr),
+		       "da" (b)
+		     : "memory");
+}
+
+/*
+ * The generic io.h can define all the other generic accessors
+ */
+
+#include <asm-generic/io.h>
+
+/*
+ * Despite being a 32bit architecture, Meta can do 64bit memory accesses
+ * (assuming the bus supports it).
+ */
+
+#define readq	__raw_readq
+#define writeq	__raw_writeq
+
+/*
+ * Meta specific I/O for accessing non-MMU areas.
+ *
+ * These can be provided with a physical address rather than an __iomem pointer
+ * and should only be used by core architecture code for accessing fixed core
+ * registers. Generic drivers should use ioremap and the generic I/O accessors.
+ */
+
+#define metag_in8(addr)		__raw_readb((volatile void __iomem *)(addr))
+#define metag_in16(addr)	__raw_readw((volatile void __iomem *)(addr))
+#define metag_in32(addr)	__raw_readl((volatile void __iomem *)(addr))
+#define metag_in64(addr)	__raw_readq((volatile void __iomem *)(addr))
+
+#define metag_out8(b, addr)	__raw_writeb(b, (volatile void __iomem *)(addr))
+#define metag_out16(b, addr)	__raw_writew(b, (volatile void __iomem *)(addr))
+#define metag_out32(b, addr)	__raw_writel(b, (volatile void __iomem *)(addr))
+#define metag_out64(b, addr)	__raw_writeq(b, (volatile void __iomem *)(addr))
+
+/*
+ * io remapping functions
+ */
+
+extern void __iomem *__ioremap(unsigned long offset,
+			       size_t size, unsigned long flags);
+extern void __iounmap(void __iomem *addr);
+
+/**
+ *	ioremap		-	map bus memory into CPU space
+ *	@offset:	bus address of the memory
+ *	@size:		size of the resource to map
+ *
+ *	ioremap performs a platform specific sequence of operations to
+ *	make bus memory CPU accessible via the readb/readw/readl/writeb/
+ *	writew/writel functions and the other mmio helpers. The returned
+ *	address is not guaranteed to be usable directly as a virtual
+ *	address.
+ */
+#define ioremap(offset, size)                   \
+	__ioremap((offset), (size), 0)
+
+#define ioremap_nocache(offset, size)           \
+	__ioremap((offset), (size), 0)
+
+#define ioremap_cached(offset, size)            \
+	__ioremap((offset), (size), _PAGE_CACHEABLE)
+
+#define ioremap_wc(offset, size)                \
+	__ioremap((offset), (size), _PAGE_WR_COMBINE)
+
+#define iounmap(addr)                           \
+	__iounmap(addr)
+
+#endif  /* _ASM_METAG_IO_H */
diff --git a/arch/metag/include/asm/irq.h b/arch/metag/include/asm/irq.h
new file mode 100644
index 0000000..be0c8f3
--- /dev/null
+++ b/arch/metag/include/asm/irq.h
@@ -0,0 +1,32 @@
+#ifndef __ASM_METAG_IRQ_H
+#define __ASM_METAG_IRQ_H
+
+#ifdef CONFIG_4KSTACKS
+extern void irq_ctx_init(int cpu);
+extern void irq_ctx_exit(int cpu);
+# define __ARCH_HAS_DO_SOFTIRQ
+#else
+# define irq_ctx_init(cpu) do { } while (0)
+# define irq_ctx_exit(cpu) do { } while (0)
+#endif
+
+void tbi_startup_interrupt(int);
+void tbi_shutdown_interrupt(int);
+
+struct pt_regs;
+
+int tbisig_map(unsigned int hw);
+extern void do_IRQ(int irq, struct pt_regs *regs);
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+int traps_save_context(void);
+int traps_restore_context(void);
+#endif
+
+#include <asm-generic/irq.h>
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern void migrate_irqs(void);
+#endif
+
+#endif /* __ASM_METAG_IRQ_H */
diff --git a/arch/metag/include/asm/irqflags.h b/arch/metag/include/asm/irqflags.h
new file mode 100644
index 0000000..339b16f
--- /dev/null
+++ b/arch/metag/include/asm/irqflags.h
@@ -0,0 +1,93 @@
+/*
+ * IRQ flags handling
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * raw_local_irq_*() functions from the lowlevel headers.
+ */
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/core_reg.h>
+#include <asm/metag_regs.h>
+
+#define INTS_OFF_MASK TXSTATI_BGNDHALT_BIT
+
+#ifdef CONFIG_SMP
+extern unsigned int get_trigger_mask(void);
+#else
+
+extern unsigned int global_trigger_mask;
+
+static inline unsigned int get_trigger_mask(void)
+{
+	return global_trigger_mask;
+}
+#endif
+
+static inline unsigned long arch_local_save_flags(void)
+{
+	return __core_reg_get(TXMASKI);
+}
+
+static inline int arch_irqs_disabled_flags(unsigned long flags)
+{
+	return (flags & ~INTS_OFF_MASK) == 0;
+}
+
+static inline int arch_irqs_disabled(void)
+{
+	unsigned long flags = arch_local_save_flags();
+
+	return arch_irqs_disabled_flags(flags);
+}
+
+static inline unsigned long __irqs_disabled(void)
+{
+	/*
+	 * We shouldn't enable exceptions if they are not already
+	 * enabled. This is required for chancalls to work correctly.
+	 */
+	return arch_local_save_flags() & INTS_OFF_MASK;
+}
+
+/*
+ * For spinlocks, etc:
+ */
+static inline unsigned long arch_local_irq_save(void)
+{
+	unsigned long flags = __irqs_disabled();
+
+	asm volatile("SWAP %0,TXMASKI\n" : "=r" (flags) : "0" (flags)
+		     : "memory");
+
+	return flags;
+}
+
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+	asm volatile("MOV TXMASKI,%0\n" : : "r" (flags) : "memory");
+}
+
+static inline void arch_local_irq_disable(void)
+{
+	unsigned long flags = __irqs_disabled();
+
+	asm volatile("MOV TXMASKI,%0\n" : : "r" (flags) : "memory");
+}
+
+#ifdef CONFIG_SMP
+/* Avoid circular include dependencies through <linux/preempt.h> */
+void arch_local_irq_enable(void);
+#else
+static inline void arch_local_irq_enable(void)
+{
+	arch_local_irq_restore(get_trigger_mask());
+}
+#endif
+
+#endif /* (__ASSEMBLY__) */
+
+#endif /* !(_ASM_IRQFLAGS_H) */
diff --git a/arch/metag/include/asm/l2cache.h b/arch/metag/include/asm/l2cache.h
new file mode 100644
index 0000000..bffbeaa
--- /dev/null
+++ b/arch/metag/include/asm/l2cache.h
@@ -0,0 +1,258 @@
+#ifndef _METAG_L2CACHE_H
+#define _METAG_L2CACHE_H
+
+#ifdef CONFIG_METAG_L2C
+
+#include <asm/global_lock.h>
+#include <asm/io.h>
+
+/*
+ * Store the last known value of pfenable (we don't want prefetch enabled while
+ * L2 is off).
+ */
+extern int l2c_pfenable;
+
+/* defined in arch/metag/drivers/core-sysfs.c */
+extern struct sysdev_class cache_sysclass;
+
+static inline void wr_fence(void);
+
+/*
+ * Functions for reading of L2 cache configuration.
+ */
+
+/* Get raw L2 config register (CORE_CONFIG3) */
+static inline unsigned int meta_l2c_config(void)
+{
+	const unsigned int *corecfg3 = (const unsigned int *)METAC_CORE_CONFIG3;
+	return *corecfg3;
+}
+
+/* Get whether the L2 is present */
+static inline int meta_l2c_is_present(void)
+{
+	return meta_l2c_config() & METAC_CORECFG3_L2C_HAVE_L2C_BIT;
+}
+
+/* Get whether the L2 is configured for write-back instead of write-through */
+static inline int meta_l2c_is_writeback(void)
+{
+	return meta_l2c_config() & METAC_CORECFG3_L2C_MODE_BIT;
+}
+
+/* Get whether the L2 is unified instead of separated code/data */
+static inline int meta_l2c_is_unified(void)
+{
+	return meta_l2c_config() & METAC_CORECFG3_L2C_UNIFIED_BIT;
+}
+
+/* Get the L2 cache size in bytes */
+static inline unsigned int meta_l2c_size(void)
+{
+	unsigned int size_s;
+	if (!meta_l2c_is_present())
+		return 0;
+	size_s = (meta_l2c_config() & METAC_CORECFG3_L2C_SIZE_BITS)
+			>> METAC_CORECFG3_L2C_SIZE_S;
+	/* L2CSIZE is in KiB */
+	return 1024 << size_s;
+}
+
+/* Get the number of ways in the L2 cache */
+static inline unsigned int meta_l2c_ways(void)
+{
+	unsigned int ways_s;
+	if (!meta_l2c_is_present())
+		return 0;
+	ways_s = (meta_l2c_config() & METAC_CORECFG3_L2C_NUM_WAYS_BITS)
+			>> METAC_CORECFG3_L2C_NUM_WAYS_S;
+	return 0x1 << ways_s;
+}
+
+/* Get the line size of the L2 cache */
+static inline unsigned int meta_l2c_linesize(void)
+{
+	unsigned int line_size;
+	if (!meta_l2c_is_present())
+		return 0;
+	line_size = (meta_l2c_config() & METAC_CORECFG3_L2C_LINE_SIZE_BITS)
+			>> METAC_CORECFG3_L2C_LINE_SIZE_S;
+	switch (line_size) {
+	case METAC_CORECFG3_L2C_LINE_SIZE_64B:
+		return 64;
+	default:
+		return 0;
+	}
+}
+
+/* Get the revision ID of the L2 cache */
+static inline unsigned int meta_l2c_revision(void)
+{
+	return (meta_l2c_config() & METAC_CORECFG3_L2C_REV_ID_BITS)
+			>> METAC_CORECFG3_L2C_REV_ID_S;
+}
+
+
+/*
+ * Start an initialisation of the L2 cachelines and wait for completion.
+ * This should only be done in a LOCK1 or LOCK2 critical section while the L2
+ * is disabled.
+ */
+static inline void _meta_l2c_init(void)
+{
+	metag_out32(SYSC_L2C_INIT_INIT, SYSC_L2C_INIT);
+	while (metag_in32(SYSC_L2C_INIT) == SYSC_L2C_INIT_IN_PROGRESS)
+		/* do nothing */;
+}
+
+/*
+ * Start a writeback of dirty L2 cachelines and wait for completion.
+ * This should only be done in a LOCK1 or LOCK2 critical section.
+ */
+static inline void _meta_l2c_purge(void)
+{
+	metag_out32(SYSC_L2C_PURGE_PURGE, SYSC_L2C_PURGE);
+	while (metag_in32(SYSC_L2C_PURGE) == SYSC_L2C_PURGE_IN_PROGRESS)
+		/* do nothing */;
+}
+
+/* Set whether the L2 cache is enabled. */
+static inline void _meta_l2c_enable(int enabled)
+{
+	unsigned int enable;
+
+	enable = metag_in32(SYSC_L2C_ENABLE);
+	if (enabled)
+		enable |= SYSC_L2C_ENABLE_ENABLE_BIT;
+	else
+		enable &= ~SYSC_L2C_ENABLE_ENABLE_BIT;
+	metag_out32(enable, SYSC_L2C_ENABLE);
+}
+
+/* Set whether the L2 cache prefetch is enabled. */
+static inline void _meta_l2c_pf_enable(int pfenabled)
+{
+	unsigned int enable;
+
+	enable = metag_in32(SYSC_L2C_ENABLE);
+	if (pfenabled)
+		enable |= SYSC_L2C_ENABLE_PFENABLE_BIT;
+	else
+		enable &= ~SYSC_L2C_ENABLE_PFENABLE_BIT;
+	metag_out32(enable, SYSC_L2C_ENABLE);
+}
+
+/* Return whether the L2 cache is enabled */
+static inline int _meta_l2c_is_enabled(void)
+{
+	return metag_in32(SYSC_L2C_ENABLE) & SYSC_L2C_ENABLE_ENABLE_BIT;
+}
+
+/* Return whether the L2 cache prefetch is enabled */
+static inline int _meta_l2c_pf_is_enabled(void)
+{
+	return metag_in32(SYSC_L2C_ENABLE) & SYSC_L2C_ENABLE_PFENABLE_BIT;
+}
+
+
+/* Return whether the L2 cache is enabled */
+static inline int meta_l2c_is_enabled(void)
+{
+	int en;
+
+	/*
+	 * There is no need to lock at the moment, as the enable bit is never
+	 * intermediately changed, so we will never see an intermediate result.
+	 */
+	en = _meta_l2c_is_enabled();
+
+	return en;
+}
+
+/*
+ * Ensure the L2 cache is disabled.
+ * Return whether the L2 was previously disabled.
+ */
+int meta_l2c_disable(void);
+
+/*
+ * Ensure the L2 cache is enabled.
+ * Return whether the L2 was previously enabled.
+ */
+int meta_l2c_enable(void);
+
+/* Return whether the L2 cache prefetch is enabled */
+static inline int meta_l2c_pf_is_enabled(void)
+{
+	return l2c_pfenable;
+}
+
+/*
+ * Set whether the L2 cache prefetch is enabled.
+ * Return whether the L2 prefetch was previously enabled.
+ */
+int meta_l2c_pf_enable(int pfenable);
+
+/*
+ * Flush the L2 cache.
+ * Return 1 if the L2 is disabled.
+ */
+int meta_l2c_flush(void);
+
+/*
+ * Write back all dirty cache lines in the L2 cache.
+ * Return 1 if the L2 is disabled or there isn't any writeback.
+ */
+static inline int meta_l2c_writeback(void)
+{
+	unsigned long flags;
+	int en;
+
+	/* no need to purge if it's not a writeback cache */
+	if (!meta_l2c_is_writeback())
+		return 1;
+
+	/*
+	 * Purge only works if the L2 is enabled, and involves reading back to
+	 * detect completion, so keep this operation atomic with other threads.
+	 */
+	__global_lock1(flags);
+	en = meta_l2c_is_enabled();
+	if (likely(en)) {
+		wr_fence();
+		_meta_l2c_purge();
+	}
+	__global_unlock1(flags);
+
+	return !en;
+}
+
+#else /* CONFIG_METAG_L2C */
+
+#define meta_l2c_config()		0
+#define meta_l2c_is_present()		0
+#define meta_l2c_is_writeback()		0
+#define meta_l2c_is_unified()		0
+#define meta_l2c_size()			0
+#define meta_l2c_ways()			0
+#define meta_l2c_linesize()		0
+#define meta_l2c_revision()		0
+
+#define meta_l2c_is_enabled()		0
+#define _meta_l2c_pf_is_enabled()	0
+#define meta_l2c_pf_is_enabled()	0
+#define meta_l2c_disable()		1
+#define meta_l2c_enable()		0
+#define meta_l2c_pf_enable(X)		0
+static inline int meta_l2c_flush(void)
+{
+	return 1;
+}
+static inline int meta_l2c_writeback(void)
+{
+	return 1;
+}
+
+#endif /* CONFIG_METAG_L2C */
+
+#endif /* _METAG_L2CACHE_H */
diff --git a/arch/metag/include/asm/linkage.h b/arch/metag/include/asm/linkage.h
new file mode 100644
index 0000000..73bf25b
--- /dev/null
+++ b/arch/metag/include/asm/linkage.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#define __ALIGN .p2align 2
+#define __ALIGN_STR ".p2align 2"
+
+#endif
diff --git a/arch/metag/include/asm/mach/arch.h b/arch/metag/include/asm/mach/arch.h
new file mode 100644
index 0000000..12c5664
--- /dev/null
+++ b/arch/metag/include/asm/mach/arch.h
@@ -0,0 +1,86 @@
+/*
+ * arch/metag/include/asm/mach/arch.h
+ *
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * based on the ARM version:
+ *  Copyright (C) 2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _METAG_MACH_ARCH_H_
+#define _METAG_MACH_ARCH_H_
+
+#include <linux/stddef.h>
+
+#include <asm/clock.h>
+
+/**
+ * struct machine_desc - Describes a board controlled by a Meta.
+ * @name:		Board/SoC name.
+ * @dt_compat:		Array of device tree 'compatible' strings.
+ * @clocks:		Clock callbacks.
+ *
+ * @nr_irqs:		Maximum number of IRQs.
+ *			If 0, defaults to NR_IRQS in asm-generic/irq.h.
+ *
+ * @init_early:		Early init callback.
+ * @init_irq:		IRQ init callback for setting up IRQ controllers.
+ * @init_machine:	Arch init callback for setting up devices.
+ * @init_late:		Late init callback.
+ *
+ * This structure is provided by each board which can be controlled by a Meta.
+ * It is chosen by matching the compatible strings in the device tree provided
+ * by the bootloader with the strings in @dt_compat, and sets up any aspects of
+ * the machine that aren't configured with device tree (yet).
+ */
+struct machine_desc {
+	const char		*name;
+	const char		**dt_compat;
+	struct meta_clock_desc	*clocks;
+
+	unsigned int		nr_irqs;
+
+	void			(*init_early)(void);
+	void			(*init_irq)(void);
+	void			(*init_machine)(void);
+	void			(*init_late)(void);
+};
+
+/*
+ * Current machine - only accessible during boot.
+ */
+extern struct machine_desc *machine_desc;
+
+/*
+ * Machine type table - also only accessible during boot
+ */
+extern struct machine_desc __arch_info_begin[], __arch_info_end[];
+#define for_each_machine_desc(p)			\
+	for (p = __arch_info_begin; p < __arch_info_end; p++)
+
+static inline struct machine_desc *default_machine_desc(void)
+{
+	/* the default machine is the last one linked in */
+	if (__arch_info_end - 1 < __arch_info_begin)
+		return NULL;
+	return __arch_info_end - 1;
+}
+
+/*
+ * Set of macros to define architecture features.  This is built into
+ * a table by the linker.
+ */
+#define MACHINE_START(_type, _name)			\
+static const struct machine_desc __mach_desc_##_type	\
+__used							\
+__attribute__((__section__(".arch.info.init"))) = {	\
+	.name		= _name,
+
+#define MACHINE_END				\
+};
+
+#endif /* _METAG_MACH_ARCH_H_ */
diff --git a/arch/metag/include/asm/metag_isa.h b/arch/metag/include/asm/metag_isa.h
new file mode 100644
index 0000000..c8aa2ae
--- /dev/null
+++ b/arch/metag/include/asm/metag_isa.h
@@ -0,0 +1,81 @@
+/*
+ * asm/metag_isa.h
+ *
+ * Copyright (C) 2000-2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Various defines for Meta instruction set.
+ */
+
+#ifndef _ASM_METAG_ISA_H_
+#define _ASM_METAG_ISA_H_
+
+
+/* L1 cache layout */
+
+/* Data cache line size as bytes and shift */
+#define DCACHE_LINE_BYTES 64
+#define DCACHE_LINE_S     6
+
+/* Number of ways in the data cache */
+#define DCACHE_WAYS       4
+
+/* Instruction cache line size as bytes and shift */
+#define ICACHE_LINE_BYTES 64
+#define ICACHE_LINE_S     6
+
+/* Number of ways in the instruction cache */
+#define ICACHE_WAYS       4
+
+
+/*
+ * CACHEWD/CACHEWL instructions use the bottom 8 bits of the data presented to
+ * control the operation actually achieved.
+ */
+/* Use of these two bits should be discouraged since the bits dont have
+ * consistent meanings
+ */
+#define CACHEW_ICACHE_BIT           0x01
+#define CACHEW_TLBFLUSH_BIT         0x02
+
+#define CACHEW_FLUSH_L1D_L2         0x0
+#define CACHEW_INVALIDATE_L1I       0x1
+#define CACHEW_INVALIDATE_L1DTLB    0x2
+#define CACHEW_INVALIDATE_L1ITLB    0x3
+#define CACHEW_WRITEBACK_L1D_L2     0x4
+#define CACHEW_INVALIDATE_L1D       0x8
+#define CACHEW_INVALIDATE_L1D_L2    0xC
+
+/*
+ * CACHERD/CACHERL instructions use bits 3:5 of the address presented to
+ * control the operation achieved and hence the specific result.
+ */
+#define CACHER_ADDR_BITS            0xFFFFFFC0
+#define CACHER_OPER_BITS            0x00000030
+#define CACHER_OPER_S               4
+#define     CACHER_OPER_LINPHY          0
+#define CACHER_ICACHE_BIT           0x00000008
+#define CACHER_ICACHE_S             3
+
+/*
+ * CACHERD/CACHERL LINPHY Oper result is one/two 32-bit words
+ *
+ *  If CRLINPHY0_VAL_BIT (Bit 0) set then,
+ *      Lower 32-bits corresponds to MMCU_ENTRY_* above.
+ *      Upper 32-bits corresponds to CRLINPHY1_* values below (if requested).
+ *  else
+ *      Lower 32-bits corresponds to CRLINPHY0_* values below.
+ *      Upper 32-bits undefined.
+ */
+#define CRLINPHY0_VAL_BIT      0x00000001
+#define CRLINPHY0_FIRST_BIT    0x00000004 /* Set if VAL=0 due to first level */
+
+#define CRLINPHY1_READ_BIT     0x00000001 /* Set if reads permitted          */
+#define CRLINPHY1_SINGLE_BIT   0x00000004 /* Set if TLB does not cache entry */
+#define CRLINPHY1_PAGEMSK_BITS 0x0000FFF0 /* Set to ((2^n-1)>>12) value      */
+#define CRLINPHY1_PAGEMSK_S    4
+
+#endif /* _ASM_METAG_ISA_H_ */
diff --git a/arch/metag/include/asm/metag_mem.h b/arch/metag/include/asm/metag_mem.h
new file mode 100644
index 0000000..3f7b54d
--- /dev/null
+++ b/arch/metag/include/asm/metag_mem.h
@@ -0,0 +1,1106 @@
+/*
+ * asm/metag_mem.h
+ *
+ * Copyright (C) 2000-2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Various defines for Meta (memory-mapped) registers.
+ */
+
+#ifndef _ASM_METAG_MEM_H_
+#define _ASM_METAG_MEM_H_
+
+/*****************************************************************************
+ *                   META MEMORY MAP LINEAR ADDRESS VALUES
+ ****************************************************************************/
+/*
+ * COMMON MEMORY MAP
+ * -----------------
+ */
+
+#define LINSYSTEM_BASE  0x00200000
+#define LINSYSTEM_LIMIT 0x07FFFFFF
+
+/* Linear cache flush now implemented via DCACHE instruction. These defines
+   related to a special region that used to exist for achieving cache flushes.
+ */
+#define         LINSYSLFLUSH_S 0
+
+#define     LINSYSRES0_BASE     0x00200000
+#define     LINSYSRES0_LIMIT    0x01FFFFFF
+
+#define     LINSYSCUSTOM_BASE 0x02000000
+#define     LINSYSCUSTOM_LIMIT   0x02FFFFFF
+
+#define     LINSYSEXPAND_BASE 0x03000000
+#define     LINSYSEXPAND_LIMIT   0x03FFFFFF
+
+#define     LINSYSEVENT_BASE  0x04000000
+#define         LINSYSEVENT_WR_ATOMIC_UNLOCK    0x04000000
+#define         LINSYSEVENT_WR_ATOMIC_LOCK      0x04000040
+#define         LINSYSEVENT_WR_CACHE_DISABLE    0x04000080
+#define         LINSYSEVENT_WR_CACHE_ENABLE     0x040000C0
+#define         LINSYSEVENT_WR_COMBINE_FLUSH    0x04000100
+#define         LINSYSEVENT_WR_FENCE            0x04000140
+#define     LINSYSEVENT_LIMIT   0x04000FFF
+
+#define     LINSYSCFLUSH_BASE   0x04400000
+#define         LINSYSCFLUSH_DCACHE_LINE    0x04400000
+#define         LINSYSCFLUSH_ICACHE_LINE    0x04500000
+#define         LINSYSCFLUSH_MMCU           0x04700000
+#ifndef METAC_1_2
+#define         LINSYSCFLUSH_TxMMCU_BASE    0x04700020
+#define         LINSYSCFLUSH_TxMMCU_STRIDE  0x00000008
+#endif
+#define         LINSYSCFLUSH_ADDR_BITS      0x000FFFFF
+#define         LINSYSCFLUSH_ADDR_S         0
+#define     LINSYSCFLUSH_LIMIT  0x047FFFFF
+
+#define     LINSYSCTRL_BASE     0x04800000
+#define     LINSYSCTRL_LIMIT    0x04FFFFFF
+
+#define     LINSYSMTABLE_BASE   0x05000000
+#define     LINSYSMTABLE_LIMIT  0x05FFFFFF
+
+#define     LINSYSDIRECT_BASE   0x06000000
+#define     LINSYSDIRECT_LIMIT  0x07FFFFFF
+
+#define LINLOCAL_BASE   0x08000000
+#define LINLOCAL_LIMIT  0x7FFFFFFF
+
+#define LINCORE_BASE    0x80000000
+#define LINCORE_LIMIT   0x87FFFFFF
+
+#define LINCORE_CODE_BASE  0x80000000
+#define LINCORE_CODE_LIMIT 0x81FFFFFF
+
+#define LINCORE_DATA_BASE  0x82000000
+#define LINCORE_DATA_LIMIT 0x83FFFFFF
+
+
+/* The core can support locked icache lines in this region */
+#define LINCORE_ICACHE_BASE  0x84000000
+#define LINCORE_ICACHE_LIMIT 0x85FFFFFF
+
+/* The core can support locked dcache lines in this region */
+#define LINCORE_DCACHE_BASE  0x86000000
+#define LINCORE_DCACHE_LIMIT 0x87FFFFFF
+
+#define LINGLOBAL_BASE  0x88000000
+#define LINGLOBAL_LIMIT 0xFFFDFFFF
+
+/*
+ * CHIP Core Register Map
+ * ----------------------
+ */
+#define CORE_HWBASE     0x04800000
+#define PRIV_HWBASE     0x04810000
+#define TRIG_HWBASE     0x04820000
+#define SYSC_HWBASE     0x04830000
+
+/*****************************************************************************
+ *         INTER-THREAD KICK REGISTERS FOR SOFTWARE EVENT GENERATION
+ ****************************************************************************/
+/*
+ * These values define memory mapped registers that can be used to supply
+ * kicks to threads that service arbitrary software events.
+ */
+
+#define T0KICK     0x04800800   /* Background kick 0     */
+#define     TXXKICK_MAX 0xFFFF  /* Maximum kicks */
+#define     TnXKICK_STRIDE      0x00001000  /* Thread scale value    */
+#define     TnXKICK_STRIDE_S    12
+#define T0KICKI    0x04800808   /* Interrupt kick 0      */
+#define     TXIKICK_OFFSET  0x00000008  /* Int level offset value */
+#define T1KICK     0x04801800   /* Background kick 1     */
+#define T1KICKI    0x04801808   /* Interrupt kick 1      */
+#define T2KICK     0x04802800   /* Background kick 2     */
+#define T2KICKI    0x04802808   /* Interrupt kick 2      */
+#define T3KICK     0x04803800   /* Background kick 3     */
+#define T3KICKI    0x04803808   /* Interrupt kick 3      */
+
+/*****************************************************************************
+ *                GLOBAL REGISTER ACCESS RESOURCES
+ ****************************************************************************/
+/*
+ * These values define memory mapped registers that allow access to the
+ * internal state of all threads in order to allow global set-up of thread
+ * state and external handling of thread events, errors, or debugging.
+ *
+ * The actual unit and register index values needed to access individul
+ * registers are chip specific see - METAC_TXUXX_VALUES in metac_x_y.h.
+ * However two C array initialisers TXUXX_MASKS and TGUXX_MASKS will always be
+ * defined to allow arbitrary loading, display, and saving of all valid
+ * register states without detailed knowledge of their purpose - TXUXX sets
+ * bits for all valid registers and TGUXX sets bits for the sub-set which are
+ * global.
+ */
+
+#define T0UCTREG0   0x04800000  /* Access to all CT regs */
+#define TnUCTRX_STRIDE      0x00001000  /* Thread scale value    */
+#define TXUCTREGn_STRIDE    0x00000008  /* Register scale value  */
+
+#define TXUXXRXDT  0x0480FFF0   /* Data to/from any threads reg */
+#define TXUXXRXRQ  0x0480FFF8
+#define     TXUXXRXRQ_DREADY_BIT 0x80000000  /* Poll for done */
+#define     TXUXXRXRQ_DSPEXT_BIT 0x00020000  /* Addr DSP Regs */
+#define     TXUXXRXRQ_RDnWR_BIT  0x00010000  /* Set for read  */
+#define     TXUXXRXRQ_TX_BITS    0x00003000  /* Thread number */
+#define     TXUXXRXRQ_TX_S       12
+#define     TXUXXRXRQ_RX_BITS    0x000001F0  /* Register num  */
+#define     TXUXXRXRQ_RX_S       4
+#define         TXUXXRXRQ_DSPRARD0    0      /* DSP RAM A Read Pointer 0 */
+#define         TXUXXRXRQ_DSPRARD1    1      /* DSP RAM A Read Pointer 1 */
+#define         TXUXXRXRQ_DSPRAWR0    2      /* DSP RAM A Write Pointer 0 */
+#define         TXUXXRXRQ_DSPRAWR2    3      /* DSP RAM A Write Pointer 1 */
+#define         TXUXXRXRQ_DSPRBRD0    4      /* DSP RAM B Read Pointer 0 */
+#define         TXUXXRXRQ_DSPRBRD1    5      /* DSP RAM B Read Pointer 1 */
+#define         TXUXXRXRQ_DSPRBWR0    6      /* DSP RAM B Write Pointer 0 */
+#define         TXUXXRXRQ_DSPRBWR1    7      /* DSP RAM B Write Pointer 1 */
+#define         TXUXXRXRQ_DSPRARINC0  8      /* DSP RAM A Read Increment 0 */
+#define         TXUXXRXRQ_DSPRARINC1  9      /* DSP RAM A Read Increment 1 */
+#define         TXUXXRXRQ_DSPRAWINC0 10      /* DSP RAM A Write Increment 0 */
+#define         TXUXXRXRQ_DSPRAWINC1 11      /* DSP RAM A Write Increment 1 */
+#define         TXUXXRXRQ_DSPRBRINC0 12      /* DSP RAM B Read Increment 0 */
+#define         TXUXXRXRQ_DSPRBRINC1 13      /* DSP RAM B Read Increment 1 */
+#define         TXUXXRXRQ_DSPRBWINC0 14      /* DSP RAM B Write Increment 0 */
+#define         TXUXXRXRQ_DSPRBWINC1 15      /* DSP RAM B Write Increment 1 */
+
+#define         TXUXXRXRQ_ACC0L0     16      /* Accumulator 0 bottom 32-bits */
+#define         TXUXXRXRQ_ACC1L0     17      /* Accumulator 1 bottom 32-bits */
+#define         TXUXXRXRQ_ACC2L0     18      /* Accumulator 2 bottom 32-bits */
+#define         TXUXXRXRQ_ACC3L0     19      /* Accumulator 3 bottom 32-bits */
+#define         TXUXXRXRQ_ACC0HI     20      /* Accumulator 0 top 8-bits */
+#define         TXUXXRXRQ_ACC1HI     21      /* Accumulator 1 top 8-bits */
+#define         TXUXXRXRQ_ACC2HI     22      /* Accumulator 2 top 8-bits */
+#define         TXUXXRXRQ_ACC3HI     23      /* Accumulator 3 top 8-bits */
+#define     TXUXXRXRQ_UXX_BITS   0x0000000F  /* Unit number   */
+#define     TXUXXRXRQ_UXX_S      0
+
+/*****************************************************************************
+ *          PRIVILEGE CONTROL VALUES FOR MEMORY MAPPED RESOURCES
+ ****************************************************************************/
+/*
+ * These values define memory mapped registers that give control over and
+ * the privilege required to access other memory mapped resources. These
+ * registers themselves always require privilege to update them.
+ */
+
+#define TXPRIVREG_STRIDE    0x8 /* Delta between per-thread regs */
+#define TXPRIVREG_STRIDE_S  3
+
+/*
+ * Each bit 0 to 15 defines privilege required to access internal register
+ * regions 0x04800000 to 0x048FFFFF in 64k chunks
+ */
+#define T0PIOREG    0x04810100
+#define T1PIOREG    0x04810108
+#define T2PIOREG    0x04810110
+#define T3PIOREG    0x04810118
+
+/*
+ * Each bit 0 to 31 defines privilege required to use the pair of
+ * system events implemented as writee in the regions 0x04000000 to
+ * 0x04000FFF in 2*64 byte chunks.
+ */
+#define T0PSYREG    0x04810180
+#define T1PSYREG    0x04810188
+#define T2PSYREG    0x04810190
+#define T3PSYREG    0x04810198
+
+/*
+ * CHIP PRIV CONTROLS
+ * ------------------
+ */
+
+/* The TXPIOREG register holds a bit mask directly mappable to
+   corresponding addresses in the range 0x04800000 to 049FFFFF */
+#define     TXPIOREG_ADDR_BITS  0x1F0000 /* Up to 32x64K bytes */
+#define     TXPIOREG_ADDR_S     16
+
+/* Hence based on the _HWBASE values ... */
+#define     TXPIOREG_CORE_BIT       (1<<((0x04800000>>16)&0x1F))
+#define     TXPIOREG_PRIV_BIT       (1<<((0x04810000>>16)&0x1F))
+#define     TXPIOREG_TRIG_BIT       (1<<((0x04820000>>16)&0x1F))
+#define     TXPIOREG_SYSC_BIT       (1<<((0x04830000>>16)&0x1F))
+
+#define     TXPIOREG_WRC_BIT          0x00080000  /* Wr combiner reg priv */
+#define     TXPIOREG_LOCALBUS_RW_BIT  0x00040000  /* Local bus rd/wr priv */
+#define     TXPIOREG_SYSREGBUS_RD_BIT 0x00020000  /* Sys reg bus write priv */
+#define     TXPIOREG_SYSREGBUS_WR_BIT 0x00010000  /* Sys reg bus read priv */
+
+/* CORE region privilege controls */
+#define T0PRIVCORE 0x04800828
+#define         TXPRIVCORE_TXBKICK_BIT   0x001  /* Background kick priv */
+#define         TXPRIVCORE_TXIKICK_BIT   0x002  /* Interrupt kick priv  */
+#define         TXPRIVCORE_TXAMAREGX_BIT 0x004  /* TXAMAREG4|5|6 priv   */
+#define TnPRIVCORE_STRIDE 0x00001000
+
+#define T0PRIVSYSR 0x04810000
+#define     TnPRIVSYSR_STRIDE   0x00000008
+#define     TnPRIVSYSR_STRIDE_S 3
+#define     TXPRIVSYSR_CFLUSH_BIT     0x01
+#define     TXPRIVSYSR_MTABLE_BIT     0x02
+#define     TXPRIVSYSR_DIRECT_BIT     0x04
+#ifdef METAC_1_2
+#define     TXPRIVSYSR_ALL_BITS       0x07
+#else
+#define     TXPRIVSYSR_CORE_BIT       0x08
+#define     TXPRIVSYSR_CORECODE_BIT   0x10
+#define     TXPRIVSYSR_ALL_BITS       0x1F
+#endif
+#define T1PRIVSYSR 0x04810008
+#define T2PRIVSYSR 0x04810010
+#define T3PRIVSYSR 0x04810018
+
+/*****************************************************************************
+ *          H/W TRIGGER STATE/LEVEL REGISTERS AND H/W TRIGGER VECTORS
+ ****************************************************************************/
+/*
+ * These values define memory mapped registers that give control over and
+ * the state of hardware trigger sources both external to the META processor
+ * and internal to it.
+ */
+
+#define HWSTATMETA  0x04820000  /* Hardware status/clear META trig */
+#define         HWSTATMETA_T0HALT_BITS 0xF
+#define         HWSTATMETA_T0HALT_S    0
+#define     HWSTATMETA_T0BHALT_BIT 0x1  /* Background HALT */
+#define     HWSTATMETA_T0IHALT_BIT 0x2  /* Interrupt HALT  */
+#define     HWSTATMETA_T0PHALT_BIT 0x4  /* PF/RO Memory HALT */
+#define     HWSTATMETA_T0AMATR_BIT 0x8  /* AMA trigger */
+#define     HWSTATMETA_TnINT_S     4    /* Shift by (thread*4) */
+#define HWSTATEXT   0x04820010  /* H/W status/clear external trigs  0-31 */
+#define HWSTATEXT2  0x04820018  /* H/W status/clear external trigs 32-63 */
+#define HWSTATEXT4  0x04820020  /* H/W status/clear external trigs 64-95 */
+#define HWSTATEXT6  0x04820028  /* H/W status/clear external trigs 96-128 */
+#define HWLEVELEXT  0x04820030  /* Edge/Level type of external trigs  0-31 */
+#define HWLEVELEXT2 0x04820038  /* Edge/Level type of external trigs 32-63 */
+#define HWLEVELEXT4 0x04820040  /* Edge/Level type of external trigs 64-95 */
+#define HWLEVELEXT6 0x04820048  /* Edge/Level type of external trigs 96-128 */
+#define     HWLEVELEXT_XXX_LEVEL 1  /* Level sense logic in HWSTATEXTn */
+#define     HWLEVELEXT_XXX_EDGE  0
+#define HWMASKEXT   0x04820050  /* Enable/disable of external trigs  0-31 */
+#define HWMASKEXT2  0x04820058  /* Enable/disable of external trigs 32-63 */
+#define HWMASKEXT4  0x04820060  /* Enable/disable of external trigs 64-95 */
+#define HWMASKEXT6  0x04820068  /* Enable/disable of external trigs 96-128 */
+#define T0VECINT_BHALT  0x04820500  /* Background HALT trigger vector */
+#define     TXVECXXX_BITS   0xF       /* Per-trigger vector vals 0,1,4-15 */
+#define     TXVECXXX_S  0
+#define T0VECINT_IHALT  0x04820508  /* Interrupt HALT */
+#define T0VECINT_PHALT  0x04820510  /* PF/RO memory fault */
+#define T0VECINT_AMATR  0x04820518  /* AMA trigger */
+#define     TnVECINT_STRIDE 0x00000020  /* Per thread stride */
+#define HWVEC0EXT   0x04820700  /* Vectors for external triggers  0-31 */
+#define HWVEC20EXT  0x04821700  /* Vectors for external triggers 32-63 */
+#define HWVEC40EXT  0x04822700  /* Vectors for external triggers 64-95 */
+#define HWVEC60EXT  0x04823700  /* Vectors for external triggers 96-127 */
+#define     HWVECnEXT_STRIDE 0x00000008 /* Per trigger stride */
+#define HWVECnEXT_DEBUG 0x1         /* Redirect trigger to debug i/f */
+
+/*
+ * CORE HWCODE-BREAKPOINT REGISTERS/VALUES
+ * ---------------------------------------
+ */
+#define CODEB0ADDR         0x0480FF00  /* Address specifier */
+#define     CODEBXADDR_MATCHX_BITS 0xFFFFFFFC
+#define     CODEBXADDR_MATCHX_S    2
+#define CODEB0CTRL         0x0480FF08  /* Control */
+#define     CODEBXCTRL_MATEN_BIT   0x80000000   /* Match 'Enable'  */
+#define     CODEBXCTRL_MATTXEN_BIT 0x10000000   /* Match threadn enable */
+#define     CODEBXCTRL_HITC_BITS   0x00FF0000   /* Hit counter   */
+#define     CODEBXCTRL_HITC_S      16
+#define           CODEBXHITC_NEXT  0xFF     /* Next 'hit' will trigger */
+#define           CODEBXHITC_HIT1  0x00     /* No 'hits' after trigger */
+#define     CODEBXCTRL_MMASK_BITS  0x0000FFFC   /* Mask ADDR_MATCH bits */
+#define     CODEBXCTRL_MMASK_S     2
+#define     CODEBXCTRL_MATLTX_BITS 0x00000003   /* Match threadn LOCAL addr */
+#define     CODEBXCTRL_MATLTX_S    0            /* Match threadn LOCAL addr */
+#define CODEBnXXXX_STRIDE      0x00000010  /* Stride between CODEB reg sets */
+#define CODEBnXXXX_STRIDE_S    4
+#define CODEBnXXXX_LIMIT       3           /* Sets 0-3 */
+
+/*
+ * CORE DATA-WATCHPOINT REGISTERS/VALUES
+ * -------------------------------------
+ */
+#define DATAW0ADDR         0x0480FF40  /* Address specifier */
+#define     DATAWXADDR_MATCHR_BITS 0xFFFFFFF8
+#define     DATAWXADDR_MATCHR_S    3
+#define     DATAWXADDR_MATCHW_BITS 0xFFFFFFFF
+#define     DATAWXADDR_MATCHW_S    0
+#define DATAW0CTRL         0x0480FF48  /* Control */
+#define     DATAWXCTRL_MATRD_BIT   0x80000000   /* Match 'Read'  */
+#ifndef METAC_1_2
+#define     DATAWXCTRL_MATNOTTX_BIT 0x20000000  /* Invert threadn enable */
+#endif
+#define     DATAWXCTRL_MATWR_BIT   0x40000000   /* Match 'Write' */
+#define     DATAWXCTRL_MATTXEN_BIT 0x10000000   /* Match threadn enable */
+#define     DATAWXCTRL_WRSIZE_BITS 0x0F000000   /* Write Match Size */
+#define     DATAWXCTRL_WRSIZE_S    24
+#define         DATAWWRSIZE_ANY   0         /* Any size transaction matches */
+#define         DATAWWRSIZE_8BIT  1     /* Specific sizes ... */
+#define         DATAWWRSIZE_16BIT 2
+#define         DATAWWRSIZE_32BIT 3
+#define         DATAWWRSIZE_64BIT 4
+#define     DATAWXCTRL_HITC_BITS   0x00FF0000   /* Hit counter   */
+#define     DATAWXCTRL_HITC_S      16
+#define           DATAWXHITC_NEXT  0xFF     /* Next 'hit' will trigger */
+#define           DATAWXHITC_HIT1  0x00     /* No 'hits' after trigger */
+#define     DATAWXCTRL_MMASK_BITS 0x0000FFF8    /* Mask ADDR_MATCH bits */
+#define     DATAWXCTRL_MMASK_S    3
+#define     DATAWXCTRL_MATLTX_BITS 0x00000003   /* Match threadn LOCAL addr */
+#define     DATAWXCTRL_MATLTX_S    0            /* Match threadn LOCAL addr */
+#define DATAW0DMATCH0       0x0480FF50 /* Write match data */
+#define DATAW0DMATCH1       0x0480FF58
+#define DATAW0DMASK0        0x0480FF60 /* Write match data mask */
+#define DATAW0DMASK1        0x0480FF68
+#define DATAWnXXXX_STRIDE      0x00000040  /* Stride between DATAW reg sets */
+#define DATAWnXXXX_STRIDE_S    6
+#define DATAWnXXXX_LIMIT       1           /* Sets 0,1 */
+
+/*
+ * CHIP Automatic Mips Allocation control registers
+ * ------------------------------------------------
+ */
+
+/* CORE memory mapped AMA registers */
+#define T0AMAREG4   0x04800810
+#define     TXAMAREG4_POOLSIZE_BITS 0x3FFFFF00
+#define     TXAMAREG4_POOLSIZE_S    8
+#define     TXAMAREG4_AVALUE_BITS   0x000000FF
+#define     TXAMAREG4_AVALUE_S  0
+#define T0AMAREG5   0x04800818
+#define     TXAMAREG5_POOLC_BITS    0x07FFFFFF
+#define         TXAMAREG5_POOLC_S       0
+#define T0AMAREG6   0x04800820
+#define     TXAMAREG6_DLINEDEF_BITS 0x00FFFFF0
+#define         TXAMAREG6_DLINEDEF_S    0
+#define TnAMAREGX_STRIDE    0x00001000
+
+/*
+ * Memory Management Control Unit Table Entries
+ * --------------------------------------------
+ */
+#define MMCU_ENTRY_S         4            /* -> Entry size                */
+#define MMCU_ENTRY_ADDR_BITS 0xFFFFF000   /* Physical address             */
+#define MMCU_ENTRY_ADDR_S    12           /* -> Page size                 */
+#define MMCU_ENTRY_CWIN_BITS 0x000000C0   /* Caching 'window' selection   */
+#define MMCU_ENTRY_CWIN_S    6
+#define     MMCU_CWIN_UNCACHED  0 /* May not be memory etc.  */
+#define     MMCU_CWIN_BURST     1 /* Cached but LRU unset */
+#define     MMCU_CWIN_C1SET     2 /* Cached in 1 set only */
+#define     MMCU_CWIN_CACHED    3 /* Fully cached            */
+#define MMCU_ENTRY_CACHE_BIT 0x00000080   /* Set for cached region         */
+#define     MMCU_ECACHE1_FULL_BIT  0x00000040 /* Use all the sets */
+#define     MMCU_ECACHE0_BURST_BIT 0x00000040 /* Match bursts     */
+#define MMCU_ENTRY_SYS_BIT   0x00000010   /* Sys-coherent access required  */
+#define MMCU_ENTRY_WRC_BIT   0x00000008   /* Write combining allowed       */
+#define MMCU_ENTRY_PRIV_BIT  0x00000004   /* Privilege required            */
+#define MMCU_ENTRY_WR_BIT    0x00000002   /* Writes allowed                */
+#define MMCU_ENTRY_VAL_BIT   0x00000001   /* Entry is valid                */
+
+#ifdef METAC_2_1
+/*
+ * Extended first-level/top table entries have extra/larger fields in later
+ * cores as bits 11:0 previously had no effect in such table entries.
+ */
+#define MMCU_E1ENT_ADDR_BITS 0xFFFFFFC0   /* Physical address             */
+#define MMCU_E1ENT_ADDR_S    6            /*   -> resolution < page size  */
+#define MMCU_E1ENT_PGSZ_BITS 0x0000001E   /* Page size for 2nd level      */
+#define MMCU_E1ENT_PGSZ_S    1
+#define     MMCU_E1ENT_PGSZ0_POWER   12   /* PgSz  0 -> 4K */
+#define     MMCU_E1ENT_PGSZ_MAX      10   /* PgSz 10 -> 4M maximum */
+#define MMCU_E1ENT_MINIM_BIT 0x00000020
+#endif /* METAC_2_1 */
+
+/* MMCU control register in SYSC region */
+#define MMCU_TABLE_PHYS_ADDR        0x04830010
+#define     MMCU_TABLE_PHYS_ADDR_BITS   0xFFFFFFFC
+#ifdef METAC_2_1
+#define     MMCU_TABLE_PHYS_EXTEND      0x00000001     /* See below */
+#endif
+#define MMCU_DCACHE_CTRL_ADDR       0x04830018
+#define     MMCU_xCACHE_CTRL_ENABLE_BIT     0x00000001
+#define     MMCU_xCACHE_CTRL_PARTITION_BIT  0x00000000 /* See xCPART below */
+#define MMCU_ICACHE_CTRL_ADDR       0x04830020
+
+#ifdef METAC_2_1
+
+/*
+ * Allow direct access to physical memory used to implement MMU table.
+ *
+ * Each is based on a corresponding MMCU_TnLOCAL_TABLE_PHYSn or similar
+ *    MMCU_TnGLOBAL_TABLE_PHYSn register pair (see next).
+ */
+#define LINSYSMEMT0L_BASE   0x05000000
+#define LINSYSMEMT0L_LIMIT  0x051FFFFF
+#define     LINSYSMEMTnX_STRIDE     0x00200000  /*  2MB Local per thread */
+#define     LINSYSMEMTnX_STRIDE_S   21
+#define     LINSYSMEMTXG_OFFSET     0x00800000  /* +2MB Global per thread */
+#define     LINSYSMEMTXG_OFFSET_S   23
+#define LINSYSMEMT1L_BASE   0x05200000
+#define LINSYSMEMT1L_LIMIT  0x053FFFFF
+#define LINSYSMEMT2L_BASE   0x05400000
+#define LINSYSMEMT2L_LIMIT  0x055FFFFF
+#define LINSYSMEMT3L_BASE   0x05600000
+#define LINSYSMEMT3L_LIMIT  0x057FFFFF
+#define LINSYSMEMT0G_BASE   0x05800000
+#define LINSYSMEMT0G_LIMIT  0x059FFFFF
+#define LINSYSMEMT1G_BASE   0x05A00000
+#define LINSYSMEMT1G_LIMIT  0x05BFFFFF
+#define LINSYSMEMT2G_BASE   0x05C00000
+#define LINSYSMEMT2G_LIMIT  0x05DFFFFF
+#define LINSYSMEMT3G_BASE   0x05E00000
+#define LINSYSMEMT3G_LIMIT  0x05FFFFFF
+
+/*
+ * Extended MMU table functionality allows a sparse or flat table to be
+ * described much more efficiently than before.
+ */
+#define MMCU_T0LOCAL_TABLE_PHYS0    0x04830700
+#define   MMCU_TnX_TABLE_PHYSX_STRIDE    0x20   /* Offset per thread */
+#define   MMCU_TnX_TABLE_PHYSX_STRIDE_S  5
+#define   MMCU_TXG_TABLE_PHYSX_OFFSET    0x10   /* Global versus local */
+#define   MMCU_TXG_TABLE_PHYSX_OFFSET_S  4
+#define     MMCU_TBLPHYS0_DCCTRL_BITS       0x000000DF  /* DC controls  */
+#define     MMCU_TBLPHYS0_ENTLB_BIT         0x00000020  /* Cache in TLB */
+#define     MMCU_TBLPHYS0_TBLSZ_BITS        0x00000F00  /* Area supported */
+#define     MMCU_TBLPHYS0_TBLSZ_S           8
+#define         MMCU_TBLPHYS0_TBLSZ0_POWER      22  /* 0 -> 4M */
+#define         MMCU_TBLPHYS0_TBLSZ_MAX         9   /* 9 -> 2G */
+#define     MMCU_TBLPHYS0_LINBASE_BITS      0xFFC00000  /* Linear base */
+#define     MMCU_TBLPHYS0_LINBASE_S         22
+
+#define MMCU_T0LOCAL_TABLE_PHYS1    0x04830708
+#define     MMCU_TBLPHYS1_ADDR_BITS         0xFFFFFFFC  /* Physical base */
+#define     MMCU_TBLPHYS1_ADDR_S            2
+
+#define MMCU_T0GLOBAL_TABLE_PHYS0   0x04830710
+#define MMCU_T0GLOBAL_TABLE_PHYS1   0x04830718
+#define MMCU_T1LOCAL_TABLE_PHYS0    0x04830720
+#define MMCU_T1LOCAL_TABLE_PHYS1    0x04830728
+#define MMCU_T1GLOBAL_TABLE_PHYS0   0x04830730
+#define MMCU_T1GLOBAL_TABLE_PHYS1   0x04830738
+#define MMCU_T2LOCAL_TABLE_PHYS0    0x04830740
+#define MMCU_T2LOCAL_TABLE_PHYS1    0x04830748
+#define MMCU_T2GLOBAL_TABLE_PHYS0   0x04830750
+#define MMCU_T2GLOBAL_TABLE_PHYS1   0x04830758
+#define MMCU_T3LOCAL_TABLE_PHYS0    0x04830760
+#define MMCU_T3LOCAL_TABLE_PHYS1    0x04830768
+#define MMCU_T3GLOBAL_TABLE_PHYS0   0x04830770
+#define MMCU_T3GLOBAL_TABLE_PHYS1   0x04830778
+
+#define MMCU_T0EBWCCTRL             0x04830640
+#define     MMCU_TnEBWCCTRL_BITS    0x00000007
+#define     MMCU_TnEBWCCTRL_S       0
+#define         MMCU_TnEBWCCCTRL_DISABLE_ALL 0
+#define         MMCU_TnEBWCCCTRL_ABIT25      1
+#define         MMCU_TnEBWCCCTRL_ABIT26      2
+#define         MMCU_TnEBWCCCTRL_ABIT27      3
+#define         MMCU_TnEBWCCCTRL_ABIT28      4
+#define         MMCU_TnEBWCCCTRL_ABIT29      5
+#define         MMCU_TnEBWCCCTRL_ABIT30      6
+#define         MMCU_TnEBWCCCTRL_ENABLE_ALL  7
+#define MMCU_TnEBWCCTRL_STRIDE      8
+
+#endif /* METAC_2_1 */
+
+
+/* Registers within the SYSC register region */
+#define METAC_ID                0x04830000
+#define     METAC_ID_MAJOR_BITS     0xFF000000
+#define     METAC_ID_MAJOR_S        24
+#define     METAC_ID_MINOR_BITS     0x00FF0000
+#define     METAC_ID_MINOR_S        16
+#define     METAC_ID_REV_BITS       0x0000FF00
+#define     METAC_ID_REV_S          8
+#define     METAC_ID_MAINT_BITS     0x000000FF
+#define     METAC_ID_MAINT_S        0
+
+#ifdef METAC_2_1
+/* Use of this section is strongly deprecated */
+#define METAC_ID2               0x04830008
+#define     METAC_ID2_DESIGNER_BITS 0xFFFF0000  /* Modified by customer */
+#define     METAC_ID2_DESIGNER_S    16
+#define     METAC_ID2_MINOR2_BITS   0x00000F00  /* 3rd digit of prod rev */
+#define     METAC_ID2_MINOR2_S      8
+#define     METAC_ID2_CONFIG_BITS   0x000000FF  /* Wrapper configuration */
+#define     METAC_ID2_CONFIG_S      0
+
+/* Primary core identification and configuration information */
+#define METAC_CORE_ID           0x04831000
+#define     METAC_COREID_GROUP_BITS   0xFF000000
+#define     METAC_COREID_GROUP_S      24
+#define         METAC_COREID_GROUP_METAG  0x14
+#define     METAC_COREID_ID_BITS      0x00FF0000
+#define     METAC_COREID_ID_S         16
+#define         METAC_COREID_ID_W32       0x10   /* >= for 32-bit pipeline */
+#define     METAC_COREID_CONFIG_BITS  0x0000FFFF
+#define     METAC_COREID_CONFIG_S     0
+#define       METAC_COREID_CFGCACHE_BITS    0x0007
+#define       METAC_COREID_CFGCACHE_S       0
+#define           METAC_COREID_CFGCACHE_NOM       0
+#define           METAC_COREID_CFGCACHE_TYPE0     1
+#define           METAC_COREID_CFGCACHE_NOMMU     1 /* Alias for TYPE0 */
+#define           METAC_COREID_CFGCACHE_NOCACHE   2
+#define           METAC_COREID_CFGCACHE_PRIVNOMMU 3
+#define       METAC_COREID_CFGDSP_BITS      0x0038
+#define       METAC_COREID_CFGDSP_S         3
+#define           METAC_COREID_CFGDSP_NOM       0
+#define           METAC_COREID_CFGDSP_MIN       1
+#define       METAC_COREID_NOFPACC_BIT      0x0040 /* Set if no FPU accum */
+#define       METAC_COREID_CFGFPU_BITS      0x0180
+#define       METAC_COREID_CFGFPU_S         7
+#define           METAC_COREID_CFGFPU_NOM       0
+#define           METAC_COREID_CFGFPU_SNGL      1
+#define           METAC_COREID_CFGFPU_DBL       2
+#define       METAC_COREID_NOAMA_BIT        0x0200 /* Set if no AMA present */
+#define       METAC_COREID_NOCOH_BIT        0x0400 /* Set if no Gbl coherency */
+
+/* Core revision information */
+#define METAC_CORE_REV          0x04831008
+#define     METAC_COREREV_DESIGN_BITS   0xFF000000
+#define     METAC_COREREV_DESIGN_S      24
+#define     METAC_COREREV_MAJOR_BITS    0x00FF0000
+#define     METAC_COREREV_MAJOR_S       16
+#define     METAC_COREREV_MINOR_BITS    0x0000FF00
+#define     METAC_COREREV_MINOR_S       8
+#define     METAC_COREREV_MAINT_BITS    0x000000FF
+#define     METAC_COREREV_MAINT_S       0
+
+/* Configuration information control outside the core */
+#define METAC_CORE_DESIGNER1    0x04831010      /* Arbitrary value */
+#define METAC_CORE_DESIGNER2    0x04831018      /* Arbitrary value */
+
+/* Configuration information covering presence/number of various features */
+#define METAC_CORE_CONFIG2      0x04831020
+#define     METAC_CORECFG2_COREDBGTYPE_BITS 0x60000000   /* Core debug type */
+#define     METAC_CORECFG2_COREDBGTYPE_S    29
+#define     METAC_CORECFG2_DCSMALL_BIT      0x04000000   /* Data cache small */
+#define     METAC_CORECFG2_ICSMALL_BIT      0x02000000   /* Inst cache small */
+#define     METAC_CORECFG2_DCSZNP_BITS      0x01C00000   /* Data cache size np */
+#define     METAC_CORECFG2_DCSZNP_S         22
+#define     METAC_CORECFG2_ICSZNP_BITS      0x00380000  /* Inst cache size np */
+#define     METAC_CORECFG2_ICSZNP_S         19
+#define     METAC_CORECFG2_DCSZ_BITS        0x00070000   /* Data cache size */
+#define     METAC_CORECFG2_DCSZ_S           16
+#define         METAC_CORECFG2_xCSZ_4K          0        /* Allocated values */
+#define         METAC_CORECFG2_xCSZ_8K          1
+#define         METAC_CORECFG2_xCSZ_16K         2
+#define         METAC_CORECFG2_xCSZ_32K         3
+#define         METAC_CORECFG2_xCSZ_64K         4
+#define     METAC_CORE_C2ICSZ_BITS          0x0000E000   /* Inst cache size */
+#define     METAC_CORE_C2ICSZ_S             13
+#define     METAC_CORE_GBLACC_BITS          0x00001800   /* Number of Global Acc */
+#define     METAC_CORE_GBLACC_S             11
+#define     METAC_CORE_GBLDXR_BITS          0x00000700   /* 0 -> 0, R -> 2^(R-1) */
+#define     METAC_CORE_GBLDXR_S             8
+#define     METAC_CORE_GBLAXR_BITS          0x000000E0   /* 0 -> 0, R -> 2^(R-1) */
+#define     METAC_CORE_GBLAXR_S             5
+#define     METAC_CORE_RTTRACE_BIT          0x00000010
+#define     METAC_CORE_WATCHN_BITS          0x0000000C   /* 0 -> 0, N -> 2^N */
+#define     METAC_CORE_WATCHN_S             2
+#define     METAC_CORE_BREAKN_BITS          0x00000003   /* 0 -> 0, N -> 2^N */
+#define     METAC_CORE_BREAKN_S             0
+
+/* Configuration information covering presence/number of various features */
+#define METAC_CORE_CONFIG3      0x04831028
+#define     METAC_CORECFG3_L2C_REV_ID_BITS          0x000F0000   /* Revision of L2 cache */
+#define     METAC_CORECFG3_L2C_REV_ID_S             16
+#define     METAC_CORECFG3_L2C_LINE_SIZE_BITS       0x00003000   /* L2 line size */
+#define     METAC_CORECFG3_L2C_LINE_SIZE_S          12
+#define         METAC_CORECFG3_L2C_LINE_SIZE_64B    0x0          /* 64 bytes */
+#define     METAC_CORECFG3_L2C_NUM_WAYS_BITS        0x00000F00   /* L2 number of ways (2^n) */
+#define     METAC_CORECFG3_L2C_NUM_WAYS_S           8
+#define     METAC_CORECFG3_L2C_SIZE_BITS            0x000000F0   /* L2 size (2^n) */
+#define     METAC_CORECFG3_L2C_SIZE_S               4
+#define     METAC_CORECFG3_L2C_UNIFIED_BIT          0x00000004   /* Unified cache: */
+#define     METAC_CORECFG3_L2C_UNIFIED_S            2
+#define       METAC_CORECFG3_L2C_UNIFIED_UNIFIED    1            /* - Unified D/I cache */
+#define       METAC_CORECFG3_L2C_UNIFIED_SEPARATE   0            /* - Separate D/I cache */
+#define     METAC_CORECFG3_L2C_MODE_BIT             0x00000002   /* Cache Mode: */
+#define     METAC_CORECFG3_L2C_MODE_S               1
+#define       METAC_CORECFG3_L2C_MODE_WRITE_BACK    1            /* - Write back */
+#define       METAC_CORECFG3_L2C_MODE_WRITE_THROUGH 0            /* - Write through */
+#define     METAC_CORECFG3_L2C_HAVE_L2C_BIT         0x00000001   /* Have L2C */
+#define     METAC_CORECFG3_L2C_HAVE_L2C_S           0
+
+#endif /* METAC_2_1 */
+
+#define SYSC_CACHE_MMU_CONFIG       0x04830028
+#ifdef METAC_2_1
+#define     SYSC_CMMUCFG_DCSKEWABLE_BIT 0x00000040
+#define     SYSC_CMMUCFG_ICSKEWABLE_BIT 0x00000020
+#define     SYSC_CMMUCFG_DCSKEWOFF_BIT  0x00000010  /* Skew association override  */
+#define     SYSC_CMMUCFG_ICSKEWOFF_BIT  0x00000008  /* -> default 0 on if present */
+#define     SYSC_CMMUCFG_MODE_BITS      0x00000007  /* Access to old state */
+#define     SYSC_CMMUCFG_MODE_S         0
+#define         SYSC_CMMUCFG_ON             0x7
+#define         SYSC_CMMUCFG_EBYPASS        0x6   /* Enhanced by-pass mode */
+#define         SYSC_CMMUCFG_EBYPASSIC      0x4   /* EB just inst cache */
+#define         SYSC_CMMUCFG_EBYPASSDC      0x2   /* EB just data cache */
+#endif /* METAC_2_1 */
+/* Old definitions, Keep them for now */
+#define         SYSC_CMMUCFG_MMU_ON_BIT     0x1
+#define         SYSC_CMMUCFG_DC_ON_BIT      0x2
+#define         SYSC_CMMUCFG_IC_ON_BIT      0x4
+
+#define SYSC_JTAG_THREAD            0x04830030
+#define     SYSC_JTAG_TX_BITS           0x00000003 /* Read only bits! */
+#define     SYSC_JTAG_TX_S              0
+#define     SYSC_JTAG_PRIV_BIT          0x00000004
+#ifdef METAC_2_1
+#define     SYSC_JTAG_SLAVETX_BITS      0x00000018
+#define     SYSC_JTAG_SLAVETX_S         3
+#endif /* METAC_2_1 */
+
+#define SYSC_DCACHE_FLUSH           0x04830038
+#define SYSC_ICACHE_FLUSH           0x04830040
+#define  SYSC_xCACHE_FLUSH_INIT     0x1
+#define MMCU_DIRECTMAP0_ADDR        0x04830080 /* LINSYSDIRECT_BASE -> */
+#define     MMCU_DIRECTMAPn_STRIDE      0x00000010 /* 4 Region settings */
+#define     MMCU_DIRECTMAPn_S           4
+#define         MMCU_DIRECTMAPn_ADDR_BITS       0xFF800000
+#define         MMCU_DIRECTMAPn_ADDR_S          23
+#define         MMCU_DIRECTMAPn_ADDR_SCALE      0x00800000 /* 8M Regions */
+#ifdef METAC_2_1
+/*
+ * These fields in the above registers provide MMCU_ENTRY_* values
+ *   for each direct mapped region to enable optimisation of these areas.
+ *       (LSB similar to VALID must be set for enhancments to be active)
+ */
+#define         MMCU_DIRECTMAPn_ENHANCE_BIT     0x00000001 /* 0 = no optim */
+#define         MMCU_DIRECTMAPn_DCCTRL_BITS     0x000000DF /* Get DC Ctrl */
+#define         MMCU_DIRECTMAPn_DCCTRL_S        0
+#define         MMCU_DIRECTMAPn_ICCTRL_BITS     0x0000C000 /* Get IC Ctrl */
+#define         MMCU_DIRECTMAPn_ICCTRL_S        8
+#define         MMCU_DIRECTMAPn_ENTLB_BIT       0x00000020 /* Cache in TLB */
+#define         MMCU_DIRECTMAPn_ICCWIN_BITS     0x0000C000 /* Get IC Win Bits */
+#define         MMCU_DIRECTMAPn_ICCWIN_S        14
+#endif /* METAC_2_1 */
+
+#define MMCU_DIRECTMAP1_ADDR        0x04830090
+#define MMCU_DIRECTMAP2_ADDR        0x048300a0
+#define MMCU_DIRECTMAP3_ADDR        0x048300b0
+
+/*
+ * These bits partion each threads use of data cache or instruction cache
+ * resource by modifying the top 4 bits of the address within the cache
+ * storage area.
+ */
+#define SYSC_DCPART0 0x04830200
+#define     SYSC_xCPARTn_STRIDE   0x00000008
+#define     SYSC_xCPARTL_AND_BITS 0x0000000F /* Masks top 4 bits */
+#define     SYSC_xCPARTL_AND_S    0
+#define     SYSC_xCPARTG_AND_BITS 0x00000F00 /* Masks top 4 bits */
+#define     SYSC_xCPARTG_AND_S    8
+#define     SYSC_xCPARTL_OR_BITS  0x000F0000 /* Ors into top 4 bits */
+#define     SYSC_xCPARTL_OR_S     16
+#define     SYSC_xCPARTG_OR_BITS  0x0F000000 /* Ors into top 4 bits */
+#define     SYSC_xCPARTG_OR_S     24
+#define     SYSC_CWRMODE_BIT      0x80000000 /* Write cache mode bit */
+
+#define SYSC_DCPART1 0x04830208
+#define SYSC_DCPART2 0x04830210
+#define SYSC_DCPART3 0x04830218
+#define SYSC_ICPART0 0x04830220
+#define SYSC_ICPART1 0x04830228
+#define SYSC_ICPART2 0x04830230
+#define SYSC_ICPART3 0x04830238
+
+/*
+ * META Core Memory and Cache Update registers
+ */
+#define SYSC_MCMDATAX  0x04830300   /* 32-bit read/write data register */
+#define SYSC_MCMDATAT  0x04830308   /* Read or write data triggers oper */
+#define SYSC_MCMGCTRL  0x04830310   /* Control register */
+#define     SYSC_MCMGCTRL_READ_BIT  0x00000001 /* Set to issue 1st read */
+#define     SYSC_MCMGCTRL_AINC_BIT  0x00000002 /* Set for auto-increment */
+#define     SYSC_MCMGCTRL_ADDR_BITS 0x000FFFFC /* Address or index */
+#define     SYSC_MCMGCTRL_ADDR_S    2
+#define     SYSC_MCMGCTRL_ID_BITS   0x0FF00000 /* Internal memory block Id */
+#define     SYSC_MCMGCTRL_ID_S      20
+#define         SYSC_MCMGID_NODEV       0xFF /* No Device Selected */
+#define         SYSC_MCMGID_DSPRAM0A    0x04 /* DSP RAM D0 block A access */
+#define         SYSC_MCMGID_DSPRAM0B    0x05 /* DSP RAM D0 block B access */
+#define         SYSC_MCMGID_DSPRAM1A    0x06 /* DSP RAM D1 block A access */
+#define         SYSC_MCMGID_DSPRAM1B    0x07 /* DSP RAM D1 block B access */
+#define         SYSC_MCMGID_DCACHEL     0x08 /* DCACHE lines (64-bytes/line) */
+#ifdef METAC_2_1
+#define         SYSC_MCMGID_DCACHETLB   0x09 /* DCACHE TLB ( Read Only )     */
+#endif /* METAC_2_1 */
+#define         SYSC_MCMGID_DCACHET     0x0A /* DCACHE tags (32-bits/line)   */
+#define         SYSC_MCMGID_DCACHELRU   0x0B /* DCACHE LRU (8-bits/line)     */
+#define         SYSC_MCMGID_ICACHEL     0x0C /* ICACHE lines (64-bytes/line  */
+#ifdef METAC_2_1
+#define         SYSC_MCMGID_ICACHETLB   0x0D /* ICACHE TLB (Read Only )     */
+#endif /* METAC_2_1 */
+#define         SYSC_MCMGID_ICACHET     0x0E /* ICACHE Tags (32-bits/line)   */
+#define         SYSC_MCMGID_ICACHELRU   0x0F /* ICACHE LRU (8-bits/line )    */
+#define         SYSC_MCMGID_COREIRAM0   0x10 /* Core code mem id 0 */
+#define         SYSC_MCMGID_COREIRAMn   0x17
+#define         SYSC_MCMGID_COREDRAM0   0x18 /* Core data mem id 0 */
+#define         SYSC_MCMGID_COREDRAMn   0x1F
+#ifdef METAC_2_1
+#define         SYSC_MCMGID_DCACHEST    0x20 /* DCACHE ST ( Read Only )      */
+#define         SYSC_MCMGID_ICACHEST    0x21 /* ICACHE ST ( Read Only )      */
+#define         SYSC_MCMGID_DCACHETLBLRU 0x22 /* DCACHE TLB LRU ( Read Only )*/
+#define         SYSC_MCMGID_ICACHETLBLRU 0x23 /* ICACHE TLB LRU( Read Only ) */
+#define         SYSC_MCMGID_DCACHESTLRU 0x24 /* DCACHE ST LRU ( Read Only )  */
+#define         SYSC_MCMGID_ICACHESTLRU 0x25 /* ICACHE ST LRU ( Read Only )  */
+#define         SYSC_MCMGID_DEBUGTLB    0x26 /* DEBUG TLB ( Read Only )      */
+#define         SYSC_MCMGID_DEBUGST     0x27 /* DEBUG ST ( Read Only )       */
+#define         SYSC_MCMGID_L2CACHEL    0x30 /* L2 Cache Lines (64-bytes/line) */
+#define         SYSC_MCMGID_L2CACHET    0x31 /* L2 Cache Tags (32-bits/line) */
+#define         SYSC_MCMGID_COPROX0     0x70 /* Coprocessor port id 0 */
+#define         SYSC_MCMGID_COPROXn     0x77
+#endif /* METAC_2_1 */
+#define     SYSC_MCMGCTRL_TR31_BIT  0x80000000 /* Trigger 31 on completion */
+#define SYSC_MCMSTATUS 0x04830318   /* Status read only */
+#define     SYSC_MCMSTATUS_IDLE_BIT 0x00000001
+
+/* META System Events */
+#define SYSC_SYS_EVENT            0x04830400
+#define     SYSC_SYSEVT_ATOMIC_BIT      0x00000001
+#define     SYSC_SYSEVT_CACHEX_BIT      0x00000002
+#define SYSC_ATOMIC_LOCK          0x04830408
+#define     SYSC_ATOMIC_STATE_TX_BITS 0x0000000F
+#define     SYSC_ATOMIC_STATE_TX_S    0
+#ifdef METAC_1_2
+#define     SYSC_ATOMIC_STATE_DX_BITS 0x000000F0
+#define     SYSC_ATOMIC_STATE_DX_S    4
+#else /* METAC_1_2 */
+#define     SYSC_ATOMIC_SOURCE_BIT    0x00000010
+#endif /* !METAC_1_2 */
+
+
+#ifdef METAC_2_1
+
+/* These definitions replace the EXPAND_TIMER_DIV register defines which are to
+ * be deprecated.
+ */
+#define SYSC_TIMER_DIV            0x04830140
+#define     SYSC_TIMDIV_BITS      0x000000FF
+#define     SYSC_TIMDIV_S         0
+
+/* META Enhanced by-pass control for local and global region */
+#define MMCU_LOCAL_EBCTRL   0x04830600
+#define MMCU_GLOBAL_EBCTRL  0x04830608
+#define     MMCU_EBCTRL_SINGLE_BIT      0x00000020 /* TLB Uncached */
+/*
+ * These fields in the above registers provide MMCU_ENTRY_* values
+ *   for each direct mapped region to enable optimisation of these areas.
+ */
+#define     MMCU_EBCTRL_DCCTRL_BITS     0x000000C0 /* Get DC Ctrl */
+#define     MMCU_EBCTRL_DCCTRL_S        0
+#define     MMCU_EBCTRL_ICCTRL_BITS     0x0000C000 /* Get DC Ctrl */
+#define     MMCU_EBCTRL_ICCTRL_S        8
+
+/* META Cached Core Mode Registers */
+#define MMCU_T0CCM_ICCTRL   0x04830680     /* Core cached code control */
+#define     MMCU_TnCCM_xxCTRL_STRIDE    8
+#define     MMCU_TnCCM_xxCTRL_STRIDE_S  3
+#define MMCU_T1CCM_ICCTRL   0x04830688
+#define MMCU_T2CCM_ICCTRL   0x04830690
+#define MMCU_T3CCM_ICCTRL   0x04830698
+#define MMCU_T0CCM_DCCTRL   0x048306C0     /* Core cached data control */
+#define MMCU_T1CCM_DCCTRL   0x048306C8
+#define MMCU_T2CCM_DCCTRL   0x048306D0
+#define MMCU_T3CCM_DCCTRL   0x048306D8
+#define     MMCU_TnCCM_ENABLE_BIT       0x00000001
+#define     MMCU_TnCCM_WIN3_BIT         0x00000002
+#define     MMCU_TnCCM_DCWRITE_BIT      0x00000004  /* In DCCTRL only */
+#define     MMCU_TnCCM_REGSZ_BITS       0x00000F00
+#define     MMCU_TnCCM_REGSZ_S          8
+#define         MMCU_TnCCM_REGSZ0_POWER      12     /* RegSz 0 -> 4K */
+#define         MMCU_TnCCM_REGSZ_MAXBYTES    0x00080000  /* 512K max */
+#define     MMCU_TnCCM_ADDR_BITS        0xFFFFF000
+#define     MMCU_TnCCM_ADDR_S           12
+
+#endif /* METAC_2_1 */
+
+/*
+ * Hardware performance counter registers
+ * --------------------------------------
+ */
+#ifdef METAC_2_1
+/* Two Performance Counter Internal Core Events Control registers */
+#define PERF_ICORE0   0x0480FFD0
+#define PERF_ICORE1   0x0480FFD8
+#define     PERFI_CTRL_BITS    0x0000000F
+#define     PERFI_CTRL_S       0
+#define         PERFI_CAH_DMISS    0x0  /* Dcache Misses in cache (TLB Hit) */
+#define         PERFI_CAH_IMISS    0x1  /* Icache Misses in cache (TLB Hit) */
+#define         PERFI_TLB_DMISS    0x2  /* Dcache Misses in per-thread TLB */
+#define         PERFI_TLB_IMISS    0x3  /* Icache Misses in per-thread TLB */
+#define         PERFI_TLB_DWRHITS  0x4  /* DC Write-Hits in per-thread TLB */
+#define         PERFI_TLB_DWRMISS  0x5  /* DC Write-Miss in per-thread TLB */
+#define         PERFI_CAH_DLFETCH  0x8  /* DC Read cache line fetch */
+#define         PERFI_CAH_ILFETCH  0x9  /* DC Read cache line fetch */
+#define         PERFI_CAH_DWFETCH  0xA  /* DC Read cache word fetch */
+#define         PERFI_CAH_IWFETCH  0xB  /* DC Read cache word fetch */
+#endif /* METAC_2_1 */
+
+/* Two memory-mapped hardware performance counter registers */
+#define PERF_COUNT0 0x0480FFE0
+#define PERF_COUNT1 0x0480FFE8
+
+/* Fields in PERF_COUNTn registers */
+#define PERF_COUNT_BITS  0x00ffffff /* Event count value */
+
+#define PERF_THREAD_BITS 0x0f000000 /* Thread mask selects threads */
+#define PERF_THREAD_S    24
+
+#define PERF_CTRL_BITS   0xf0000000 /* Event filter control */
+#define PERF_CTRL_S      28
+
+#define    PERFCTRL_SUPER   0  /* Superthread cycles */
+#define    PERFCTRL_REWIND  1  /* Rewinds due to Dcache Misses */
+#ifdef METAC_2_1
+#define    PERFCTRL_SUPREW  2  /* Rewinds of superthreaded cycles (no mask) */
+
+#define    PERFCTRL_CYCLES  3  /* Counts all cycles (no mask) */
+
+#define    PERFCTRL_PREDBC  4  /* Conditional branch predictions */
+#define    PERFCTRL_MISPBC  5  /* Conditional branch mispredictions */
+#define    PERFCTRL_PREDRT  6  /* Return predictions */
+#define    PERFCTRL_MISPRT  7  /* Return mispredictions */
+#endif /* METAC_2_1 */
+
+#define    PERFCTRL_DHITS   8  /* Dcache Hits */
+#define    PERFCTRL_IHITS   9  /* Icache Hits */
+#define    PERFCTRL_IMISS   10 /* Icache Misses in cache or TLB */
+#ifdef METAC_2_1
+#define    PERFCTRL_DCSTALL 11 /* Dcache+TLB o/p delayed (per-thread) */
+#define    PERFCTRL_ICSTALL 12 /* Icache+TLB o/p delayed (per-thread) */
+
+#define    PERFCTRL_INT     13 /* Internal core delailed events (see next) */
+#define    PERFCTRL_EXT     15 /* External source in core periphery */
+#endif /* METAC_2_1 */
+
+#ifdef METAC_2_1
+/* These definitions replace the EXPAND_PERFCHANx register defines which are to
+ * be deprecated.
+ */
+#define PERF_CHAN0 0x04830150
+#define PERF_CHAN1 0x04830158
+#define     PERF_CHAN_BITS    0x0000000F
+#define     PERF_CHAN_S       0
+#define         PERFCHAN_WRC_WRBURST   0x0   /* Write combiner write burst */
+#define         PERFCHAN_WRC_WRITE     0x1   /* Write combiner write       */
+#define         PERFCHAN_WRC_RDBURST   0x2   /* Write combiner read burst  */
+#define         PERFCHAN_WRC_READ      0x3   /* Write combiner read        */
+#define         PERFCHAN_PREARB_DELAY  0x4   /* Pre-arbiter delay cycle    */
+					     /* Cross-bar hold-off cycle:  */
+#define         PERFCHAN_XBAR_HOLDWRAP 0x5   /*    wrapper register        */
+#define         PERFCHAN_XBAR_HOLDSBUS 0x6   /*    system bus (ATP only)   */
+#define         PERFCHAN_XBAR_HOLDCREG 0x9   /*    core registers          */
+#define         PERFCHAN_L2C_MISS      0x6   /* L2 Cache miss              */
+#define         PERFCHAN_L2C_HIT       0x7   /* L2 Cache hit               */
+#define         PERFCHAN_L2C_WRITEBACK 0x8   /* L2 Cache writeback         */
+					     /* Admission delay cycle:     */
+#define         PERFCHAN_INPUT_CREG    0xB   /*    core registers          */
+#define         PERFCHAN_INPUT_INTR    0xC   /*    internal ram            */
+#define         PERFCHAN_INPUT_WRC     0xD   /*    write combiners(memory) */
+
+/* Should following be removed as not in TRM anywhere? */
+#define         PERFCHAN_XBAR_HOLDINTR 0x8   /*    internal ram            */
+#define         PERFCHAN_INPUT_SBUS    0xA   /*    register port           */
+/* End of remove section. */
+
+#define         PERFCHAN_MAINARB_DELAY 0xF   /* Main arbiter delay cycle   */
+
+#endif /* METAC_2_1 */
+
+#ifdef METAC_2_1
+/*
+ * Write combiner registers
+ * ------------------------
+ *
+ * These replace the EXPAND_T0WRCOMBINE register defines, which will be
+ * deprecated.
+ */
+#define WRCOMB_CONFIG0             0x04830100
+#define     WRCOMB_LFFEn_BIT           0x00004000  /* Enable auto line full flush */
+#define     WRCOMB_ENABLE_BIT          0x00002000  /* Enable write combiner */
+#define     WRCOMB_TIMEOUT_ENABLE_BIT  0x00001000  /* Timeout flush enable */
+#define     WRCOMB_TIMEOUT_COUNT_BITS  0x000003FF
+#define     WRCOMB_TIMEOUT_COUNT_S     0
+#define WRCOMB_CONFIG4             0x04830180
+#define     WRCOMB_PARTALLOC_BITS      0x000000C0
+#define     WRCOMB_PARTALLOC_S         64
+#define     WRCOMB_PARTSIZE_BITS       0x00000030
+#define     WRCOMB_PARTSIZE_S          4
+#define     WRCOMB_PARTOFFSET_BITS     0x0000000F
+#define     WRCOMB_PARTOFFSET_S        0
+#define WRCOMB_CONFIG_STRIDE       8
+#endif /* METAC_2_1 */
+
+#ifdef METAC_2_1
+/*
+ * Thread arbiter registers
+ * ------------------------
+ *
+ * These replace the EXPAND_T0ARBITER register defines, which will be
+ * deprecated.
+ */
+#define ARBITER_ARBCONFIG0       0x04830120
+#define     ARBCFG_BPRIORITY_BIT     0x02000000
+#define     ARBCFG_IPRIORITY_BIT     0x01000000
+#define     ARBCFG_PAGE_BITS         0x00FF0000
+#define     ARBCFG_PAGE_S            16
+#define     ARBCFG_BBASE_BITS        0x0000FF00
+#define     ARGCFG_BBASE_S           8
+#define     ARBCFG_IBASE_BITS        0x000000FF
+#define     ARBCFG_IBASE_S           0
+#define ARBITER_TTECONFIG0       0x04820160
+#define     ARBTTE_IUPPER_BITS       0xFF000000
+#define     ARBTTE_IUPPER_S          24
+#define     ARBTTE_ILOWER_BITS       0x00FF0000
+#define     ARBTTE_ILOWER_S          16
+#define     ARBTTE_BUPPER_BITS       0x0000FF00
+#define     ARBTTE_BUPPER_S          8
+#define     ARBTTE_BLOWER_BITS       0x000000FF
+#define     ARBTTE_BLOWER_S          0
+#define ARBITER_STRIDE           8
+#endif /* METAC_2_1 */
+
+/*
+ * Expansion area registers
+ * --------------------------------------
+ */
+
+/* These defines are to be deprecated. See above instead. */
+#define EXPAND_T0WRCOMBINE         0x03000000
+#ifdef METAC_2_1
+#define     EXPWRC_LFFEn_BIT           0x00004000  /* Enable auto line full flush */
+#endif /* METAC_2_1 */
+#define     EXPWRC_ENABLE_BIT          0x00002000  /* Enable write combiner */
+#define     EXPWRC_TIMEOUT_ENABLE_BIT  0x00001000  /* Timeout flush enable */
+#define     EXPWRC_TIMEOUT_COUNT_BITS  0x000003FF
+#define     EXPWRC_TIMEOUT_COUNT_S     0
+#define EXPAND_TnWRCOMBINE_STRIDE  0x00000008
+
+/* These defines are to be deprecated. See above instead. */
+#define EXPAND_T0ARBITER         0x03000020
+#define     EXPARB_BPRIORITY_BIT 0x02000000
+#define     EXPARB_IPRIORITY_BIT 0x01000000
+#define     EXPARB_PAGE_BITS     0x00FF0000
+#define     EXPARB_PAGE_S        16
+#define     EXPARB_BBASE_BITS    0x0000FF00
+#define     EXPARB_BBASE_S       8
+#define     EXPARB_IBASE_BITS    0x000000FF
+#define     EXPARB_IBASE_S       0
+#define EXPAND_TnARBITER_STRIDE  0x00000008
+
+/* These definitions are to be deprecated. See above instead. */
+#define EXPAND_TIMER_DIV   0x03000040
+#define     EXPTIM_DIV_BITS      0x000000FF
+#define     EXPTIM_DIV_S         0
+
+/* These definitions are to be deprecated. See above instead. */
+#define EXPAND_PERFCHAN0   0x03000050
+#define EXPAND_PERFCHAN1   0x03000058
+#define     EXPPERF_CTRL_BITS    0x0000000F
+#define     EXPPERF_CTRL_S       0
+#define         EXPPERF_WRC_WRBURST   0x0   /* Write combiner write burst */
+#define         EXPPERF_WRC_WRITE     0x1   /* Write combiner write       */
+#define         EXPPERF_WRC_RDBURST   0x2   /* Write combiner read burst  */
+#define         EXPPERF_WRC_READ      0x3   /* Write combiner read        */
+#define         EXPPERF_PREARB_DELAY  0x4   /* Pre-arbiter delay cycle    */
+					    /* Cross-bar hold-off cycle:  */
+#define         EXPPERF_XBAR_HOLDWRAP 0x5   /*    wrapper register        */
+#define         EXPPERF_XBAR_HOLDSBUS 0x6   /*    system bus              */
+#ifdef METAC_1_2
+#define         EXPPERF_XBAR_HOLDLBUS 0x7   /*    local bus               */
+#else /* METAC_1_2 */
+#define         EXPPERF_XBAR_HOLDINTR 0x8   /*    internal ram            */
+#define         EXPPERF_XBAR_HOLDCREG 0x9   /*    core registers          */
+					    /* Admission delay cycle:     */
+#define         EXPPERF_INPUT_SBUS    0xA   /*    register port           */
+#define         EXPPERF_INPUT_CREG    0xB   /*    core registers          */
+#define         EXPPERF_INPUT_INTR    0xC   /*    internal ram            */
+#define         EXPPERF_INPUT_WRC     0xD   /*    write combiners(memory) */
+#endif /* !METAC_1_2 */
+#define         EXPPERF_MAINARB_DELAY 0xF   /* Main arbiter delay cycle   */
+
+/*
+ * Debug port registers
+ * --------------------------------------
+ */
+
+/* Data Exchange Register */
+#define DBGPORT_MDBGDATAX                    0x0
+
+/* Data Transfer register */
+#define DBGPORT_MDBGDATAT                    0x4
+
+/* Control Register 0 */
+#define DBGPORT_MDBGCTRL0                    0x8
+#define     DBGPORT_MDBGCTRL0_ADDR_BITS      0xFFFFFFFC
+#define     DBGPORT_MDBGCTRL0_ADDR_S         2
+#define     DBGPORT_MDBGCTRL0_AUTOINCR_BIT   0x00000002
+#define     DBGPORT_MDBGCTRL0_RD_BIT         0x00000001
+
+/* Control Register 1 */
+#define DBGPORT_MDBGCTRL1                    0xC
+#ifdef METAC_2_1
+#define    DBGPORT_MDBGCTRL1_DEFERRTHREAD_BITS      0xC0000000
+#define    DBGPORT_MDBGCTRL1_DEFERRTHREAD_S         30
+#endif /* METAC_2_1 */
+#define     DBGPORT_MDBGCTRL1_LOCK2_INTERLOCK_BIT   0x20000000
+#define     DBGPORT_MDBGCTRL1_ATOMIC_INTERLOCK_BIT  0x10000000
+#define     DBGPORT_MDBGCTRL1_TRIGSTATUS_BIT        0x08000000
+#define     DBGPORT_MDBGCTRL1_GBLPORT_IDLE_BIT      0x04000000
+#define     DBGPORT_MDBGCTRL1_COREMEM_IDLE_BIT      0x02000000
+#define     DBGPORT_MDBGCTRL1_READY_BIT             0x01000000
+#ifdef METAC_2_1
+#define     DBGPORT_MDBGCTRL1_DEFERRID_BITS         0x00E00000
+#define     DBGPORT_MDBGCTRL1_DEFERRID_S            21
+#define     DBGPORT_MDBGCTRL1_DEFERR_BIT            0x00100000
+#endif /* METAC_2_1 */
+#define     DBGPORT_MDBGCTRL1_WR_ACTIVE_BIT         0x00040000
+#define     DBGPORT_MDBGCTRL1_COND_LOCK2_BIT        0x00020000
+#define     DBGPORT_MDBGCTRL1_LOCK2_BIT             0x00010000
+#define     DBGPORT_MDBGCTRL1_DIAGNOSE_BIT          0x00008000
+#define     DBGPORT_MDBGCTRL1_FORCEDIAG_BIT         0x00004000
+#define     DBGPORT_MDBGCTRL1_MEMFAULT_BITS         0x00003000
+#define     DBGPORT_MDBGCTRL1_MEMFAULT_S            12
+#define     DBGPORT_MDBGCTRL1_TRIGGER_BIT           0x00000100
+#ifdef METAC_2_1
+#define     DBGPORT_MDBGCTRL1_INTSPECIAL_BIT        0x00000080
+#define     DBGPORT_MDBGCTRL1_INTRUSIVE_BIT         0x00000040
+#endif /* METAC_2_1 */
+#define     DBGPORT_MDBGCTRL1_THREAD_BITS           0x00000030 /* Thread mask selects threads */
+#define     DBGPORT_MDBGCTRL1_THREAD_S              4
+#define     DBGPORT_MDBGCTRL1_TRANS_SIZE_BITS       0x0000000C
+#define     DBGPORT_MDBGCTRL1_TRANS_SIZE_S          2
+#define         DBGPORT_MDBGCTRL1_TRANS_SIZE_32_BIT 0x00000000
+#define         DBGPORT_MDBGCTRL1_TRANS_SIZE_16_BIT 0x00000004
+#define         DBGPORT_MDBGCTRL1_TRANS_SIZE_8_BIT  0x00000008
+#define     DBGPORT_MDBGCTRL1_BYTE_ROUND_BITS       0x00000003
+#define     DBGPORT_MDBGCTRL1_BYTE_ROUND_S          0
+#define         DBGPORT_MDBGCTRL1_BYTE_ROUND_8_BIT  0x00000001
+#define         DBGPORT_MDBGCTRL1_BYTE_ROUND_16_BIT 0x00000002
+
+
+/* L2 Cache registers */
+#define SYSC_L2C_INIT              0x048300C0
+#define SYSC_L2C_INIT_INIT                  1
+#define SYSC_L2C_INIT_IN_PROGRESS           0
+#define SYSC_L2C_INIT_COMPLETE              1
+
+#define SYSC_L2C_ENABLE            0x048300D0
+#define SYSC_L2C_ENABLE_ENABLE_BIT     0x00000001
+#define SYSC_L2C_ENABLE_PFENABLE_BIT   0x00000002
+
+#define SYSC_L2C_PURGE             0x048300C8
+#define SYSC_L2C_PURGE_PURGE                1
+#define SYSC_L2C_PURGE_IN_PROGRESS          0
+#define SYSC_L2C_PURGE_COMPLETE             1
+
+#endif /* _ASM_METAG_MEM_H_ */
diff --git a/arch/metag/include/asm/metag_regs.h b/arch/metag/include/asm/metag_regs.h
new file mode 100644
index 0000000..acf4b8e
--- /dev/null
+++ b/arch/metag/include/asm/metag_regs.h
@@ -0,0 +1,1184 @@
+/*
+ * asm/metag_regs.h
+ *
+ * Copyright (C) 2000-2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Various defines for Meta core (non memory-mapped) registers.
+ */
+
+#ifndef _ASM_METAG_REGS_H_
+#define _ASM_METAG_REGS_H_
+
+/*
+ * CHIP Unit Identifiers and Valid/Global register number masks
+ * ------------------------------------------------------------
+ */
+#define TXUCT_ID    0x0     /* Control unit regs */
+#ifdef METAC_1_2
+#define     TXUCT_MASK  0xFF0FFFFF  /* Valid regs 0..31  */
+#else
+#define     TXUCT_MASK  0xFF1FFFFF  /* Valid regs 0..31  */
+#endif
+#define     TGUCT_MASK  0x00000000  /* No global regs    */
+#define TXUD0_ID    0x1     /* Data unit regs */
+#define TXUD1_ID    0x2
+#define     TXUDX_MASK  0xFFFFFFFF  /* Valid regs 0..31 */
+#define     TGUDX_MASK  0xFFFF0000  /* Global regs for base inst */
+#define     TXUDXDSP_MASK   0x0F0FFFFF  /* Valid DSP regs */
+#define     TGUDXDSP_MASK   0x0E0E0000  /* Global DSP ACC regs */
+#define TXUA0_ID    0x3     /* Address unit regs */
+#define TXUA1_ID    0x4
+#define     TXUAX_MASK  0x0000FFFF  /* Valid regs   0-15 */
+#define     TGUAX_MASK  0x0000FF00  /* Global regs  8-15 */
+#define TXUPC_ID    0x5     /* PC registers */
+#define     TXUPC_MASK  0x00000003  /* Valid regs   0- 1 */
+#define     TGUPC_MASK  0x00000000  /* No global regs    */
+#define TXUPORT_ID  0x6     /* Ports are not registers */
+#define TXUTR_ID    0x7
+#define     TXUTR_MASK  0x0000005F  /* Valid regs   0-3,4,6 */
+#define     TGUTR_MASK  0x00000000  /* No global regs    */
+#ifdef METAC_2_1
+#define TXUTT_ID    0x8
+#define     TXUTT_MASK  0x0000000F  /* Valid regs   0-3 */
+#define     TGUTT_MASK  0x00000010  /* Global reg   4   */
+#define TXUFP_ID    0x9     /* FPU regs */
+#define     TXUFP_MASK  0x0000FFFF  /* Valid regs   0-15 */
+#define     TGUFP_MASK  0x00000000  /* No global regs    */
+#endif /* METAC_2_1 */
+
+#ifdef METAC_1_2
+#define TXUXX_MASKS { TXUCT_MASK, TXUDX_MASK, TXUDX_MASK, TXUAX_MASK, \
+		      TXUAX_MASK, TXUPC_MASK,          0, TXUTR_MASK, \
+		      0, 0, 0, 0, 0, 0, 0, 0                          }
+#define TGUXX_MASKS { TGUCT_MASK, TGUDX_MASK, TGUDX_MASK, TGUAX_MASK, \
+		      TGUAX_MASK, TGUPC_MASK,          0, TGUTR_MASK, \
+		      0, 0, 0, 0, 0, 0, 0, 0                          }
+#else /* METAC_1_2 */
+#define TXUXX_MASKS { TXUCT_MASK, TXUDX_MASK, TXUDX_MASK, TXUAX_MASK, \
+		      TXUAX_MASK, TXUPC_MASK,          0, TXUTR_MASK, \
+		      TXUTT_MASK, TXUFP_MASK,          0,          0, \
+			       0,          0,          0,          0  }
+#define TGUXX_MASKS { TGUCT_MASK, TGUDX_MASK, TGUDX_MASK, TGUAX_MASK, \
+		      TGUAX_MASK, TGUPC_MASK,          0, TGUTR_MASK, \
+		      TGUTT_MASK, TGUFP_MASK,          0,          0, \
+			       0,          0,          0,          0  }
+#endif /* !METAC_1_2 */
+
+#define TXUXXDSP_MASKS { 0, TXUDXDSP_MASK, TXUDXDSP_MASK, 0, 0, 0, 0, 0, \
+			 0, 0, 0, 0, 0, 0, 0, 0                          }
+#define TGUXXDSP_MASKS { 0, TGUDXDSP_MASK, TGUDXDSP_MASK, 0, 0, 0, 0, 0, \
+			 0, 0, 0, 0, 0, 0, 0, 0                          }
+
+/* -------------------------------------------------------------------------
+;                          DATA AND ADDRESS UNIT REGISTERS
+;  -----------------------------------------------------------------------*/
+/*
+  Thread local D0 registers
+ */
+/*   D0.0    ; Holds 32-bit result, can be used as scratch */
+#define D0Re0 D0.0
+/*   D0.1    ; Used to pass Arg6_32 */
+#define D0Ar6 D0.1
+/*   D0.2    ; Used to pass Arg4_32 */
+#define D0Ar4 D0.2
+/*   D0.3    ; Used to pass Arg2_32 to a called routine (see D1.3 below) */
+#define D0Ar2 D0.3
+/*   D0.4    ; Can be used as scratch; used to save A0FrP in entry sequences */
+#define D0FrT D0.4
+/*   D0.5    ; C compiler assumes preservation, save with D1.5 if used */
+/*   D0.6    ; C compiler assumes preservation, save with D1.6 if used */
+/*   D0.7    ; C compiler assumes preservation, save with D1.7 if used */
+/*   D0.8    ; Use of D0.8 and above is not encouraged */
+/*   D0.9  */
+/*   D0.10 */
+/*   D0.11 */
+/*   D0.12 */
+/*   D0.13 */
+/*   D0.14 */
+/*   D0.15 */
+/*
+   Thread local D1 registers
+ */
+/*   D1.0    ; Holds top 32-bits of 64-bit result, can be used as scratch */
+#define D1Re0 D1.0
+/*   D1.1    ; Used to pass Arg5_32 */
+#define D1Ar5 D1.1
+/*   D1.2    ; Used to pass Arg3_32 */
+#define D1Ar3 D1.2
+/*   D1.3    ; Used to pass Arg1_32 (first 32-bit argument) to a called routine */
+#define D1Ar1 D1.3
+/*   D1.4    ; Used for Return Pointer, save during entry with A0FrP (via D0.4) */
+#define D1RtP D1.4
+/*   D1.5    ; C compiler assumes preservation, save if used */
+/*   D1.6    ; C compiler assumes preservation, save if used */
+/*   D1.7    ; C compiler assumes preservation, save if used */
+/*   D1.8    ; Use of D1.8 and above is not encouraged */
+/*   D1.9  */
+/*   D1.10 */
+/*   D1.11 */
+/*   D1.12 */
+/*   D1.13 */
+/*   D1.14 */
+/*   D1.15 */
+/*
+   Thread local A0 registers
+ */
+/*   A0.0    ; Primary stack pointer */
+#define A0StP A0.0
+/*   A0.1    ; Used as local frame pointer in C, save if used (via D0.4) */
+#define A0FrP A0.1
+/*   A0.2  */
+/*   A0.3  */
+/*   A0.4    ; Use of A0.4 and above is not encouraged */
+/*   A0.5  */
+/*   A0.6  */
+/*   A0.7  */
+/*
+   Thread local A1 registers
+ */
+/*   A1.0    ; Global static chain pointer - do not modify */
+#define A1GbP A1.0
+/*   A1.1    ; Local static chain pointer in C, can be used as scratch */
+#define A1LbP A1.1
+/*   A1.2  */
+/*   A1.3  */
+/*   A1.4    ; Use of A1.4 and above is not encouraged */
+/*   A1.5  */
+/*   A1.6  */
+/*   A1.7  */
+#ifdef METAC_2_1
+/* Renameable registers for use with Fast Interrupts */
+/* The interrupt stack pointer (usually a global register) */
+#define A0IStP A0IReg
+/* The interrupt global pointer (usually a global register) */
+#define A1IGbP A1IReg
+#endif
+/*
+   Further registers may be globally allocated via linkage/loading tools,
+   normally they are not used.
+ */
+/*-------------------------------------------------------------------------
+;                    STACK STRUCTURE and CALLING CONVENTION
+; -----------------------------------------------------------------------*/
+/*
+; Calling convention indicates that the following is the state of the
+; stack frame at the start of a routine-
+;
+;       Arg9_32 [A0StP+#-12]
+;       Arg8_32 [A0StP+#- 8]
+;       Arg7_32 [A0StP+#- 4]
+;   A0StP->
+;
+; Registers D1.3, D0.3, ..., to D0.1 are used to pass Arg1_32 to Arg6_32
+;   respectively. If a routine needs to store them on the stack in order
+;   to make sub-calls or because of the general complexity of the routine it
+;   is best to dump these registers immediately at the start of a routine
+;   using a MSETL or SETL instruction-
+;
+;   MSETL   [A0StP],D0Ar6,D0Ar4,D0Ar2; Only dump argments expected
+;or SETL    [A0StP+#8++],D0Ar2       ; Up to two 32-bit args expected
+;
+; For non-leaf routines it is always necessary to save and restore at least
+; the return address value D1RtP on the stack. Also by convention if the
+; frame is saved then a new A0FrP value must be set-up. So for non-leaf
+; routines at this point both these registers must be saved onto the stack
+; using a SETL instruction and the new A0FrP value is then set-up-
+;
+;   MOV     D0FrT,A0FrP
+;   ADD     A0FrP,A0StP,#0
+;   SETL    [A0StP+#8++],D0FrT,D1RtP
+;
+; Registers D0.5, D1.5, to D1.7 are assumed to be preserved across calls so
+;   a SETL or MSETL instruction can be used to save the current state
+;   of these registers if they are modified by the current routine-
+;
+;   MSETL   [A0StP],D0.5,D0.6,D0.7   ; Only save registers modified
+;or SETL    [A0StP+#8++],D0.5        ; Only D0.5 and/or D1.5 modified
+;
+; All of the above sequences can be combined into one maximal case-
+;
+;   MOV     D0FrT,A0FrP              ; Save and calculate new frame pointer
+;   ADD     A0FrP,A0StP,#(ARS)
+;   MSETL   [A0StP],D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7
+;
+; Having completed the above sequence the only remaining task on routine
+; entry is to reserve any local and outgoing argment storage space on the
+; stack. This instruction may be omitted if the size of this region is zero-
+;
+;   ADD     A0StP,A0StP,#(LCS)
+;
+; LCS is the first example use of one of a number of standard local defined
+; values that can be created to make assembler code more readable and
+; potentially more robust-
+;
+; #define ARS   0x18                 ; Register arg bytes saved on stack
+; #define FRS   0x20                 ; Frame save area size in bytes
+; #define LCS   0x00                 ; Locals and Outgoing arg size
+; #define ARO   (LCS+FRS)            ; Stack offset to access args
+;
+; All of the above defines should be undefined (#undef) at the end of each
+; routine to avoid accidental use in the next routine.
+;
+; Given all of the above the following stack structure is expected during
+; the body of a routine if all args passed in registers are saved during
+; entry-
+;
+;                                    ; 'Incoming args area'
+;         Arg10_32 [A0StP+#-((10*4)+ARO)]       Arg9_32  [A0StP+#-(( 9*4)+ARO)]
+;         Arg8_32  [A0StP+#-(( 8*4)+ARO)]       Arg7_32  [A0StP+#-(( 7*4)+ARO)]
+;--- Call point
+; D0Ar6=  Arg6_32  [A0StP+#-(( 6*4)+ARO)] D1Ar5=Arg5_32  [A0StP+#-(( 5*4)+ARO)]
+; D0Ar4=  Arg4_32  [A0StP+#-(( 4*4)+ARO)] D1Ar3=Arg3_32  [A0StP+#-(( 3*4)+ARO)]
+; D0Ar2=  Arg2_32  [A0StP+#-(( 2*4)+ARO)] D1Ar2=Arg1_32  [A0StP+#-(( 1*4)+ARO)]
+;                                    ; 'Frame area'
+; A0FrP-> D0FrT, D1RtP,
+;         D0.5, D1.5,
+;         D0.6, D1.6,
+;         D0.7, D1.7,
+;                                    ; 'Locals area'
+;         Loc0_32  [A0StP+# (( 0*4)-LCS)],      Loc1_32 [A0StP+# (( 1*4)-LCS)]
+;               .... other locals
+;         Locn_32  [A0StP+# (( n*4)-LCS)]
+;                                    ; 'Outgoing args area'
+;         Outm_32  [A0StP+#- ( m*4)]            .... other outgoing args
+;         Out8_32  [A0StP+#- ( 1*4)]            Out7_32  [A0StP+#- ( 1*4)]
+; A0StP-> (Out1_32-Out6_32 in regs D1Ar1-D0Ar6)
+;
+; The exit sequence for a non-leaf routine can use the frame pointer created
+; in the entry sequence to optimise the recovery of the full state-
+;
+;   MGETL   D0FrT,D0.5,D0.6,D0.7,[A0FrP]
+;   SUB     A0StP,A0FrP,#(ARS+FRS)
+;   MOV     A0FrP,D0FrT
+;   MOV     PC,D1RtP
+;
+; Having described the most complex non-leaf case above, it is worth noting
+; that if a routine is a leaf and does not use any of the caller-preserved
+; state. The routine can be implemented as-
+;
+;   ADD     A0StP,A0StP,#LCS
+;   .... body of routine
+;   SUB     A0StP,A0StP,#LCS
+;   MOV     PC,D1RtP
+;
+; The stack adjustments can also be omitted if no local storage is required.
+;
+; Another exit sequence structure is more applicable if for a leaf routine
+; with no local frame pointer saved/generated in which the call saved
+; registers need to be saved and restored-
+;
+;   MSETL   [A0StP],D0.5,D0.6,D0.7   ; Hence FRS is 0x18, ARS is 0x00
+;   ADD     A0StP,A0StP,#LCS
+;   .... body of routine
+;   GETL    D0.5,D1.5,[A0StP+#((0*8)-(FRS+LCS))]
+;   GETL    D0.6,D1.6,[A0StP+#((1*8)-(FRS+LCS))]
+;   GETL    D0.7,D1.7,[A0StP+#((2*8)-(FRS+LCS))]
+;   SUB     A0StP,A0StP,#(ARS+FRS+LCS)
+;   MOV     PC,D1RtP
+;
+; Lastly, to support profiling assembler code should use a fixed entry/exit
+; sequence if the trigger define _GMON_ASM is defined-
+;
+;   #ifndef _GMON_ASM
+;   ... optimised entry code
+;   #else
+;   ; Profiling entry case
+;   MOV     D0FrT,A0FrP              ; Save and calculate new frame pointer
+;   ADD     A0FrP,A0StP,#(ARS)
+;   MSETL   [A0StP],...,D0FrT,... or SETL    [A0FrP],D0FrT,D1RtP
+;   CALLR   D0FrT,_mcount_wrapper
+;   #endif
+;   ... body of routine
+;   #ifndef _GMON_ASM
+;   ... optimised exit code
+;   #else
+;   ; Profiling exit case
+;   MGETL   D0FrT,...,[A0FrP]     or GETL    D0FrT,D1RtP,[A0FrP++]
+;   SUB     A0StP,A0FrP,#(ARS+FRS)
+;   MOV     A0FrP,D0FrT
+;   MOV     PC,D1RtP
+;   #endif
+
+
+; -------------------------------------------------------------------------
+;                         CONTROL UNIT REGISTERS
+; -------------------------------------------------------------------------
+;
+; See the assembler guide, hardware documentation, or the field values
+; defined below for some details of the use of these registers.
+*/
+#define TXENABLE    CT.0    /* Need to define bit-field values in these */
+#define TXMODE      CT.1
+#define TXSTATUS    CT.2    /* DEFAULT 0x00020000 */
+#define TXRPT       CT.3
+#define TXTIMER     CT.4
+#define TXL1START   CT.5
+#define TXL1END     CT.6
+#define TXL1COUNT   CT.7
+#define TXL2START   CT.8
+#define TXL2END     CT.9
+#define TXL2COUNT   CT.10
+#define TXBPOBITS   CT.11
+#define TXMRSIZE    CT.12
+#define TXTIMERI    CT.13
+#define TXDRCTRL    CT.14  /* DEFAULT 0x0XXXF0F0 */
+#define TXDRSIZE    CT.15
+#define TXCATCH0    CT.16
+#define TXCATCH1    CT.17
+#define TXCATCH2    CT.18
+#define TXCATCH3    CT.19
+
+#ifdef METAC_2_1
+#define TXDEFR      CT.20
+#define TXCPRS      CT.21
+#endif
+
+#define TXINTERN0   CT.23
+#define TXAMAREG0   CT.24
+#define TXAMAREG1   CT.25
+#define TXAMAREG2   CT.26
+#define TXAMAREG3   CT.27
+#define TXDIVTIME   CT.28   /* DEFAULT 0x00000001 */
+#define TXPRIVEXT   CT.29   /* DEFAULT 0x003B0000 */
+#define TXTACTCYC   CT.30
+#define TXIDLECYC   CT.31
+
+/*****************************************************************************
+ *                        CONTROL UNIT REGISTER BITS
+ ****************************************************************************/
+/*
+ * The following registers and where appropriate the sub-fields of those
+ * registers are defined for pervasive use in controlling program flow.
+ */
+
+/*
+ * TXENABLE register fields - only the thread id is routinely useful
+ */
+#define TXENABLE_REGNUM 0
+#define TXENABLE_THREAD_BITS       0x00000700
+#define TXENABLE_THREAD_S          8
+#define TXENABLE_REV_STEP_BITS     0x000000F0
+#define TXENABLE_REV_STEP_S        4
+
+/*
+ * TXMODE register - controls extensions of the instruction set
+ */
+#define TXMODE_REGNUM 1
+#define     TXMODE_DEFAULT  0   /* All fields default to zero */
+
+/*
+ * TXSTATUS register - contains a couple of stable bits that can be used
+ *      to determine the privilege processing level and interrupt
+ *      processing level of the current thread.
+ */
+#define TXSTATUS_REGNUM 2
+#define TXSTATUS_PSTAT_BIT         0x00020000   /* -> Privilege active      */
+#define TXSTATUS_PSTAT_S           17
+#define TXSTATUS_ISTAT_BIT         0x00010000   /* -> In interrupt state    */
+#define TXSTATUS_ISTAT_S           16
+
+/*
+ * These are all relatively boring registers, mostly full 32-bit
+ */
+#define TXRPT_REGNUM     3  /* Repeat counter for XFR... instructions   */
+#define TXTIMER_REGNUM   4  /* Timer-- causes timer trigger on overflow */
+#define TXL1START_REGNUM 5  /* Hardware Loop 1 Start-PC/End-PC/Count    */
+#define TXL1END_REGNUM   6
+#define TXL1COUNT_REGNUM 7
+#define TXL2START_REGNUM 8  /* Hardware Loop 2 Start-PC/End-PC/Count    */
+#define TXL2END_REGNUM   9
+#define TXL2COUNT_REGNUM 10
+#define TXBPOBITS_REGNUM 11 /* Branch predict override bits - tune perf */
+#define TXTIMERI_REGNUM  13 /* Timer-- time based interrupt trigger     */
+
+/*
+ * TXDIVTIME register is routinely read to calculate the time-base for
+ * the TXTIMER register.
+ */
+#define TXDIVTIME_REGNUM 28
+#define     TXDIVTIME_DIV_BITS 0x000000FF
+#define     TXDIVTIME_DIV_S    0
+#define     TXDIVTIME_DIV_MIN  0x00000001   /* Maximum resolution       */
+#define     TXDIVTIME_DIV_MAX  0x00000100   /* 1/1 -> 1/256 resolution  */
+#define     TXDIVTIME_BASE_HZ  1000000      /* Timers run at 1Mhz @1/1  */
+
+/*
+ * TXPRIVEXT register can be consulted to decide if write access to a
+ *    part of the threads register set is not permitted when in
+ *    unprivileged mode (PSTAT == 0).
+ */
+#define TXPRIVEXT_REGNUM 29
+#define     TXPRIVEXT_COPRO_BITS    0xFF000000 /* Co-processor 0-7 */
+#define     TXPRIVEXT_COPRO_S       24
+#ifndef METAC_1_2
+#define     TXPRIVEXT_TXTIMER_BIT   0x00080000 /* TXTIMER   priv */
+#define     TXPRIVEXT_TRACE_BIT     0x00040000 /* TTEXEC|TTCTRL|GTEXEC */
+#endif
+#define     TXPRIVEXT_TXTRIGGER_BIT 0x00020000 /* TXSTAT|TXMASK|TXPOLL */
+#define     TXPRIVEXT_TXGBLCREG_BIT 0x00010000 /* Global common regs */
+#define     TXPRIVEXT_CBPRIV_BIT    0x00008000 /* Mem i/f dump priv */
+#define     TXPRIVEXT_ILOCK_BIT     0x00004000 /* LOCK inst priv */
+#define     TXPRIVEXT_TXITACCYC_BIT 0x00002000 /* TXIDLECYC|TXTACTCYC */
+#define     TXPRIVEXT_TXDIVTIME_BIT 0x00001000 /* TXDIVTIME priv */
+#define     TXPRIVEXT_TXAMAREGX_BIT 0x00000800 /* TXAMAREGX priv */
+#define     TXPRIVEXT_TXTIMERI_BIT  0x00000400 /* TXTIMERI  priv */
+#define     TXPRIVEXT_TXSTATUS_BIT  0x00000200 /* TXSTATUS  priv */
+#define     TXPRIVEXT_TXDISABLE_BIT 0x00000100 /* TXENABLE  priv */
+#ifndef METAC_1_2
+#define     TXPRIVEXT_MINIMON_BIT   0x00000080 /* Enable Minim features */
+#define     TXPRIVEXT_OLDBCCON_BIT  0x00000020 /* Restore Static predictions */
+#define     TXPRIVEXT_ALIGNREW_BIT  0x00000010 /* Align & precise checks */
+#endif
+#define     TXPRIVEXT_KEEPPRI_BIT   0x00000008 /* Use AMA_Priority if ISTAT=1*/
+#define     TXPRIVEXT_TXTOGGLEI_BIT 0x00000001 /* TX.....I  priv */
+
+/*
+ * TXTACTCYC register - counts instructions issued for this thread
+ */
+#define TXTACTCYC_REGNUM  30
+#define     TXTACTCYC_COUNT_MASK    0x00FFFFFF
+
+/*
+ * TXIDLECYC register - counts idle cycles
+ */
+#define TXIDLECYC_REGNUM  31
+#define     TXIDLECYC_COUNT_MASK    0x00FFFFFF
+
+/*****************************************************************************
+ *                             DSP EXTENSIONS
+ ****************************************************************************/
+/*
+ * The following values relate to fields and controls that only a program
+ * using the DSP extensions of the META instruction set need to know.
+ */
+
+
+#ifndef METAC_1_2
+/*
+ * Allow co-processor hardware to replace the read pipeline data source in
+ * appropriate cases.
+ */
+#define TXMODE_RDCPEN_BIT       0x00800000
+#endif
+
+/*
+ * Address unit addressing modes
+ */
+#define TXMODE_A1ADDR_BITS  0x00007000
+#define TXMODE_A1ADDR_S     12
+#define TXMODE_A0ADDR_BITS  0x00000700
+#define TXMODE_A0ADDR_S     8
+#define     TXMODE_AXADDR_MODULO 3
+#define     TXMODE_AXADDR_REVB   4
+#define     TXMODE_AXADDR_REVW   5
+#define     TXMODE_AXADDR_REVD   6
+#define     TXMODE_AXADDR_REVL   7
+
+/*
+ * Data unit OverScale select (default 0 -> normal, 1 -> top 16 bits)
+ */
+#define TXMODE_DXOVERSCALE_BIT  0x00000080
+
+/*
+ * Data unit MX mode select (default 0 -> MX16, 1 -> MX8)
+ */
+#define TXMODE_M8_BIT         0x00000040
+
+/*
+ * Data unit accumulator saturation point (default -> 40 bit accumulator)
+ */
+#define TXMODE_DXACCSAT_BIT 0x00000020 /* Set for 32-bit accumulator */
+
+/*
+ * Data unit accumulator saturation enable (default 0 -> no saturation)
+ */
+#define TXMODE_DXSAT_BIT    0x00000010
+
+/*
+ * Data unit master rounding control (default 0 -> normal, 1 -> convergent)
+ */
+#define TXMODE_DXROUNDING_BIT   0x00000008
+
+/*
+ * Data unit product shift for fractional arithmetic (default off)
+ */
+#define TXMODE_DXPRODSHIFT_BIT  0x00000004
+
+/*
+ * Select the arithmetic mode (multiply mostly) for both data units
+ */
+#define TXMODE_DXARITH_BITS 0x00000003
+#define     TXMODE_DXARITH_32  3
+#define     TXMODE_DXARITH_32H 2
+#define     TXMODE_DXARITH_S16 1
+#define     TXMODE_DXARITH_16  0
+
+/*
+ * TXMRSIZE register value only relevant when DSP modulo addressing active
+ */
+#define TXMRSIZE_REGNUM 12
+#define     TXMRSIZE_MIN    0x0002  /* 0, 1 -> normal addressing logic */
+#define     TXMRSIZE_MAX    0xFFFF
+
+/*
+ * TXDRCTRL register can be used to detect the actaul size of the DSP RAM
+ * partitions allocated to this thread.
+ */
+#define TXDRCTRL_REGNUM 14
+#define     TXDRCTRL_SINESIZE_BITS  0x0F000000
+#define     TXDRCTRL_SINESIZE_S     24
+#define     TXDRCTRL_RAMSZPOW_BITS  0x001F0000  /* Limit = (1<<RAMSZPOW)-1 */
+#define     TXDRCTRL_RAMSZPOW_S     16
+#define     TXDRCTRL_D1RSZAND_BITS  0x0000F000  /* Mask top 4 bits - D1 */
+#define     TXDRCTRL_D1RSZAND_S     12
+#define     TXDRCTRL_D0RSZAND_BITS  0x000000F0  /* Mask top 4 bits - D0 */
+#define     TXDRCTRL_D0RSZAND_S     4
+/* Given extracted RAMSZPOW and DnRSZAND fields this returns the size */
+#define     TXDRCTRL_DXSIZE(Pow, AndBits) \
+				((((~(AndBits)) & 0x0f) + 1) << ((Pow)-4))
+
+/*
+ * TXDRSIZE register provides modulo addressing options for each DSP RAM
+ */
+#define TXDRSIZE_REGNUM 15
+#define     TXDRSIZE_R1MOD_BITS       0xFFFF0000
+#define     TXDRSIZE_R1MOD_S          16
+#define     TXDRSIZE_R0MOD_BITS       0x0000FFFF
+#define     TXDRSIZE_R0MOD_S          0
+
+#define     TXDRSIZE_RBRAD_SCALE_BITS 0x70000000
+#define     TXDRSIZE_RBRAD_SCALE_S    28
+#define     TXDRSIZE_RBMODSIZE_BITS   0x0FFF0000
+#define     TXDRSIZE_RBMODSIZE_S      16
+#define     TXDRSIZE_RARAD_SCALE_BITS 0x00007000
+#define     TXDRSIZE_RARAD_SCALE_S    12
+#define     TXDRSIZE_RAMODSIZE_BITS   0x00000FFF
+#define     TXDRSIZE_RAMODSIZE_S      0
+
+/*****************************************************************************
+ *                       DEFERRED and BUS ERROR EXTENSION
+ ****************************************************************************/
+
+/*
+ * TXDEFR register - Deferred exception control
+ */
+#define TXDEFR_REGNUM 20
+#define     TXDEFR_DEFAULT  0   /* All fields default to zero */
+
+/*
+ * Bus error state is a multi-bit positive/negative event notification from
+ * the bus infrastructure.
+ */
+#define     TXDEFR_BUS_ERR_BIT    0x80000000  /* Set if error (LSB STATE) */
+#define     TXDEFR_BUS_ERRI_BIT   0x40000000  /* Fetch returned error */
+#define     TXDEFR_BUS_STATE_BITS 0x3F000000  /* Bus event/state data */
+#define     TXDEFR_BUS_STATE_S    24
+#define     TXDEFR_BUS_TRIG_BIT   0x00800000  /* Set when bus error seen */
+
+/*
+ * Bus events are collected by background code in a deferred manner unless
+ * selected to trigger an extended interrupt HALT trigger when they occur.
+ */
+#define     TXDEFR_BUS_ICTRL_BIT  0x00000080  /* Enable interrupt trigger */
+
+/*
+ * CHIP Automatic Mips Allocation control registers
+ * ------------------------------------------------
+ */
+
+/* CT Bank AMA Registers */
+#define TXAMAREG0_REGNUM 24
+#ifdef METAC_1_2
+#define     TXAMAREG0_CTRL_BITS       0x07000000
+#else /* METAC_1_2 */
+#define     TXAMAREG0_RCOFF_BIT       0x08000000
+#define     TXAMAREG0_DLINEHLT_BIT    0x04000000
+#define     TXAMAREG0_DLINEDIS_BIT    0x02000000
+#define     TXAMAREG0_CYCSTRICT_BIT   0x01000000
+#define     TXAMAREG0_CTRL_BITS       (TXAMAREG0_RCOFF_BIT |    \
+				       TXAMAREG0_DLINEHLT_BIT | \
+				       TXAMAREG0_DLINEDIS_BIT | \
+				       TXAMAREG0_CYCSTRICT_BIT)
+#endif /* !METAC_1_2 */
+#define     TXAMAREG0_CTRL_S           24
+#define     TXAMAREG0_MDM_BIT         0x00400000
+#define     TXAMAREG0_MPF_BIT         0x00200000
+#define     TXAMAREG0_MPE_BIT         0x00100000
+#define     TXAMAREG0_MASK_BITS       (TXAMAREG0_MDM_BIT | \
+				       TXAMAREG0_MPF_BIT | \
+				       TXAMAREG0_MPE_BIT)
+#define     TXAMAREG0_MASK_S          20
+#define     TXAMAREG0_SDM_BIT         0x00040000
+#define     TXAMAREG0_SPF_BIT         0x00020000
+#define     TXAMAREG0_SPE_BIT         0x00010000
+#define     TXAMAREG0_STATUS_BITS     (TXAMAREG0_SDM_BIT | \
+				       TXAMAREG0_SPF_BIT | \
+				       TXAMAREG0_SPE_BIT)
+#define     TXAMAREG0_STATUS_S        16
+#define     TXAMAREG0_PRIORITY_BITS   0x0000FF00
+#define     TXAMAREG0_PRIORITY_S      8
+#define     TXAMAREG0_BVALUE_BITS     0x000000FF
+#define     TXAMAREG0_BVALUE_S  0
+
+#define TXAMAREG1_REGNUM 25
+#define     TXAMAREG1_DELAYC_BITS     0x07FFFFFF
+#define     TXAMAREG1_DELAYC_S  0
+
+#define TXAMAREG2_REGNUM 26
+#ifdef METAC_1_2
+#define     TXAMAREG2_DLINEC_BITS     0x00FFFFFF
+#define     TXAMAREG2_DLINEC_S        0
+#else /* METAC_1_2 */
+#define     TXAMAREG2_IRQPRIORITY_BIT 0xFF000000
+#define     TXAMAREG2_IRQPRIORITY_S   24
+#define     TXAMAREG2_DLINEC_BITS     0x00FFFFF0
+#define     TXAMAREG2_DLINEC_S        4
+#endif /* !METAC_1_2 */
+
+#define TXAMAREG3_REGNUM 27
+#define     TXAMAREG2_AMABLOCK_BIT    0x00080000
+#define     TXAMAREG2_AMAC_BITS       0x0000FFFF
+#define     TXAMAREG2_AMAC_S          0
+
+/*****************************************************************************
+ *                                FPU EXTENSIONS
+ ****************************************************************************/
+/*
+ * The following registers only exist in FPU enabled cores.
+ */
+
+/*
+ * TXMODE register - FPU rounding mode control/status fields
+ */
+#define     TXMODE_FPURMODE_BITS     0x00030000
+#define     TXMODE_FPURMODE_S        16
+#define     TXMODE_FPURMODEWRITE_BIT 0x00040000  /* Set to change FPURMODE */
+
+/*
+ * TXDEFR register - FPU exception handling/state is a significant source
+ *   of deferrable errors. Run-time S/W can move handling to interrupt level
+ *   using DEFR instruction to collect state.
+ */
+#define     TXDEFR_FPE_FE_BITS       0x003F0000  /* Set by FPU_FE events */
+#define     TXDEFR_FPE_FE_S          16
+
+#define     TXDEFR_FPE_INEXACT_FE_BIT   0x010000
+#define     TXDEFR_FPE_UNDERFLOW_FE_BIT 0x020000
+#define     TXDEFR_FPE_OVERFLOW_FE_BIT  0x040000
+#define     TXDEFR_FPE_DIVBYZERO_FE_BIT 0x080000
+#define     TXDEFR_FPE_INVALID_FE_BIT   0x100000
+#define     TXDEFR_FPE_DENORMAL_FE_BIT  0x200000
+
+#define     TXDEFR_FPE_ICTRL_BITS    0x000003F   /* Route to interrupts */
+#define     TXDEFR_FPE_ICTRL_S       0
+
+#define     TXDEFR_FPE_INEXACT_ICTRL_BIT   0x01
+#define     TXDEFR_FPE_UNDERFLOW_ICTRL_BIT 0x02
+#define     TXDEFR_FPE_OVERFLOW_ICTRL_BIT  0x04
+#define     TXDEFR_FPE_DIVBYZERO_ICTRL_BIT 0x08
+#define     TXDEFR_FPE_INVALID_ICTRL_BIT   0x10
+#define     TXDEFR_FPE_DENORMAL_ICTRL_BIT  0x20
+
+/*
+ * DETAILED FPU RELATED VALUES
+ * ---------------------------
+ */
+
+/*
+ * Rounding mode field in TXMODE can hold a number of logical values
+ */
+#define METAG_FPURMODE_TONEAREST  0x0      /* Default */
+#define METAG_FPURMODE_TOWARDZERO 0x1
+#define METAG_FPURMODE_UPWARD     0x2
+#define METAG_FPURMODE_DOWNWARD   0x3
+
+/*
+ * In order to set the TXMODE register field that controls the rounding mode
+ * an extra bit must be set in the value written versus that read in order
+ * to gate writes to the rounding mode field. This allows other non-FPU code
+ * to modify TXMODE without knowledge of the FPU units presence and not
+ * influence the FPU rounding mode. This macro adds the required bit so new
+ * rounding modes are accepted.
+ */
+#define TXMODE_FPURMODE_SET(FPURMode) \
+	(TXMODE_FPURMODEWRITE_BIT + ((FPURMode)<<TXMODE_FPURMODE_S))
+
+/*
+ * To successfully restore TXMODE to zero at the end of the function the
+ * following value (rather than zero) must be used.
+ */
+#define TXMODE_FPURMODE_RESET (TXMODE_FPURMODEWRITE_BIT)
+
+/*
+ * In TXSTATUS a special bit exists to indicate if FPU H/W has been accessed
+ * since it was last reset.
+ */
+#define TXSTATUS_FPACTIVE_BIT  0x01000000
+
+/*
+ * Exception state (see TXDEFR_FPU_FE_*) and enabling (for interrupt
+ * level processing (see TXDEFR_FPU_ICTRL_*) are controlled by similar
+ * bit mask locations within each field.
+ */
+#define METAG_FPU_FE_INEXACT   0x01
+#define METAG_FPU_FE_UNDERFLOW 0x02
+#define METAG_FPU_FE_OVERFLOW  0x04
+#define METAG_FPU_FE_DIVBYZERO 0x08
+#define METAG_FPU_FE_INVALID   0x10
+#define METAG_FPU_FE_DENORMAL  0x20
+#define METAG_FPU_FE_ALL_EXCEPT (METAG_FPU_FE_INEXACT   | \
+				 METAG_FPU_FE_UNDERFLOW | \
+				 METAG_FPU_FE_OVERFLOW  | \
+				 METAG_FPU_FE_DIVBYZERO | \
+				 METAG_FPU_FE_INVALID   | \
+				 METAG_FPU_FE_DENORMAL)
+
+/*****************************************************************************
+ *             THREAD CONTROL, ERROR, OR INTERRUPT STATE EXTENSIONS
+ ****************************************************************************/
+/*
+ * The following values are only relevant to code that externally controls
+ * threads, handles errors/interrupts, and/or set-up interrupt/error handlers
+ * for subsequent use.
+ */
+
+/*
+ * TXENABLE register fields - only ENABLE_BIT is potentially read/write
+ */
+#define TXENABLE_MAJOR_REV_BITS    0xFF000000
+#define TXENABLE_MAJOR_REV_S       24
+#define TXENABLE_MINOR_REV_BITS    0x00FF0000
+#define TXENABLE_MINOR_REV_S       16
+#define TXENABLE_CLASS_BITS        0x0000F000
+#define TXENABLE_CLASS_S           12
+#define TXENABLE_CLASS_DSP             0x0 /* -> DSP Thread */
+#define TXENABLE_CLASS_LDSP            0x8 /* -> DSP LITE Thread */
+#define TXENABLE_CLASS_GP              0xC /* -> General Purpose Thread */
+#define     TXENABLE_CLASSALT_LFPU       0x2 /*  Set to indicate LITE FPU */
+#define     TXENABLE_CLASSALT_FPUR8      0x1 /*  Set to indicate 8xFPU regs */
+#define TXENABLE_MTXARCH_BIT       0x00000800
+#define TXENABLE_STEP_REV_BITS     0x000000F0
+#define TXENABLE_STEP_REV_S        4
+#define TXENABLE_STOPPED_BIT       0x00000004   /* TXOFF due to ENABLE->0 */
+#define TXENABLE_OFF_BIT           0x00000002   /* Thread is in off state */
+#define TXENABLE_ENABLE_BIT        0x00000001   /* Set if running */
+
+/*
+ * TXSTATUS register - used by external/internal interrupt/error handler
+ */
+#define TXSTATUS_CB1MARKER_BIT     0x00800000   /* -> int level mem state */
+#define TXSTATUS_CBMARKER_BIT      0x00400000   /* -> mem i/f state dumped */
+#define TXSTATUS_MEM_FAULT_BITS    0x00300000
+#define TXSTATUS_MEM_FAULT_S       20
+#define     TXSTATUS_MEMFAULT_NONE  0x0 /* -> No memory fault       */
+#define     TXSTATUS_MEMFAULT_GEN   0x1 /* -> General fault         */
+#define     TXSTATUS_MEMFAULT_PF    0x2 /* -> Page fault            */
+#define     TXSTATUS_MEMFAULT_RO    0x3 /* -> Read only fault       */
+#define TXSTATUS_MAJOR_HALT_BITS   0x000C0000
+#define TXSTATUS_MAJOR_HALT_S      18
+#define     TXSTATUS_MAJHALT_TRAP 0x0   /* -> SWITCH inst used      */
+#define     TXSTATUS_MAJHALT_INST 0x1   /* -> Unknown inst or fetch */
+#define     TXSTATUS_MAJHALT_PRIV 0x2   /* -> Internal privilege    */
+#define     TXSTATUS_MAJHALT_MEM  0x3   /* -> Memory i/f fault      */
+#define TXSTATUS_L_STEP_BITS       0x00000800   /* -> Progress of L oper    */
+#define TXSTATUS_LSM_STEP_BITS     0x00000700   /* -> Progress of L/S mult  */
+#define TXSTATUS_LSM_STEP_S        8
+#define TXSTATUS_FLAG_BITS         0x0000001F   /* -> All the flags         */
+#define TXSTATUS_SCC_BIT           0x00000010   /* -> Split-16 flags ...    */
+#define TXSTATUS_SCF_LZ_BIT        0x00000008   /* -> Split-16 Low  Z flag  */
+#define TXSTATUS_SCF_HZ_BIT        0x00000004   /* -> Split-16 High Z flag  */
+#define TXSTATUS_SCF_HC_BIT        0x00000002   /* -> Split-16 High C flag  */
+#define TXSTATUS_SCF_LC_BIT        0x00000001   /* -> Split-16 Low  C flag  */
+#define TXSTATUS_CF_Z_BIT          0x00000008   /* -> Condition Z flag      */
+#define TXSTATUS_CF_N_BIT          0x00000004   /* -> Condition N flag      */
+#define TXSTATUS_CF_O_BIT          0x00000002   /* -> Condition O flag      */
+#define TXSTATUS_CF_C_BIT          0x00000001   /* -> Condition C flag      */
+
+/*
+ * TXCATCH0-3 register contents may store information on a memory operation
+ * that has failed if the bit TXSTATUS_CBMARKER_BIT is set.
+ */
+#define TXCATCH0_REGNUM 16
+#define TXCATCH1_REGNUM 17
+#define     TXCATCH1_ADDR_BITS   0xFFFFFFFF   /* TXCATCH1 is Addr 0-31 */
+#define     TXCATCH1_ADDR_S      0
+#define TXCATCH2_REGNUM 18
+#define     TXCATCH2_DATA0_BITS  0xFFFFFFFF   /* TXCATCH2 is Data 0-31 */
+#define     TXCATCH2_DATA0_S     0
+#define TXCATCH3_REGNUM 19
+#define     TXCATCH3_DATA1_BITS  0xFFFFFFFF   /* TXCATCH3 is Data 32-63 */
+#define     TXCATCH3_DATA1_S     0
+
+/*
+ * Detailed catch state information
+ * --------------------------------
+ */
+
+/* Contents of TXCATCH0 register */
+#define     TXCATCH0_LDRXX_BITS  0xF8000000  /* Load destination reg 0-31 */
+#define     TXCATCH0_LDRXX_S     27
+#define     TXCATCH0_LDDST_BITS  0x07FF0000  /* Load destination bits */
+#define     TXCATCH0_LDDST_S     16
+#define         TXCATCH0_LDDST_D1DSP 0x400   /* One bit set if it's a LOAD */
+#define         TXCATCH0_LDDST_D0DSP 0x200
+#define         TXCATCH0_LDDST_TMPLT 0x100
+#define         TXCATCH0_LDDST_TR    0x080
+#ifdef METAC_2_1
+#define         TXCATCH0_LDDST_FPU   0x040
+#endif
+#define         TXCATCH0_LDDST_PC    0x020
+#define         TXCATCH0_LDDST_A1    0x010
+#define         TXCATCH0_LDDST_A0    0x008
+#define         TXCATCH0_LDDST_D1    0x004
+#define         TXCATCH0_LDDST_D0    0x002
+#define         TXCATCH0_LDDST_CT    0x001
+#ifdef METAC_2_1
+#define     TXCATCH0_WATCHSTOP_BIT 0x00004000  /* Set if Data Watch set fault */
+#endif
+#define     TXCATCH0_WATCHS_BIT  0x00004000  /* Set if Data Watch set fault */
+#define     TXCATCH0_WATCH1_BIT  0x00002000  /* Set if Data Watch 1 matches */
+#define     TXCATCH0_WATCH0_BIT  0x00001000  /* Set if Data Watch 0 matches */
+#define     TXCATCH0_FAULT_BITS  0x00000C00  /* See TXSTATUS_MEMFAULT_*     */
+#define     TXCATCH0_FAULT_S     10
+#define     TXCATCH0_PRIV_BIT    0x00000200  /* Privilege of transaction    */
+#define     TXCATCH0_READ_BIT    0x00000100  /* Set for Read or Load cases  */
+
+#ifdef METAC_2_1
+/* LNKGET Marker bit in TXCATCH0 */
+#define   TXCATCH0_LNKGET_MARKER_BIT 0x00000008
+#define       TXCATCH0_PREPROC_BIT  0x00000004
+#endif
+
+/* Loads are indicated by one of the LDDST bits being set */
+#define     TXCATCH0_LDM16_BIT   0x00000004  /* Load M16 flag */
+#define     TXCATCH0_LDL2L1_BITS 0x00000003  /* Load data size L2,L1 */
+#define     TXCATCH0_LDL2L1_S    0
+
+/* Reads are indicated by the READ bit being set without LDDST bits */
+#define     TXCATCH0_RAXX_BITS   0x0000001F  /* RAXX issue port for read */
+#define     TXCATCH0_RAXX_S      0
+
+/* Write operations are all that remain if READ bit is not set */
+#define     TXCATCH0_WMASK_BITS  0x000000FF  /* Write byte lane mask */
+#define     TXCATCH0_WMASK_S     0
+
+#ifdef METAC_2_1
+
+/* When a FPU exception is signalled then FPUSPEC == FPUSPEC_TAG */
+#define     TXCATCH0_FPURDREG_BITS    0xF8000000
+#define     TXCATCH0_FPURDREG_S       27
+#define     TXCATCH0_FPUR1REG_BITS    0x07C00000
+#define     TXCATCH0_FPUR1REG_S       22
+#define     TXCATCH0_FPUSPEC_BITS     0x000F0000
+#define     TXCATCH0_FPUSPEC_S        16
+#define         TXCATCH0_FPUSPEC_TAG      0xF
+#define     TXCATCH0_FPUINSTA_BIT     0x00001000
+#define     TXCATCH0_FPUINSTQ_BIT     0x00000800
+#define     TXCATCH0_FPUINSTZ_BIT     0x00000400
+#define     TXCATCH0_FPUINSTN_BIT     0x00000200
+#define     TXCATCH0_FPUINSTO3O_BIT   0x00000100
+#define     TXCATCH0_FPUWIDTH_BITS    0x000000C0
+#define     TXCATCH0_FPUWIDTH_S       6
+#define         TXCATCH0_FPUWIDTH_FLOAT   0
+#define         TXCATCH0_FPUWIDTH_DOUBLE  1
+#define         TXCATCH0_FPUWIDTH_PAIRED  2
+#define     TXCATCH0_FPUOPENC_BITS    0x0000003F
+#define     TXCATCH0_FPUOPENC_S       0
+#define         TXCATCH0_FPUOPENC_ADD     0  /* rop1=Rs1, rop3=Rs2 */
+#define         TXCATCH0_FPUOPENC_SUB     1  /* rop1=Rs1, rop3=Rs2 */
+#define         TXCATCH0_FPUOPENC_MUL     2  /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_ATOI    3  /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_ATOX    4  /* rop3=Rs, uses #Imm */
+#define         TXCATCH0_FPUOPENC_ITOA    5  /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_XTOA    6  /* rop3=Rs, uses #Imm */
+#define         TXCATCH0_FPUOPENC_ATOH    7  /* rop2=Rs */
+#define         TXCATCH0_FPUOPENC_HTOA    8  /* rop2=Rs */
+#define         TXCATCH0_FPUOPENC_DTOF    9  /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_FTOD    10 /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_DTOL    11 /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_LTOD    12 /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_DTOXL   13 /* rop3=Rs, uses #imm */
+#define         TXCATCH0_FPUOPENC_XLTOD   14 /* rop3=Rs, uses #imm */
+#define         TXCATCH0_FPUOPENC_CMP     15 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_MIN     16 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_MAX     17 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_ADDRE   18 /* rop1=Rs1, rop3=Rs2 */
+#define         TXCATCH0_FPUOPENC_SUBRE   19 /* rop1=Rs1, rop3=Rs2 */
+#define         TXCATCH0_FPUOPENC_MULRE   20 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_MXA     21 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/
+#define         TXCATCH0_FPUOPENC_MXAS    22 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/
+#define         TXCATCH0_FPUOPENC_MAR     23 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_MARS    24 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_MUZ     25 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/
+#define         TXCATCH0_FPUOPENC_MUZS    26 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/
+#define         TXCATCH0_FPUOPENC_RCP     27 /* rop2=Rs */
+#define         TXCATCH0_FPUOPENC_RSQ     28 /* rop2=Rs */
+
+/* For floating point exceptions TXCATCH1 is used to carry extra data */
+#define     TXCATCH1_FPUR2REG_BITS    0xF8000000
+#define     TXCATCH1_FPUR2REG_S       27
+#define     TXCATCH1_FPUR3REG_BITS    0x07C00000  /* Undefined if O3O set */
+#define     TXCATCH1_FPUR3REG_S       22
+#define     TXCATCH1_FPUIMM16_BITS    0x0000FFFF
+#define     TXCATCH1_FPUIMM16_S       0
+
+#endif /* METAC_2_1 */
+
+/*
+ * TXDIVTIME register used to hold the partial base address of memory i/f
+ * state dump area. Now deprecated.
+ */
+#define     TXDIVTIME_CBBASE_MASK    0x03FFFE00
+#define     TXDIVTIME_CBBASE_LINBASE 0x80000000
+#define     TXDIVTIME_CBBASE_LINBOFF 0x00000000 /* BGnd state */
+#define     TXDIVTIME_CBBASE_LINIOFF 0x00000100 /* Int  state */
+
+/*
+ * TXDIVTIME register used to indicate if the read pipeline was dirty when a
+ * thread was interrupted, halted, or generated an exception. It is invalid
+ * to attempt to issue a further pipeline read address while the read
+ * pipeline is in the dirty state.
+ */
+#define     TXDIVTIME_RPDIRTY_BIT   0x80000000
+
+/*
+ * Further bits in the TXDIVTIME register allow interrupt handling code to
+ * short-cut the discovery the most significant bit last read from TXSTATI.
+ *
+ * This is the bit number of the trigger line that a low level interrupt
+ * handler should acknowledge and then perhaps the index of a corresponding
+ * handler function.
+ */
+#define     TXDIVTIME_IRQENC_BITS   0x0F000000
+#define     TXDIVTIME_IRQENC_S      24
+
+/*
+ * If TXDIVTIME_RPVALID_BIT is set the read pipeline contained significant
+ * information when the thread was interrupted|halted|exceptioned. Each slot
+ * containing data is indicated by a one bit in the corresponding
+ * TXDIVTIME_RPMASK_BITS bit (least significance bit relates to first
+ * location in read pipeline - most likely to have the 1 state). Empty slots
+ * contain zeroes with no interlock applied on reads if RPDIRTY is currently
+ * set with RPMASK itself being read-only state.
+ */
+#define     TXDIVTIME_RPMASK_BITS 0x003F0000   /* -> Full (1) Empty (0) */
+#define     TXDIVTIME_RPMASK_S    16
+
+/*
+ * TXPRIVEXT register can be used to single step thread execution and
+ * enforce synchronous memory i/f address checking for debugging purposes.
+ */
+#define     TXPRIVEXT_TXSTEP_BIT    0x00000004
+#define     TXPRIVEXT_MEMCHECK_BIT  0x00000002
+
+/*
+ * TXINTERNx registers holds internal state information for H/W debugging only
+ */
+#define TXINTERN0_REGNUM 23
+#define     TXINTERN0_LOCK2_BITS  0xF0000000
+#define     TXINTERN0_LOCK2_S     28
+#define     TXINTERN0_LOCK1_BITS  0x0F000000
+#define     TXINTERN0_LOCK1_S     24
+#define     TXINTERN0_TIFDF_BITS  0x0000F000
+#define     TXINTERN0_TIFDF_S     12
+#define     TXINTERN0_TIFIB_BITS  0x00000F00
+#define     TXINTERN0_TIFIB_S     8
+#define     TXINTERN0_TIFAF_BITS  0x000000F0
+#define     TXINTERN0_TIFAF_S     4
+#define     TXINTERN0_MSTATE_BITS 0x0000000F
+#define     TXINTERN0_MSTATE_S    0
+
+/*
+ * TXSTAT, TXMASK, TXPOLL, TXSTATI, TXMASKI, TXPOLLI registers from trigger
+ * bank all have similar contents (upper kick count bits not in MASK regs)
+ */
+#define TXSTAT_REGNUM  0
+#define     TXSTAT_TIMER_BIT    0x00000001
+#define     TXSTAT_TIMER_S      0
+#define     TXSTAT_KICK_BIT     0x00000002
+#define     TXSTAT_KICK_S       1
+#define     TXSTAT_DEFER_BIT    0x00000008
+#define     TXSTAT_DEFER_S      3
+#define     TXSTAT_EXTTRIG_BITS 0x0000FFF0
+#define     TXSTAT_EXTTRIG_S    4
+#define     TXSTAT_FPE_BITS     0x003F0000
+#define     TXSTAT_FPE_S        16
+#define     TXSTAT_FPE_DENORMAL_BIT    0x00200000
+#define     TXSTAT_FPE_DENORMAL_S      21
+#define     TXSTAT_FPE_INVALID_BIT     0x00100000
+#define     TXSTAT_FPE_INVALID_S       20
+#define     TXSTAT_FPE_DIVBYZERO_BIT   0x00080000
+#define     TXSTAT_FPE_DIVBYZERO_S     19
+#define     TXSTAT_FPE_OVERFLOW_BIT    0x00040000
+#define     TXSTAT_FPE_OVERFLOW_S      18
+#define     TXSTAT_FPE_UNDERFLOW_BIT   0x00020000
+#define     TXSTAT_FPE_UNDERFLOW_S     17
+#define     TXSTAT_FPE_INEXACT_BIT     0x00010000
+#define     TXSTAT_FPE_INEXACT_S       16
+#define     TXSTAT_BUSERR_BIT          0x00800000   /* Set if bus error/ack state */
+#define     TXSTAT_BUSERR_S            23
+#define         TXSTAT_BUSSTATE_BITS     0xFF000000 /* Read only */
+#define         TXSTAT_BUSSTATE_S        24
+#define     TXSTAT_KICKCNT_BITS 0xFFFF0000
+#define     TXSTAT_KICKCNT_S    16
+#define TXMASK_REGNUM  1
+#define TXSTATI_REGNUM 2
+#define     TXSTATI_BGNDHALT_BIT    0x00000004
+#define TXMASKI_REGNUM 3
+#define TXPOLL_REGNUM  4
+#define TXPOLLI_REGNUM 6
+
+/*
+ * TXDRCTRL register can be used to partition the DSP RAM space available to
+ * this thread at startup. This is achieved by offsetting the region allocated
+ * to each thread.
+ */
+#define     TXDRCTRL_D1PARTOR_BITS  0x00000F00  /* OR's into top 4 bits */
+#define     TXDRCTRL_D1PARTOR_S     8
+#define     TXDRCTRL_D0PARTOR_BITS  0x0000000F  /* OR's into top 4 bits */
+#define     TXDRCTRL_D0PARTOR_S     0
+/* Given extracted Pow and Or fields this is threads base within DSP RAM */
+#define     TXDRCTRL_DXBASE(Pow, Or)  ((Or)<<((Pow)-4))
+
+/*****************************************************************************
+ *                      RUN TIME TRACE CONTROL REGISTERS
+ ****************************************************************************/
+/*
+ * The following values are only relevant to code that implements run-time
+ *  trace features within the META Core
+ */
+#define TTEXEC      TT.0
+#define TTCTRL      TT.1
+#define TTMARK      TT.2
+#define TTREC       TT.3
+#define GTEXEC      TT.4
+
+#define TTEXEC_REGNUM               0
+#define     TTEXEC_EXTTRIGAND_BITS      0x7F000000
+#define     TTEXEC_EXTTRIGAND_S         24
+#define     TTEXEC_EXTTRIGEN_BIT        0x00008000
+#define     TTEXEC_EXTTRIGMATCH_BITS    0x00007F00
+#define     TTEXEC_EXTTRIGMATCH_S       8
+#define     TTEXEC_TCMODE_BITS          0x00000003
+#define     TTEXEC_TCMODE_S             0
+
+#define TTCTRL_REGNUM               1
+#define     TTCTRL_TRACETT_BITS         0x00008000
+#define     TTCTRL_TRACETT_S            15
+#define     TTCTRL_TRACEALL_BITS        0x00002000
+#define     TTCTRL_TRACEALL_S           13
+#ifdef METAC_2_1
+#define     TTCTRL_TRACEALLTAG_BITS     0x00000400
+#define     TTCTRL_TRACEALLTAG_S        10
+#endif /* METAC_2_1 */
+#define     TTCTRL_TRACETAG_BITS        0x00000200
+#define     TTCTRL_TRACETAG_S           9
+#define     TTCTRL_TRACETTPC_BITS       0x00000080
+#define     TTCTRL_TRACETTPC_S          7
+#define     TTCTRL_TRACEMPC_BITS        0x00000020
+#define     TTCTRL_TRACEMPC_S           5
+#define     TTCTRL_TRACEEN_BITS         0x00000008
+#define     TTCTRL_TRACEEN_S            3
+#define     TTCTRL_TRACEEN1_BITS        0x00000004
+#define     TTCTRL_TRACEEN1_S           2
+#define     TTCTRL_TRACEPC_BITS         0x00000002
+#define     TTCTRL_TRACEPC_S            1
+
+#ifdef METAC_2_1
+#define TTMARK_REGNUM   2
+#define TTMARK_BITS                 0xFFFFFFFF
+#define TTMARK_S                    0x0
+
+#define TTREC_REGNUM    3
+#define TTREC_BITS                  0xFFFFFFFFFFFFFFFF
+#define TTREC_S                     0x0
+#endif /* METAC_2_1 */
+
+#define GTEXEC_REGNUM               4
+#define     GTEXEC_DCRUN_BITS           0x80000000
+#define     GTEXEC_DCRUN_S              31
+#define     GTEXEC_ICMODE_BITS          0x0C000000
+#define     GTEXEC_ICMODE_S             26
+#define     GTEXEC_TCMODE_BITS          0x03000000
+#define     GTEXEC_TCMODE_S             24
+#define     GTEXEC_PERF1CMODE_BITS      0x00040000
+#define     GTEXEC_PERF1CMODE_S         18
+#define     GTEXEC_PERF0CMODE_BITS      0x00010000
+#define     GTEXEC_PERF0CMODE_S         16
+#define     GTEXEC_REFMSEL_BITS         0x0000F000
+#define     GTEXEC_REFMSEL_S            12
+#define     GTEXEC_METRICTH_BITS        0x000003FF
+#define     GTEXEC_METRICTH_S           0
+
+#ifdef METAC_2_1
+/*
+ * Clock Control registers
+ * -----------------------
+ */
+#define TXCLKCTRL_REGNUM        22
+
+/*
+ * Default setting is with clocks always on (DEFON), turning all clocks off
+ * can only be done from external devices (OFF), enabling automatic clock
+ * gating will allow clocks to stop as units fall idle.
+ */
+#define TXCLKCTRL_ALL_OFF       0x02222222
+#define TXCLKCTRL_ALL_DEFON     0x01111111
+#define TXCLKCTRL_ALL_AUTO      0x02222222
+
+/*
+ * Individual fields control caches, floating point and main data/addr units
+ */
+#define TXCLKCTRL_CLOCKIC_BITS  0x03000000
+#define TXCLKCTRL_CLOCKIC_S     24
+#define TXCLKCTRL_CLOCKDC_BITS  0x00300000
+#define TXCLKCTRL_CLOCKDC_S     20
+#define TXCLKCTRL_CLOCKFP_BITS  0x00030000
+#define TXCLKCTRL_CLOCKFP_S     16
+#define TXCLKCTRL_CLOCKD1_BITS  0x00003000
+#define TXCLKCTRL_CLOCKD1_S     12
+#define TXCLKCTRL_CLOCKD0_BITS  0x00000300
+#define TXCLKCTRL_CLOCKD0_S     8
+#define TXCLKCTRL_CLOCKA1_BITS  0x00000030
+#define TXCLKCTRL_CLOCKA1_S     4
+#define TXCLKCTRL_CLOCKA0_BITS  0x00000003
+#define TXCLKCTRL_CLOCKA0_S     0
+
+/*
+ * Individual settings for each field are common
+ */
+#define TXCLKCTRL_CLOCKxx_OFF   0
+#define TXCLKCTRL_CLOCKxx_DEFON 1
+#define TXCLKCTRL_CLOCKxx_AUTO  2
+
+#endif /* METAC_2_1 */
+
+#ifdef METAC_2_1
+/*
+ * Fast interrupt new bits
+ * ------------------------------------
+ */
+#define TXSTATUS_IPTOGGLE_BIT           0x80000000 /* Prev PToggle of TXPRIVEXT */
+#define TXSTATUS_ISTATE_BIT             0x40000000 /* IState bit */
+#define TXSTATUS_IWAIT_BIT              0x20000000 /* wait indefinitely in decision step*/
+#define TXSTATUS_IEXCEPT_BIT            0x10000000 /* Indicate an exception occured */
+#define TXSTATUS_IRPCOUNT_BITS          0x0E000000 /* Number of 'dirty' date entries*/
+#define TXSTATUS_IRPCOUNT_S             25
+#define TXSTATUS_IRQSTAT_BITS           0x0000F000 /* IRQEnc bits, trigger or interrupts */
+#define TXSTATUS_IRQSTAT_S              12
+#define TXSTATUS_LNKSETOK_BIT           0x00000020 /* LNKSetOK bit, successful LNKSET */
+
+/* New fields in TXDE for fast interrupt system */
+#define TXDIVTIME_IACTIVE_BIT           0x00008000 /* Enable new interrupt system */
+#define TXDIVTIME_INONEST_BIT           0x00004000 /* Gate nested interrupt */
+#define TXDIVTIME_IREGIDXGATE_BIT       0x00002000 /* gate of the IRegIdex field */
+#define TXDIVTIME_IREGIDX_BITS          0x00001E00 /* Index of A0.0/1 replaces */
+#define TXDIVTIME_IREGIDX_S             9
+#define TXDIVTIME_NOST_BIT              0x00000100 /* disable superthreading bit */
+#endif
+
+#endif /* _ASM_METAG_REGS_H_ */
diff --git a/arch/metag/include/asm/mman.h b/arch/metag/include/asm/mman.h
new file mode 100644
index 0000000..17999db
--- /dev/null
+++ b/arch/metag/include/asm/mman.h
@@ -0,0 +1,11 @@
+#ifndef __METAG_MMAN_H__
+#define __METAG_MMAN_H__
+
+#include <uapi/asm/mman.h>
+
+#ifndef __ASSEMBLY__
+#define arch_mmap_check metag_mmap_check
+int metag_mmap_check(unsigned long addr, unsigned long len,
+		     unsigned long flags);
+#endif
+#endif /* __METAG_MMAN_H__ */
diff --git a/arch/metag/include/asm/mmu.h b/arch/metag/include/asm/mmu.h
new file mode 100644
index 0000000..9c32114
--- /dev/null
+++ b/arch/metag/include/asm/mmu.h
@@ -0,0 +1,77 @@
+#ifndef __MMU_H
+#define __MMU_H
+
+#ifdef CONFIG_METAG_USER_TCM
+#include <linux/list.h>
+#endif
+
+#ifdef CONFIG_HUGETLB_PAGE
+#include <asm/page.h>
+#endif
+
+typedef struct {
+	/* Software pgd base pointer used for Meta 1.x MMU. */
+	unsigned long pgd_base;
+#ifdef CONFIG_METAG_USER_TCM
+	struct list_head tcm;
+#endif
+#ifdef CONFIG_HUGETLB_PAGE
+#if HPAGE_SHIFT < HUGEPT_SHIFT
+	/* last partially filled huge page table address */
+	unsigned long part_huge;
+#endif
+#endif
+} mm_context_t;
+
+/* Given a virtual address, return the pte for the top level 4meg entry
+ * that maps that address.
+ * Returns 0 (an empty pte) if that range is not mapped.
+ */
+unsigned long mmu_read_first_level_page(unsigned long vaddr);
+
+/* Given a linear (virtual) address, return the second level 4k pte
+ * that maps that address.  Returns 0 if the address is not mapped.
+ */
+unsigned long mmu_read_second_level_page(unsigned long vaddr);
+
+/* Get the virtual base address of the MMU */
+unsigned long mmu_get_base(void);
+
+/* Initialize the MMU. */
+void mmu_init(unsigned long mem_end);
+
+#ifdef CONFIG_METAG_META21_MMU
+/*
+ * For cpu "cpu" calculate and return the address of the
+ * MMCU_TnLOCAL_TABLE_PHYS0 if running in local-space or
+ * MMCU_TnGLOBAL_TABLE_PHYS0 if running in global-space.
+ */
+static inline unsigned long mmu_phys0_addr(unsigned int cpu)
+{
+	unsigned long phys0;
+
+	phys0 = (MMCU_T0LOCAL_TABLE_PHYS0 +
+		(MMCU_TnX_TABLE_PHYSX_STRIDE * cpu)) +
+		(MMCU_TXG_TABLE_PHYSX_OFFSET * is_global_space(PAGE_OFFSET));
+
+	return phys0;
+}
+
+/*
+ * For cpu "cpu" calculate and return the address of the
+ * MMCU_TnLOCAL_TABLE_PHYS1 if running in local-space or
+ * MMCU_TnGLOBAL_TABLE_PHYS1 if running in global-space.
+ */
+static inline unsigned long mmu_phys1_addr(unsigned int cpu)
+{
+	unsigned long phys1;
+
+	phys1 = (MMCU_T0LOCAL_TABLE_PHYS1 +
+		(MMCU_TnX_TABLE_PHYSX_STRIDE * cpu)) +
+		(MMCU_TXG_TABLE_PHYSX_OFFSET * is_global_space(PAGE_OFFSET));
+
+	return phys1;
+}
+#endif /* CONFIG_METAG_META21_MMU */
+
+#endif
diff --git a/arch/metag/include/asm/mmu_context.h b/arch/metag/include/asm/mmu_context.h
new file mode 100644
index 0000000..ae2a71b
--- /dev/null
+++ b/arch/metag/include/asm/mmu_context.h
@@ -0,0 +1,113 @@
+#ifndef __METAG_MMU_CONTEXT_H
+#define __METAG_MMU_CONTEXT_H
+
+#include <asm-generic/mm_hooks.h>
+
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+
+#include <linux/io.h>
+
+static inline void enter_lazy_tlb(struct mm_struct *mm,
+				  struct task_struct *tsk)
+{
+}
+
+static inline int init_new_context(struct task_struct *tsk,
+				   struct mm_struct *mm)
+{
+#ifndef CONFIG_METAG_META21_MMU
+	/* We use context to store a pointer to the page holding the
+	 * pgd of a process while it is running. While a process is not
+	 * running the pgd and context fields should be equal.
+	 */
+	mm->context.pgd_base = (unsigned long) mm->pgd;
+#endif
+#ifdef CONFIG_METAG_USER_TCM
+	INIT_LIST_HEAD(&mm->context.tcm);
+#endif
+	return 0;
+}
+
+#ifdef CONFIG_METAG_USER_TCM
+
+#include <linux/slab.h>
+#include <asm/tcm.h>
+
+static inline void destroy_context(struct mm_struct *mm)
+{
+	struct tcm_allocation *pos, *n;
+
+	list_for_each_entry_safe(pos, n,  &mm->context.tcm, list) {
+		tcm_free(pos->tag, pos->addr, pos->size);
+		list_del(&pos->list);
+		kfree(pos);
+	}
+}
+#else
+#define destroy_context(mm)		do { } while (0)
+#endif
+
+#ifdef CONFIG_METAG_META21_MMU
+static inline void load_pgd(pgd_t *pgd, int thread)
+{
+	unsigned long phys0 = mmu_phys0_addr(thread);
+	unsigned long phys1 = mmu_phys1_addr(thread);
+
+	/*
+	 *  0x900 2Gb address space
+	 *  The permission bits apply to MMU table region which gives a 2MB
+	 *  window into physical memory. We especially don't want userland to be
+	 *  able to access this.
+	 */
+	metag_out32(0x900 | _PAGE_CACHEABLE | _PAGE_PRIV | _PAGE_WRITE |
+		    _PAGE_PRESENT, phys0);
+	/* Set new MMU base address */
+	metag_out32(__pa(pgd) & MMCU_TBLPHYS1_ADDR_BITS, phys1);
+}
+#endif
+
+static inline void switch_mmu(struct mm_struct *prev, struct mm_struct *next)
+{
+#ifdef CONFIG_METAG_META21_MMU
+	load_pgd(next->pgd, hard_processor_id());
+#else
+	unsigned int i;
+
+	/* prev->context == prev->pgd in the case where we are initially
+	   switching from the init task to the first process. */
+	if (prev->context.pgd_base != (unsigned long) prev->pgd) {
+		for (i = FIRST_USER_PGD_NR; i < USER_PTRS_PER_PGD; i++)
+			((pgd_t *) prev->context.pgd_base)[i] = prev->pgd[i];
+	} else
+		prev->pgd = (pgd_t *)mmu_get_base();
+
+	next->pgd = prev->pgd;
+	prev->pgd = (pgd_t *) prev->context.pgd_base;
+
+	for (i = FIRST_USER_PGD_NR; i < USER_PTRS_PER_PGD; i++)
+		next->pgd[i] = ((pgd_t *) next->context.pgd_base)[i];
+
+	flush_cache_all();
+#endif
+	flush_tlb_all();
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			     struct task_struct *tsk)
+{
+	if (prev != next)
+		switch_mmu(prev, next);
+}
+
+static inline void activate_mm(struct mm_struct *prev_mm,
+			       struct mm_struct *next_mm)
+{
+	switch_mmu(prev_mm, next_mm);
+}
+
+#define deactivate_mm(tsk, mm)   do { } while (0)
+
+#endif
diff --git a/arch/metag/include/asm/mmzone.h b/arch/metag/include/asm/mmzone.h
new file mode 100644
index 0000000..9c88a9c
--- /dev/null
+++ b/arch/metag/include/asm/mmzone.h
@@ -0,0 +1,42 @@
+#ifndef __ASM_METAG_MMZONE_H
+#define __ASM_METAG_MMZONE_H
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+#include <linux/numa.h>
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid)		(node_data[nid])
+
+static inline int pfn_to_nid(unsigned long pfn)
+{
+	int nid;
+
+	for (nid = 0; nid < MAX_NUMNODES; nid++)
+		if (pfn >= node_start_pfn(nid) && pfn <= node_end_pfn(nid))
+			break;
+
+	return nid;
+}
+
+static inline struct pglist_data *pfn_to_pgdat(unsigned long pfn)
+{
+	return NODE_DATA(pfn_to_nid(pfn));
+}
+
+/* arch/metag/mm/numa.c */
+void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end);
+#else
+static inline void
+setup_bootmem_node(int nid, unsigned long start, unsigned long end)
+{
+}
+#endif /* CONFIG_NEED_MULTIPLE_NODES */
+
+#ifdef CONFIG_NUMA
+/* SoC specific mem init */
+void __init soc_mem_setup(void);
+#else
+static inline void __init soc_mem_setup(void) {};
+#endif
+
+#endif /* __ASM_METAG_MMZONE_H */
diff --git a/arch/metag/include/asm/module.h b/arch/metag/include/asm/module.h
new file mode 100644
index 0000000..e47e609
--- /dev/null
+++ b/arch/metag/include/asm/module.h
@@ -0,0 +1,37 @@
+#ifndef _ASM_METAG_MODULE_H
+#define _ASM_METAG_MODULE_H
+
+#include <asm-generic/module.h>
+
+struct metag_plt_entry {
+	/* Indirect jump instruction sequence. */
+	unsigned long tramp[2];
+};
+
+struct mod_arch_specific {
+	/* Indices of PLT sections within module. */
+	unsigned int core_plt_section, init_plt_section;
+};
+
+#if defined CONFIG_METAG_META12
+#define MODULE_PROC_FAMILY "META 1.2 "
+#elif defined CONFIG_METAG_META21
+#define MODULE_PROC_FAMILY "META 2.1 "
+#else
+#define MODULE_PROC_FAMILY ""
+#endif
+
+#ifdef CONFIG_4KSTACKS
+#define MODULE_STACKSIZE "4KSTACKS "
+#else
+#define MODULE_STACKSIZE ""
+#endif
+
+#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE
+
+#ifdef MODULE
+asm(".section .plt,\"ax\",@progbits; .balign 8; .previous");
+asm(".section .init.plt,\"ax\",@progbits; .balign 8; .previous");
+#endif
+
+#endif /* _ASM_METAG_MODULE_H */
diff --git a/arch/metag/include/asm/page.h b/arch/metag/include/asm/page.h
new file mode 100644
index 0000000..1e8e281
--- /dev/null
+++ b/arch/metag/include/asm/page.h
@@ -0,0 +1,128 @@
+#ifndef _METAG_PAGE_H
+#define _METAG_PAGE_H
+
+#include <linux/const.h>
+
+#include <asm/metag_mem.h>
+
+/* PAGE_SHIFT determines the page size */
+#if defined(CONFIG_PAGE_SIZE_4K)
+#define PAGE_SHIFT	12
+#elif defined(CONFIG_PAGE_SIZE_8K)
+#define PAGE_SHIFT	13
+#elif defined(CONFIG_PAGE_SIZE_16K)
+#define PAGE_SHIFT	14
+#endif
+
+#define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)
+#define PAGE_MASK	(~(PAGE_SIZE-1))
+
+#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
+# define HPAGE_SHIFT	13
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
+# define HPAGE_SHIFT	14
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
+# define HPAGE_SHIFT	15
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
+# define HPAGE_SHIFT	16
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
+# define HPAGE_SHIFT	17
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
+# define HPAGE_SHIFT	18
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
+# define HPAGE_SHIFT	19
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
+# define HPAGE_SHIFT	20
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
+# define HPAGE_SHIFT	21
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
+# define HPAGE_SHIFT	22
+#endif
+
+#ifdef CONFIG_HUGETLB_PAGE
+# define HPAGE_SIZE		(1UL << HPAGE_SHIFT)
+# define HPAGE_MASK		(~(HPAGE_SIZE-1))
+# define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT-PAGE_SHIFT)
+/*
+ * We define our own hugetlb_get_unmapped_area so we don't corrupt 2nd level
+ * page tables with normal pages in them.
+ */
+# define HUGEPT_SHIFT		(22)
+# define HUGEPT_ALIGN		(1 << HUGEPT_SHIFT)
+# define HUGEPT_MASK		(HUGEPT_ALIGN - 1)
+# define ALIGN_HUGEPT(x)	ALIGN(x, HUGEPT_ALIGN)
+# define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+
+#ifndef __ASSEMBLY__
+
+/* On the Meta, we would like to know if the address (heap) we have is
+ * in local or global space.
+ */
+#define is_global_space(addr)	((addr) > 0x7fffffff)
+#define is_local_space(addr)	(!is_global_space(addr))
+
+extern void clear_page(void *to);
+extern void copy_page(void *to, void *from);
+
+#define clear_user_page(page, vaddr, pg)        clear_page(page)
+#define copy_user_page(to, from, vaddr, pg)     copy_page(to, from)
+
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long pgd; } pgd_t;
+typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;
+
+#define pte_val(x)	((x).pte)
+#define pgd_val(x)	((x).pgd)
+#define pgprot_val(x)	((x).pgprot)
+
+#define __pte(x)	((pte_t) { (x) })
+#define __pgd(x)	((pgd_t) { (x) })
+#define __pgprot(x)	((pgprot_t) { (x) })
+
+/* The kernel must now ALWAYS live at either 0xC0000000 or 0x40000000 - that
+ * being either global or local space.
+ */
+#define PAGE_OFFSET		(CONFIG_PAGE_OFFSET)
+
+#if PAGE_OFFSET >= LINGLOBAL_BASE
+#define META_MEMORY_BASE  LINGLOBAL_BASE
+#define META_MEMORY_LIMIT LINGLOBAL_LIMIT
+#else
+#define META_MEMORY_BASE  LINLOCAL_BASE
+#define META_MEMORY_LIMIT LINLOCAL_LIMIT
+#endif
+
+/* Offset between physical and virtual mapping of kernel memory. */
+extern unsigned int meta_memoffset;
+
+#define __pa(x) ((unsigned long)(((unsigned long)(x)) - meta_memoffset))
+#define __va(x) ((void *)((unsigned long)(((unsigned long)(x)) + meta_memoffset)))
+
+extern unsigned long pfn_base;
+#define ARCH_PFN_OFFSET         (pfn_base)
+#define virt_to_page(kaddr)     pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define page_to_virt(page)      __va(page_to_pfn(page) << PAGE_SHIFT)
+#define virt_addr_valid(kaddr)  pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#define page_to_phys(page)      (page_to_pfn(page) << PAGE_SHIFT)
+#ifdef CONFIG_FLATMEM
+extern unsigned long max_pfn;
+extern unsigned long min_low_pfn;
+#define pfn_valid(pfn)		((pfn) >= min_low_pfn && (pfn) < max_pfn)
+#endif
+
+#define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
+
+#define VM_DATA_DEFAULT_FLAGS   (VM_READ | VM_WRITE | VM_EXEC | \
+				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#include <asm-generic/memory_model.h>
+#include <asm-generic/getorder.h>
+
+#endif /* __ASSMEBLY__ */
+
+#endif /* _METAG_PAGE_H */
diff --git a/arch/metag/include/asm/perf_event.h b/arch/metag/include/asm/perf_event.h
new file mode 100644
index 0000000..105bbff
--- /dev/null
+++ b/arch/metag/include/asm/perf_event.h
@@ -0,0 +1,4 @@
+#ifndef __ASM_METAG_PERF_EVENT_H
+#define __ASM_METAG_PERF_EVENT_H
+
+#endif /* __ASM_METAG_PERF_EVENT_H */
diff --git a/arch/metag/include/asm/pgalloc.h b/arch/metag/include/asm/pgalloc.h
new file mode 100644
index 0000000..275d928
--- /dev/null
+++ b/arch/metag/include/asm/pgalloc.h
@@ -0,0 +1,79 @@
+#ifndef _METAG_PGALLOC_H
+#define _METAG_PGALLOC_H
+
+#include <linux/threads.h>
+#include <linux/mm.h>
+
+#define pmd_populate_kernel(mm, pmd, pte) \
+	set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
+
+#define pmd_populate(mm, pmd, pte) \
+	set_pmd(pmd, __pmd(_PAGE_TABLE | page_to_phys(pte)))
+
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
+/*
+ * Allocate and free page tables.
+ */
+#ifdef CONFIG_METAG_META21_MMU
+static inline void pgd_ctor(pgd_t *pgd)
+{
+	memcpy(pgd + USER_PTRS_PER_PGD,
+	       swapper_pg_dir + USER_PTRS_PER_PGD,
+	       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+}
+#else
+#define pgd_ctor(x)	do { } while (0)
+#endif
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
+	if (pgd)
+		pgd_ctor(pgd);
+	return pgd;
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	free_page((unsigned long)pgd);
+}
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+					  unsigned long address)
+{
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT |
+					      __GFP_ZERO);
+	return pte;
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+				      unsigned long address)
+{
+	struct page *pte;
+	pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0);
+	if (pte)
+		pgtable_page_ctor(pte);
+	return pte;
+}
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	free_page((unsigned long)pte);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
+{
+	pgtable_page_dtor(pte);
+	__free_page(pte);
+}
+
+#define __pte_free_tlb(tlb, pte, addr)				\
+	do {							\
+		pgtable_page_dtor(pte);				\
+		tlb_remove_page((tlb), (pte));			\
+	} while (0)
+
+#define check_pgt_cache()	do { } while (0)
+
+#endif
diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h
new file mode 100644
index 0000000..1cd13d5
--- /dev/null
+++ b/arch/metag/include/asm/pgtable.h
@@ -0,0 +1,370 @@
+/*
+ * Macros and functions to manipulate Meta page tables.
+ */
+
+#ifndef _METAG_PGTABLE_H
+#define _METAG_PGTABLE_H
+
+#include <asm-generic/pgtable-nopmd.h>
+
+/* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */
+#if PAGE_OFFSET >= LINGLOBAL_BASE
+#define CONSISTENT_START	0xF7000000
+#define CONSISTENT_END		0xF73FFFFF
+#define VMALLOC_START		0xF8000000
+#define VMALLOC_END		0xFFFEFFFF
+#else
+#define CONSISTENT_START	0x77000000
+#define CONSISTENT_END		0x773FFFFF
+#define VMALLOC_START		0x78000000
+#define VMALLOC_END		0x7FFFFFFF
+#endif
+
+/*
+ * Definitions for MMU descriptors
+ *
+ * These are the hardware bits in the MMCU pte entries.
+ * Derived from the Meta toolkit headers.
+ */
+#define _PAGE_PRESENT		MMCU_ENTRY_VAL_BIT
+#define _PAGE_WRITE		MMCU_ENTRY_WR_BIT
+#define _PAGE_PRIV		MMCU_ENTRY_PRIV_BIT
+/* Write combine bit - this can cause writes to occur out of order */
+#define _PAGE_WR_COMBINE	MMCU_ENTRY_WRC_BIT
+/* Sys coherent bit - this bit is never used by Linux */
+#define _PAGE_SYS_COHERENT	MMCU_ENTRY_SYS_BIT
+#define _PAGE_ALWAYS_ZERO_1	0x020
+#define _PAGE_CACHE_CTRL0	0x040
+#define _PAGE_CACHE_CTRL1	0x080
+#define _PAGE_ALWAYS_ZERO_2	0x100
+#define _PAGE_ALWAYS_ZERO_3	0x200
+#define _PAGE_ALWAYS_ZERO_4	0x400
+#define _PAGE_ALWAYS_ZERO_5	0x800
+
+/* These are software bits that we stuff into the gaps in the hardware
+ * pte entries that are not used.  Note, these DO get stored in the actual
+ * hardware, but the hardware just does not use them.
+ */
+#define _PAGE_ACCESSED		_PAGE_ALWAYS_ZERO_1
+#define _PAGE_DIRTY		_PAGE_ALWAYS_ZERO_2
+#define _PAGE_FILE		_PAGE_ALWAYS_ZERO_3
+
+/* Pages owned, and protected by, the kernel. */
+#define _PAGE_KERNEL		_PAGE_PRIV
+
+/* No cacheing of this page */
+#define _PAGE_CACHE_WIN0	(MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S)
+/* burst cacheing - good for data streaming */
+#define _PAGE_CACHE_WIN1	(MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S)
+/* One cache way per thread */
+#define _PAGE_CACHE_WIN2	(MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S)
+/* Full on cacheing */
+#define _PAGE_CACHE_WIN3	(MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)
+
+#define _PAGE_CACHEABLE		(_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE)
+
+/* which bits are used for cache control ... */
+#define _PAGE_CACHE_MASK	(_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \
+				 _PAGE_WR_COMBINE)
+
+/* This is a mask of the bits that pte_modify is allowed to change. */
+#define _PAGE_CHG_MASK		(PAGE_MASK)
+
+#define _PAGE_SZ_SHIFT		1
+#define _PAGE_SZ_4K		(0x0)
+#define _PAGE_SZ_8K		(0x1 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_16K		(0x2 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_32K		(0x3 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_64K		(0x4 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_128K		(0x5 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_256K		(0x6 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_512K		(0x7 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_1M		(0x8 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_2M		(0x9 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_4M		(0xa << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_MASK		(0xf << _PAGE_SZ_SHIFT)
+
+#if defined(CONFIG_PAGE_SIZE_4K)
+#define _PAGE_SZ		(_PAGE_SZ_4K)
+#elif defined(CONFIG_PAGE_SIZE_8K)
+#define _PAGE_SZ		(_PAGE_SZ_8K)
+#elif defined(CONFIG_PAGE_SIZE_16K)
+#define _PAGE_SZ		(_PAGE_SZ_16K)
+#endif
+#define _PAGE_TABLE		(_PAGE_SZ | _PAGE_PRESENT)
+
+#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_8K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_16K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_32K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_64K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_128K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_256K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
+# define _PAGE_SZHUGE		(_PAGE_SZ_512K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
+# define _PAGE_SZHUGE		(_PAGE_SZ_1M)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
+# define _PAGE_SZHUGE		(_PAGE_SZ_2M)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
+# define _PAGE_SZHUGE		(_PAGE_SZ_4M)
+#endif
+
+/*
+ * The Linux memory management assumes a three-level page table setup. On
+ * Meta, we use that, but "fold" the mid level into the top-level page
+ * table.
+ */
+
+/* PGDIR_SHIFT determines the size of the area a second-level page table can
+ * map. This is always 4MB.
+ */
+
+#define PGDIR_SHIFT	22
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/*
+ * Entries per page directory level: we use a two-level, so
+ * we don't really have any PMD directory physically. First level tables
+ * always map 2Gb (local or global) at a granularity of 4MB, second-level
+ * tables map 4MB with a granularity between 4MB and 4kB (between 1 and
+ * 1024 entries).
+ */
+#define PTRS_PER_PTE	(PGDIR_SIZE/PAGE_SIZE)
+#define HPTRS_PER_PTE	(PGDIR_SIZE/HPAGE_SIZE)
+#define PTRS_PER_PGD	512
+
+#define USER_PTRS_PER_PGD	256
+#define FIRST_USER_ADDRESS	META_MEMORY_BASE
+#define FIRST_USER_PGD_NR	pgd_index(FIRST_USER_ADDRESS)
+
+#define PAGE_NONE	__pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
+				 _PAGE_CACHEABLE)
+
+#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_WRITE | \
+				 _PAGE_ACCESSED | _PAGE_CACHEABLE)
+#define PAGE_SHARED_C	PAGE_SHARED
+#define PAGE_COPY	__pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
+				 _PAGE_CACHEABLE)
+#define PAGE_COPY_C	PAGE_COPY
+
+#define PAGE_READONLY	__pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
+				 _PAGE_CACHEABLE)
+#define PAGE_KERNEL	__pgprot(_PAGE_PRESENT | _PAGE_DIRTY | \
+				 _PAGE_ACCESSED | _PAGE_WRITE | \
+				 _PAGE_CACHEABLE | _PAGE_KERNEL)
+
+#define __P000	PAGE_NONE
+#define __P001	PAGE_READONLY
+#define __P010	PAGE_COPY
+#define __P011	PAGE_COPY
+#define __P100	PAGE_READONLY
+#define __P101	PAGE_READONLY
+#define __P110	PAGE_COPY_C
+#define __P111	PAGE_COPY_C
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_READONLY
+#define __S010	PAGE_SHARED
+#define __S011	PAGE_SHARED
+#define __S100	PAGE_READONLY
+#define __S101	PAGE_READONLY
+#define __S110	PAGE_SHARED_C
+#define __S111	PAGE_SHARED_C
+
+#ifndef __ASSEMBLY__
+
+#include <asm/page.h>
+
+/* zero page used for uninitialized stuff */
+extern unsigned long empty_zero_page;
+#define ZERO_PAGE(vaddr)	(virt_to_page(empty_zero_page))
+
+/* Certain architectures need to do special things when pte's
+ * within a page table are directly modified.  Thus, the following
+ * hook is made available.
+ */
+#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))
+#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
+
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
+
+#define pte_pfn(pte)		(pte_val(pte) >> PAGE_SHIFT)
+
+#define pfn_pte(pfn, prot)	__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
+
+#define pte_none(x)		(!pte_val(x))
+#define pte_present(x)		(pte_val(x) & _PAGE_PRESENT)
+#define pte_clear(mm, addr, xp)	do { pte_val(*(xp)) = 0; } while (0)
+
+#define pmd_none(x)		(!pmd_val(x))
+#define pmd_bad(x)		((pmd_val(x) & ~(PAGE_MASK | _PAGE_SZ_MASK)) \
+					!= (_PAGE_TABLE & ~_PAGE_SZ_MASK))
+#define pmd_present(x)		(pmd_val(x) & _PAGE_PRESENT)
+#define pmd_clear(xp)		do { pmd_val(*(xp)) = 0; } while (0)
+
+#define pte_page(x)		pfn_to_page(pte_pfn(x))
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+
+static inline int pte_write(pte_t pte)   { return pte_val(pte) & _PAGE_WRITE; }
+static inline int pte_dirty(pte_t pte)   { return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte)   { return pte_val(pte) & _PAGE_ACCESSED; }
+static inline int pte_file(pte_t pte)    { return pte_val(pte) & _PAGE_FILE; }
+static inline int pte_special(pte_t pte) { return 0; }
+
+static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= (~_PAGE_WRITE); return pte; }
+static inline pte_t pte_mkclean(pte_t pte)   { pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkold(pte_t pte)     { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkwrite(pte_t pte)   { pte_val(pte) |= _PAGE_WRITE; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte)   { pte_val(pte) |= _PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkyoung(pte_t pte)   { pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
+static inline pte_t pte_mkhuge(pte_t pte)    { return pte; }
+
+/*
+ * Macro and implementation to make a page protection as uncacheable.
+ */
+#define pgprot_writecombine(prot)					\
+	__pgprot(pgprot_val(prot) & ~(_PAGE_CACHE_CTRL1 | _PAGE_CACHE_CTRL0))
+
+#define pgprot_noncached(prot)						\
+	__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE)
+
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+#define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot);
+	return pte;
+}
+
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+{
+	unsigned long paddr = pmd_val(pmd) & PAGE_MASK;
+	if (!paddr)
+		return 0;
+	return (unsigned long)__va(paddr);
+}
+
+#define pmd_page(pmd)		(pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
+#define pmd_page_shift(pmd)	(12 + ((pmd_val(pmd) & _PAGE_SZ_MASK) \
+					>> _PAGE_SZ_SHIFT))
+#define pmd_num_ptrs(pmd)	(PGDIR_SIZE >> pmd_page_shift(pmd))
+
+/*
+ * Each pgd is only 2k, mapping 2Gb (local or global). If we're in global
+ * space drop the top bit before indexing the pgd.
+ */
+#if PAGE_OFFSET >= LINGLOBAL_BASE
+#define pgd_index(address)	((((address) & ~0x80000000) >> PGDIR_SHIFT) \
+							& (PTRS_PER_PGD-1))
+#else
+#define pgd_index(address)	(((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#endif
+
+#define pgd_offset(mm, address)	((mm)->pgd + pgd_index(address))
+
+#define pgd_offset_k(address)	pgd_offset(&init_mm, address)
+
+#define pmd_index(address)	(((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+
+/* Find an entry in the second-level page table.. */
+#if !defined(CONFIG_HUGETLB_PAGE)
+  /* all pages are of size (1 << PAGE_SHIFT), so no need to read 1st level pt */
+# define pte_index(pmd, address) \
+	(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#else
+  /* some pages are huge, so read 1st level pt to find out */
+# define pte_index(pmd, address) \
+	(((address) >> pmd_page_shift(pmd)) & (pmd_num_ptrs(pmd) - 1))
+#endif
+#define pte_offset_kernel(dir, address) \
+	((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(*(dir), address))
+#define pte_offset_map(dir, address)		pte_offset_kernel(dir, address)
+#define pte_offset_map_nested(dir, address)	pte_offset_kernel(dir, address)
+
+#define pte_unmap(pte)		do { } while (0)
+#define pte_unmap_nested(pte)	do { } while (0)
+
+#define pte_ERROR(e) \
+	pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pgd_ERROR(e) \
+	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * Meta doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+static inline void update_mmu_cache(struct vm_area_struct *vma,
+				    unsigned long address, pte_t *pte)
+{
+}
+
+/*
+ * Encode and decode a swap entry (must be !pte_none(e) && !pte_present(e))
+ * Since PAGE_PRESENT is bit 1, we can use the bits above that.
+ */
+#define __swp_type(x)			(((x).val >> 1) & 0xff)
+#define __swp_offset(x)			((x).val >> 10)
+#define __swp_entry(type, offset)	((swp_entry_t) { ((type) << 1) | \
+					 ((offset) << 10) })
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)		((pte_t) { (x).val })
+
+#define PTE_FILE_MAX_BITS	22
+#define pte_to_pgoff(x)		(pte_val(x) >> 10)
+#define pgoff_to_pte(x)		__pte(((x) << 10) | _PAGE_FILE)
+
+#define kern_addr_valid(addr)	(1)
+
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
+	remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init()	do { } while (0)
+
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+void paging_init(unsigned long mem_end);
+
+#ifdef CONFIG_METAG_META12
+/* This is a workaround for an issue in Meta 1 cores. These cores cache
+ * invalid entries in the TLB so we always need to flush whenever we add
+ * a new pte. Unfortunately we can only flush the whole TLB not shoot down
+ * single entries so this is sub-optimal. This implementation ensures that
+ * we will get a flush at the second attempt, so we may still get repeated
+ * faults, we just don't overflow the kernel stack handling them.
+ */
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
+({									  \
+	int __changed = !pte_same(*(__ptep), __entry);			  \
+	if (__changed) {						  \
+		set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \
+	}								  \
+	flush_tlb_page(__vma, __address);				  \
+	__changed;							  \
+})
+#endif
+
+#include <asm-generic/pgtable.h>
+
+#endif /* __ASSEMBLY__ */
+#endif /* _METAG_PGTABLE_H */
diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h
new file mode 100644
index 0000000..9b029a7
--- /dev/null
+++ b/arch/metag/include/asm/processor.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2005,2006,2007,2008 Imagination Technologies
+ */
+
+#ifndef __ASM_METAG_PROCESSOR_H
+#define __ASM_METAG_PROCESSOR_H
+
+#include <linux/atomic.h>
+
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/metag_regs.h>
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ __label__ _l; _l: &&_l; })
+
+/* The task stops where the kernel starts */
+#define TASK_SIZE	PAGE_OFFSET
+/* Add an extra page of padding at the top of the stack for the guard page. */
+#define STACK_TOP	(TASK_SIZE - PAGE_SIZE)
+#define STACK_TOP_MAX	STACK_TOP
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE	META_MEMORY_BASE
+
+typedef struct {
+	unsigned long seg;
+} mm_segment_t;
+
+#ifdef CONFIG_METAG_FPU
+struct meta_fpu_context {
+	TBICTXEXTFPU fpstate;
+	union {
+		struct {
+			TBICTXEXTBB4 fx8_15;
+			TBICTXEXTFPACC fpacc;
+		} fx8_15;
+		struct {
+			TBICTXEXTFPACC fpacc;
+			TBICTXEXTBB4 unused;
+		} nofx8_15;
+	} extfpstate;
+	bool needs_restore;
+};
+#else
+struct meta_fpu_context {};
+#endif
+
+#ifdef CONFIG_METAG_DSP
+struct meta_ext_context {
+	struct {
+		TBIEXTCTX ctx;
+		TBICTXEXTBB8 bb8;
+		TBIDUAL ax[TBICTXEXTAXX_BYTES / sizeof(TBIDUAL)];
+		TBICTXEXTHL2 hl2;
+		TBICTXEXTTDPR ext;
+		TBICTXEXTRP6 rp;
+	} regs;
+
+	/* DSPRAM A and B save areas. */
+	void *ram[2];
+
+	/* ECH encoded size of DSPRAM save areas. */
+	unsigned int ram_sz[2];
+};
+#else
+struct meta_ext_context {};
+#endif
+
+struct thread_struct {
+	PTBICTX kernel_context;
+	/* A copy of the user process Sig.SaveMask. */
+	unsigned int user_flags;
+	struct meta_fpu_context *fpu_context;
+	void __user *tls_ptr;
+	unsigned short int_depth;
+	unsigned short txdefr_failure;
+	struct meta_ext_context *dsp_context;
+};
+
+#define INIT_THREAD  { \
+	NULL,			/* kernel_context */	\
+	0,			/* user_flags */	\
+	NULL,			/* fpu_context */	\
+	NULL,			/* tls_ptr */		\
+	1,			/* int_depth - we start in kernel */	\
+	0,			/* txdefr_failure */	\
+	NULL,			/* dsp_context */	\
+}
+
+/* Needed to make #define as we are referencing 'current', that is not visible
+ * yet.
+ *
+ * Stack layout is as below.
+
+      argc            argument counter (integer)
+      argv[0]         program name (pointer)
+      argv[1...N]     program args (pointers)
+      argv[argc-1]    end of args (integer)
+      NULL
+      env[0...N]      environment variables (pointers)
+      NULL
+
+ */
+#define start_thread(regs, pc, usp) do {				   \
+	unsigned int *argc = (unsigned int *) bprm->exec;		   \
+	set_fs(USER_DS);						   \
+	current->thread.int_depth = 1;					   \
+	/* Force this process down to user land */			   \
+	regs->ctx.SaveMask = TBICTX_PRIV_BIT;				   \
+	regs->ctx.CurrPC = pc;						   \
+	regs->ctx.AX[0].U0 = usp;					   \
+	regs->ctx.DX[3].U1 = *((int *)argc);			/* argc */ \
+	regs->ctx.DX[3].U0 = (int)((int *)argc + 1);		/* argv */ \
+	regs->ctx.DX[2].U1 = (int)((int *)argc +			   \
+				   regs->ctx.DX[3].U1 + 2);	/* envp */ \
+	regs->ctx.DX[2].U0 = 0;				   /* rtld_fini */ \
+} while (0)
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+
+/* Free all resources held by a thread. */
+static inline void release_thread(struct task_struct *dead_task)
+{
+}
+
+#define copy_segments(tsk, mm)		do { } while (0)
+#define release_segments(mm)		do { } while (0)
+
+extern void exit_thread(void);
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+#define	thread_saved_pc(tsk)	\
+	((unsigned long)(tsk)->thread.kernel_context->CurrPC)
+#define thread_saved_sp(tsk)	\
+	((unsigned long)(tsk)->thread.kernel_context->AX[0].U0)
+#define thread_saved_fp(tsk)	\
+	((unsigned long)(tsk)->thread.kernel_context->AX[1].U0)
+
+unsigned long get_wchan(struct task_struct *p);
+
+#define	KSTK_EIP(tsk)	((tsk)->thread.kernel_context->CurrPC)
+#define	KSTK_ESP(tsk)	((tsk)->thread.kernel_context->AX[0].U0)
+
+#define user_stack_pointer(regs)        ((regs)->ctx.AX[0].U0)
+
+#define cpu_relax()     barrier()
+
+extern void setup_priv(void);
+
+static inline unsigned int hard_processor_id(void)
+{
+	unsigned int id;
+
+	asm volatile ("MOV	%0, TXENABLE\n"
+		      "AND	%0, %0, %1\n"
+		      "LSR	%0, %0, %2\n"
+		      : "=&d" (id)
+		      : "I" (TXENABLE_THREAD_BITS),
+			"K" (TXENABLE_THREAD_S)
+		      );
+
+	return id;
+}
+
+#define OP3_EXIT	0
+
+#define HALT_OK		0
+#define HALT_PANIC	-1
+
+/*
+ * Halt (stop) the hardware thread. This instruction sequence is the
+ * standard way to cause a Meta hardware thread to exit. The exit code
+ * is pushed onto the stack which is interpreted by the debug adapter.
+ */
+static inline void hard_processor_halt(int exit_code)
+{
+	asm volatile ("MOV	D1Ar1, %0\n"
+		      "MOV	D0Ar6, %1\n"
+		      "MSETL	[A0StP],D0Ar6,D0Ar4,D0Ar2\n"
+		      "1:\n"
+		      "SWITCH	#0xC30006\n"
+		      "B		1b\n"
+		      : : "r" (exit_code), "K" (OP3_EXIT));
+}
+
+/* Set these hooks to call SoC specific code to restart/halt/power off. */
+extern void (*soc_restart)(char *cmd);
+extern void (*soc_halt)(void);
+
+extern void show_trace(struct task_struct *tsk, unsigned long *sp,
+		       struct pt_regs *regs);
+
+#endif
diff --git a/arch/metag/include/asm/prom.h b/arch/metag/include/asm/prom.h
new file mode 100644
index 0000000..d2aa35d
--- /dev/null
+++ b/arch/metag/include/asm/prom.h
@@ -0,0 +1,23 @@
+/*
+ *  arch/metag/include/asm/prom.h
+ *
+ *  Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ *  Based on ARM version:
+ *  Copyright (C) 2009 Canonical Ltd. <jeremy.kerr@canonical.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#ifndef __ASM_METAG_PROM_H
+#define __ASM_METAG_PROM_H
+
+#include <asm/setup.h>
+#define HAVE_ARCH_DEVTREE_FIXUPS
+
+extern struct machine_desc *setup_machine_fdt(void *dt);
+extern void copy_fdt(void);
+
+#endif /* __ASM_METAG_PROM_H */
diff --git a/arch/metag/include/asm/ptrace.h b/arch/metag/include/asm/ptrace.h
new file mode 100644
index 0000000..fcabc18
--- /dev/null
+++ b/arch/metag/include/asm/ptrace.h
@@ -0,0 +1,60 @@
+#ifndef _METAG_PTRACE_H
+#define _METAG_PTRACE_H
+
+#include <linux/compiler.h>
+#include <uapi/asm/ptrace.h>
+#include <asm/tbx.h>
+
+#ifndef __ASSEMBLY__
+
+/* this struct defines the way the registers are stored on the
+   stack during a system call. */
+
+struct pt_regs {
+	TBICTX ctx;
+	TBICTXEXTCB0 extcb0[5];
+};
+
+#define user_mode(regs) (((regs)->ctx.SaveMask & TBICTX_PRIV_BIT) > 0)
+
+#define instruction_pointer(regs) ((unsigned long)(regs)->ctx.CurrPC)
+#define profile_pc(regs) instruction_pointer(regs)
+
+#define task_pt_regs(task) \
+	((struct pt_regs *)(task_stack_page(task) + \
+			    sizeof(struct thread_info)))
+
+#define current_pt_regs() \
+	((struct pt_regs *)((char *)current_thread_info() + \
+			    sizeof(struct thread_info)))
+
+int syscall_trace_enter(struct pt_regs *regs);
+void syscall_trace_leave(struct pt_regs *regs);
+
+/* copy a struct user_gp_regs out to user */
+int metag_gp_regs_copyout(const struct pt_regs *regs,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf);
+/* copy a struct user_gp_regs in from user */
+int metag_gp_regs_copyin(struct pt_regs *regs,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf);
+/* copy a struct user_cb_regs out to user */
+int metag_cb_regs_copyout(const struct pt_regs *regs,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf);
+/* copy a struct user_cb_regs in from user */
+int metag_cb_regs_copyin(struct pt_regs *regs,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf);
+/* copy a struct user_rp_state out to user */
+int metag_rp_state_copyout(const struct pt_regs *regs,
+			   unsigned int pos, unsigned int count,
+			   void *kbuf, void __user *ubuf);
+/* copy a struct user_rp_state in from user */
+int metag_rp_state_copyin(struct pt_regs *regs,
+			  unsigned int pos, unsigned int count,
+			  const void *kbuf, const void __user *ubuf);
+
+#endif /* __ASSEMBLY__ */
+#endif /* _METAG_PTRACE_H */
diff --git a/arch/metag/include/asm/setup.h b/arch/metag/include/asm/setup.h
new file mode 100644
index 0000000..e13083b
--- /dev/null
+++ b/arch/metag/include/asm/setup.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_METAG_SETUP_H
+#define _ASM_METAG_SETUP_H
+
+#include <uapi/asm/setup.h>
+
+void per_cpu_trap_init(unsigned long);
+extern void __init dump_machine_table(void);
+#endif /* _ASM_METAG_SETUP_H */
diff --git a/arch/metag/include/asm/smp.h b/arch/metag/include/asm/smp.h
new file mode 100644
index 0000000..e0373f8
--- /dev/null
+++ b/arch/metag/include/asm/smp.h
@@ -0,0 +1,29 @@
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+#include <linux/cpumask.h>
+
+#define raw_smp_processor_id() (current_thread_info()->cpu)
+
+enum ipi_msg_type {
+	IPI_CALL_FUNC,
+	IPI_CALL_FUNC_SINGLE,
+	IPI_RESCHEDULE,
+};
+
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
+
+asmlinkage void secondary_start_kernel(void);
+
+extern void secondary_startup(void);
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern void __cpu_die(unsigned int cpu);
+extern int __cpu_disable(void);
+extern void cpu_die(void);
+#endif
+
+extern void smp_init_cpus(void);
+#endif /* __ASM_SMP_H */
diff --git a/arch/metag/include/asm/sparsemem.h b/arch/metag/include/asm/sparsemem.h
new file mode 100644
index 0000000..03fe255
--- /dev/null
+++ b/arch/metag/include/asm/sparsemem.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_METAG_SPARSEMEM_H
+#define __ASM_METAG_SPARSEMEM_H
+
+/*
+ * SECTION_SIZE_BITS		2^N: how big each section will be
+ * MAX_PHYSADDR_BITS		2^N: how much physical address space we have
+ * MAX_PHYSMEM_BITS		2^N: how much memory we can have in that space
+ */
+#define SECTION_SIZE_BITS	26
+#define MAX_PHYSADDR_BITS	32
+#define MAX_PHYSMEM_BITS	32
+
+#endif /* __ASM_METAG_SPARSEMEM_H */
diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h
new file mode 100644
index 0000000..86a7cf3
--- /dev/null
+++ b/arch/metag/include/asm/spinlock.h
@@ -0,0 +1,22 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#ifdef CONFIG_METAG_ATOMICITY_LOCK1
+#include <asm/spinlock_lock1.h>
+#else
+#include <asm/spinlock_lnkget.h>
+#endif
+
+#define arch_spin_unlock_wait(lock) \
+	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+#define	arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define	arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
+
+#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/metag/include/asm/spinlock_lnkget.h b/arch/metag/include/asm/spinlock_lnkget.h
new file mode 100644
index 0000000..ad8436f
--- /dev/null
+++ b/arch/metag/include/asm/spinlock_lnkget.h
@@ -0,0 +1,249 @@
+#ifndef __ASM_SPINLOCK_LNKGET_H
+#define __ASM_SPINLOCK_LNKGET_H
+
+/*
+ * None of these asm statements clobber memory as LNKSET writes around
+ * the cache so the memory it modifies cannot safely be read by any means
+ * other than these accessors.
+ */
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	int ret;
+
+	asm volatile ("LNKGETD	%0, [%1]\n"
+		      "TST	%0, #1\n"
+		      "MOV	%0, #1\n"
+		      "XORZ      %0, %0, %0\n"
+		      : "=&d" (ret)
+		      : "da" (&lock->lock)
+		      : "cc");
+	return ret;
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	int tmp;
+
+	asm volatile ("1:     LNKGETD %0,[%1]\n"
+		      "       TST     %0, #1\n"
+		      "       ADD     %0, %0, #1\n"
+		      "       LNKSETDZ [%1], %0\n"
+		      "       BNZ     1b\n"
+		      "       DEFR    %0, TXSTAT\n"
+		      "       ANDT    %0, %0, #HI(0x3f000000)\n"
+		      "       CMPT    %0, #HI(0x02000000)\n"
+		      "       BNZ     1b\n"
+		      : "=&d" (tmp)
+		      : "da" (&lock->lock)
+		      : "cc");
+
+	smp_mb();
+}
+
+/* Returns 0 if failed to acquire lock */
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	int tmp;
+
+	asm volatile ("       LNKGETD %0,[%1]\n"
+		      "       TST     %0, #1\n"
+		      "       ADD     %0, %0, #1\n"
+		      "       LNKSETDZ [%1], %0\n"
+		      "       BNZ     1f\n"
+		      "       DEFR    %0, TXSTAT\n"
+		      "       ANDT    %0, %0, #HI(0x3f000000)\n"
+		      "       CMPT    %0, #HI(0x02000000)\n"
+		      "       MOV     %0, #1\n"
+		      "1:     XORNZ   %0, %0, %0\n"
+		      : "=&d" (tmp)
+		      : "da" (&lock->lock)
+		      : "cc");
+
+	smp_mb();
+
+	return tmp;
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	smp_mb();
+
+	asm volatile ("       SETD    [%0], %1\n"
+		      :
+		      : "da" (&lock->lock), "da" (0)
+		      : "memory");
+}
+
+/*
+ * RWLOCKS
+ *
+ *
+ * Write locks are easy - we just set bit 31.  When unlocking, we can
+ * just write zero since the lock is exclusively held.
+ */
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	int tmp;
+
+	asm volatile ("1:     LNKGETD %0,[%1]\n"
+		      "       CMP     %0, #0\n"
+		      "       ADD     %0, %0, %2\n"
+		      "       LNKSETDZ [%1], %0\n"
+		      "       BNZ     1b\n"
+		      "       DEFR    %0, TXSTAT\n"
+		      "       ANDT    %0, %0, #HI(0x3f000000)\n"
+		      "       CMPT    %0, #HI(0x02000000)\n"
+		      "       BNZ     1b\n"
+		      : "=&d" (tmp)
+		      : "da" (&rw->lock), "bd" (0x80000000)
+		      : "cc");
+
+	smp_mb();
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	int tmp;
+
+	asm volatile ("       LNKGETD %0,[%1]\n"
+		      "       CMP     %0, #0\n"
+		      "       ADD     %0, %0, %2\n"
+		      "       LNKSETDZ [%1], %0\n"
+		      "       BNZ     1f\n"
+		      "       DEFR    %0, TXSTAT\n"
+		      "       ANDT    %0, %0, #HI(0x3f000000)\n"
+		      "       CMPT    %0, #HI(0x02000000)\n"
+		      "       MOV     %0,#1\n"
+		      "1:     XORNZ   %0, %0, %0\n"
+		      : "=&d" (tmp)
+		      : "da" (&rw->lock), "bd" (0x80000000)
+		      : "cc");
+
+	smp_mb();
+
+	return tmp;
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	smp_mb();
+
+	asm volatile ("       SETD    [%0], %1\n"
+		      :
+		      : "da" (&rw->lock), "da" (0)
+		      : "memory");
+}
+
+/* write_can_lock - would write_trylock() succeed? */
+static inline int arch_write_can_lock(arch_rwlock_t *rw)
+{
+	int ret;
+
+	asm volatile ("LNKGETD	%0, [%1]\n"
+		      "CMP	%0, #0\n"
+		      "MOV	%0, #1\n"
+		      "XORNZ     %0, %0, %0\n"
+		      : "=&d" (ret)
+		      : "da" (&rw->lock)
+		      : "cc");
+	return ret;
+}
+
+/*
+ * Read locks are a bit more hairy:
+ *  - Exclusively load the lock value.
+ *  - Increment it.
+ *  - Store new lock value if positive, and we still own this location.
+ *    If the value is negative, we've already failed.
+ *  - If we failed to store the value, we want a negative result.
+ *  - If we failed, try again.
+ * Unlocking is similarly hairy.  We may have multiple read locks
+ * currently active.  However, we know we won't have any write
+ * locks.
+ */
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	int tmp;
+
+	asm volatile ("1:     LNKGETD %0,[%1]\n"
+		      "       ADDS    %0, %0, #1\n"
+		      "       LNKSETDPL [%1], %0\n"
+		      "       BMI     1b\n"
+		      "       DEFR    %0, TXSTAT\n"
+		      "       ANDT    %0, %0, #HI(0x3f000000)\n"
+		      "       CMPT    %0, #HI(0x02000000)\n"
+		      "       BNZ     1b\n"
+		      : "=&d" (tmp)
+		      : "da" (&rw->lock)
+		      : "cc");
+
+	smp_mb();
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	int tmp;
+
+	smp_mb();
+
+	asm volatile ("1:     LNKGETD %0,[%1]\n"
+		      "       SUB     %0, %0, #1\n"
+		      "       LNKSETD [%1], %0\n"
+		      "       DEFR    %0, TXSTAT\n"
+		      "       ANDT    %0, %0, #HI(0x3f000000)\n"
+		      "       CMPT    %0, #HI(0x02000000)\n"
+		      "       BNZ     1b\n"
+		      : "=&d" (tmp)
+		      : "da" (&rw->lock)
+		      : "cc", "memory");
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	int tmp;
+
+	asm volatile ("       LNKGETD %0,[%1]\n"
+		      "       ADDS    %0, %0, #1\n"
+		      "       LNKSETDPL [%1], %0\n"
+		      "       BMI     1f\n"
+		      "       DEFR    %0, TXSTAT\n"
+		      "       ANDT    %0, %0, #HI(0x3f000000)\n"
+		      "       CMPT    %0, #HI(0x02000000)\n"
+		      "       MOV     %0,#1\n"
+		      "       BZ      2f\n"
+		      "1:     MOV     %0,#0\n"
+		      "2:\n"
+		      : "=&d" (tmp)
+		      : "da" (&rw->lock)
+		      : "cc");
+
+	smp_mb();
+
+	return tmp;
+}
+
+/* read_can_lock - would read_trylock() succeed? */
+static inline int arch_read_can_lock(arch_rwlock_t *rw)
+{
+	int tmp;
+
+	asm volatile ("LNKGETD	%0, [%1]\n"
+		      "CMP	%0, %2\n"
+		      "MOV	%0, #1\n"
+		      "XORZ	%0, %0, %0\n"
+		      : "=&d" (tmp)
+		      : "da" (&rw->lock), "bd" (0x80000000)
+		      : "cc");
+	return tmp;
+}
+
+#define	arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define	arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
+
+#endif /* __ASM_SPINLOCK_LNKGET_H */
diff --git a/arch/metag/include/asm/spinlock_lock1.h b/arch/metag/include/asm/spinlock_lock1.h
new file mode 100644
index 0000000..c630444
--- /dev/null
+++ b/arch/metag/include/asm/spinlock_lock1.h
@@ -0,0 +1,184 @@
+#ifndef __ASM_SPINLOCK_LOCK1_H
+#define __ASM_SPINLOCK_LOCK1_H
+
+#include <asm/bug.h>
+#include <asm/global_lock.h>
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	int ret;
+
+	barrier();
+	ret = lock->lock;
+	WARN_ON(ret != 0 && ret != 1);
+	return ret;
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	unsigned int we_won = 0;
+	unsigned long flags;
+
+again:
+	__global_lock1(flags);
+	if (lock->lock == 0) {
+		fence();
+		lock->lock = 1;
+		we_won = 1;
+	}
+	__global_unlock1(flags);
+	if (we_won == 0)
+		goto again;
+	WARN_ON(lock->lock != 1);
+}
+
+/* Returns 0 if failed to acquire lock */
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	unsigned long flags;
+	unsigned int ret;
+
+	__global_lock1(flags);
+	ret = lock->lock;
+	if (ret == 0) {
+		fence();
+		lock->lock = 1;
+	}
+	__global_unlock1(flags);
+	return (ret == 0);
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	barrier();
+	WARN_ON(!lock->lock);
+	lock->lock = 0;
+}
+
+/*
+ * RWLOCKS
+ *
+ *
+ * Write locks are easy - we just set bit 31.  When unlocking, we can
+ * just write zero since the lock is exclusively held.
+ */
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	unsigned long flags;
+	unsigned int we_won = 0;
+
+again:
+	__global_lock1(flags);
+	if (rw->lock == 0) {
+		fence();
+		rw->lock = 0x80000000;
+		we_won = 1;
+	}
+	__global_unlock1(flags);
+	if (we_won == 0)
+		goto again;
+	WARN_ON(rw->lock != 0x80000000);
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	unsigned long flags;
+	unsigned int ret;
+
+	__global_lock1(flags);
+	ret = rw->lock;
+	if (ret == 0) {
+		fence();
+		rw->lock = 0x80000000;
+	}
+	__global_unlock1(flags);
+
+	return (ret == 0);
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	barrier();
+	WARN_ON(rw->lock != 0x80000000);
+	rw->lock = 0;
+}
+
+/* write_can_lock - would write_trylock() succeed? */
+static inline int arch_write_can_lock(arch_rwlock_t *rw)
+{
+	unsigned int ret;
+
+	barrier();
+	ret = rw->lock;
+	return (ret == 0);
+}
+
+/*
+ * Read locks are a bit more hairy:
+ *  - Exclusively load the lock value.
+ *  - Increment it.
+ *  - Store new lock value if positive, and we still own this location.
+ *    If the value is negative, we've already failed.
+ *  - If we failed to store the value, we want a negative result.
+ *  - If we failed, try again.
+ * Unlocking is similarly hairy.  We may have multiple read locks
+ * currently active.  However, we know we won't have any write
+ * locks.
+ */
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	unsigned long flags;
+	unsigned int we_won = 0, ret;
+
+again:
+	__global_lock1(flags);
+	ret = rw->lock;
+	if (ret < 0x80000000) {
+		fence();
+		rw->lock = ret + 1;
+		we_won = 1;
+	}
+	__global_unlock1(flags);
+	if (!we_won)
+		goto again;
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	unsigned long flags;
+	unsigned int ret;
+
+	__global_lock1(flags);
+	fence();
+	ret = rw->lock--;
+	__global_unlock1(flags);
+	WARN_ON(ret == 0);
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	unsigned long flags;
+	unsigned int ret;
+
+	__global_lock1(flags);
+	ret = rw->lock;
+	if (ret < 0x80000000) {
+		fence();
+		rw->lock = ret + 1;
+	}
+	__global_unlock1(flags);
+	return (ret < 0x80000000);
+}
+
+/* read_can_lock - would read_trylock() succeed? */
+static inline int arch_read_can_lock(arch_rwlock_t *rw)
+{
+	unsigned int ret;
+
+	barrier();
+	ret = rw->lock;
+	return (ret < 0x80000000);
+}
+
+#endif /* __ASM_SPINLOCK_LOCK1_H */
diff --git a/arch/metag/include/asm/spinlock_types.h b/arch/metag/include/asm/spinlock_types.h
new file mode 100644
index 0000000..b763914
--- /dev/null
+++ b/arch/metag/include/asm/spinlock_types.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_METAG_SPINLOCK_TYPES_H
+#define _ASM_METAG_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+typedef struct {
+	volatile unsigned int lock;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+
+typedef struct {
+	volatile unsigned int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
+
+#endif /* _ASM_METAG_SPINLOCK_TYPES_H */
diff --git a/arch/metag/include/asm/stacktrace.h b/arch/metag/include/asm/stacktrace.h
new file mode 100644
index 0000000..2830a0f
--- /dev/null
+++ b/arch/metag/include/asm/stacktrace.h
@@ -0,0 +1,20 @@
+#ifndef __ASM_STACKTRACE_H
+#define __ASM_STACKTRACE_H
+
+struct stackframe {
+	unsigned long fp;
+	unsigned long sp;
+	unsigned long lr;
+	unsigned long pc;
+};
+
+struct metag_frame {
+	unsigned long fp;
+	unsigned long lr;
+};
+
+extern int unwind_frame(struct stackframe *frame);
+extern void walk_stackframe(struct stackframe *frame,
+			    int (*fn)(struct stackframe *, void *), void *data);
+
+#endif	/* __ASM_STACKTRACE_H */
diff --git a/arch/metag/include/asm/string.h b/arch/metag/include/asm/string.h
new file mode 100644
index 0000000..53e3806
--- /dev/null
+++ b/arch/metag/include/asm/string.h
@@ -0,0 +1,13 @@
+#ifndef _METAG_STRING_H_
+#define _METAG_STRING_H_
+
+#define __HAVE_ARCH_MEMSET
+extern void *memset(void *__s, int __c, size_t __count);
+
+#define __HAVE_ARCH_MEMCPY
+void *memcpy(void *__to, __const__ void *__from, size_t __n);
+
+#define __HAVE_ARCH_MEMMOVE
+extern void *memmove(void *__dest, __const__ void *__src, size_t __n);
+
+#endif /* _METAG_STRING_H_ */
diff --git a/arch/metag/include/asm/switch.h b/arch/metag/include/asm/switch.h
new file mode 100644
index 0000000..1fd6a58
--- /dev/null
+++ b/arch/metag/include/asm/switch.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _ASM_METAG_SWITCH_H
+#define _ASM_METAG_SWITCH_H
+
+/* metag SWITCH codes */
+#define __METAG_SW_PERM_BREAK	0x400002	/* compiled in breakpoint */
+#define __METAG_SW_SYS_LEGACY	0x440000	/* legacy system calls */
+#define __METAG_SW_SYS		0x440001	/* system calls */
+
+/* metag SWITCH instruction encoding */
+#define __METAG_SW_ENCODING(TYPE)	(0xaf000000 | (__METAG_SW_##TYPE))
+
+#endif /* _ASM_METAG_SWITCH_H */
diff --git a/arch/metag/include/asm/syscall.h b/arch/metag/include/asm/syscall.h
new file mode 100644
index 0000000..24fc979
--- /dev/null
+++ b/arch/metag/include/asm/syscall.h
@@ -0,0 +1,104 @@
+/*
+ * Access to user system call parameters and results
+ *
+ * Copyright (C) 2008 Imagination Technologies Ltd.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * See asm-generic/syscall.h for descriptions of what we must do here.
+ */
+
+#ifndef _ASM_METAG_SYSCALL_H
+#define _ASM_METAG_SYSCALL_H
+
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/uaccess.h>
+
+#include <asm/switch.h>
+
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	unsigned long insn;
+
+	/*
+	 * FIXME there's no way to find out how we got here other than to
+	 * examine the memory at the PC to see if it is a syscall
+	 * SWITCH instruction.
+	 */
+	if (get_user(insn, (unsigned long *)(regs->ctx.CurrPC - 4)))
+		return -1;
+
+	if (insn == __METAG_SW_ENCODING(SYS))
+		return regs->ctx.DX[0].U1;
+	else
+		return -1L;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	/* do nothing */
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	unsigned long error = regs->ctx.DX[0].U0;
+	return IS_ERR_VALUE(error) ? error : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->ctx.DX[0].U0;
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	regs->ctx.DX[0].U0 = (long) error ?: val;
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	unsigned int reg, j;
+	BUG_ON(i + n > 6);
+
+	for (j = i, reg = 6 - i; j < (i + n); j++, reg--) {
+		if (reg % 2)
+			args[j] = regs->ctx.DX[(reg + 1) / 2].U0;
+		else
+			args[j] = regs->ctx.DX[reg / 2].U1;
+	}
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	unsigned int reg;
+	BUG_ON(i + n > 6);
+
+	for (reg = 6 - i; i < (i + n); i++, reg--) {
+		if (reg % 2)
+			regs->ctx.DX[(reg + 1) / 2].U0 = args[i];
+		else
+			regs->ctx.DX[reg / 2].U1 = args[i];
+	}
+}
+
+#define NR_syscalls __NR_syscalls
+
+/* generic syscall table */
+extern const void *sys_call_table[];
+
+#endif	/* _ASM_METAG_SYSCALL_H */
diff --git a/arch/metag/include/asm/syscalls.h b/arch/metag/include/asm/syscalls.h
new file mode 100644
index 0000000..a02b955
--- /dev/null
+++ b/arch/metag/include/asm/syscalls.h
@@ -0,0 +1,39 @@
+#ifndef _ASM_METAG_SYSCALLS_H
+#define _ASM_METAG_SYSCALLS_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/signal.h>
+
+/* kernel/signal.c */
+#define sys_rt_sigreturn sys_rt_sigreturn
+asmlinkage long sys_rt_sigreturn(void);
+
+#include <asm-generic/syscalls.h>
+
+/* kernel/sys_metag.c */
+asmlinkage int sys_metag_setglobalbit(char __user *, int);
+asmlinkage void sys_metag_set_fpu_flags(unsigned int);
+asmlinkage int sys_metag_set_tls(void __user *);
+asmlinkage void *sys_metag_get_tls(void);
+
+asmlinkage long sys_truncate64_metag(const char __user *, unsigned long,
+				     unsigned long);
+asmlinkage long sys_ftruncate64_metag(unsigned int, unsigned long,
+				      unsigned long);
+asmlinkage long sys_fadvise64_64_metag(int, unsigned long, unsigned long,
+				       unsigned long, unsigned long, int);
+asmlinkage long sys_readahead_metag(int, unsigned long, unsigned long, size_t);
+asmlinkage ssize_t sys_pread64_metag(unsigned long, char __user *, size_t,
+				     unsigned long, unsigned long);
+asmlinkage ssize_t sys_pwrite64_metag(unsigned long, char __user *, size_t,
+				      unsigned long, unsigned long);
+asmlinkage long sys_sync_file_range_metag(int, unsigned long, unsigned long,
+					  unsigned long, unsigned long,
+					  unsigned int);
+
+int do_work_pending(struct pt_regs *regs, unsigned int thread_flags,
+		    int syscall);
+
+#endif /* _ASM_METAG_SYSCALLS_H */
diff --git a/arch/metag/include/asm/tbx.h b/arch/metag/include/asm/tbx.h
new file mode 100644
index 0000000..287b36f
--- /dev/null
+++ b/arch/metag/include/asm/tbx.h
@@ -0,0 +1,1425 @@
+/*
+ * asm/tbx.h
+ *
+ * Copyright (C) 2000-2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Thread binary interface header
+ */
+
+#ifndef _ASM_METAG_TBX_H_
+#define _ASM_METAG_TBX_H_
+
+/* for CACHEW_* values */
+#include <asm/metag_isa.h>
+/* for LINSYSEVENT_* addresses */
+#include <asm/metag_mem.h>
+
+#ifdef  TBI_1_4
+#ifndef TBI_MUTEXES_1_4
+#define TBI_MUTEXES_1_4
+#endif
+#ifndef TBI_SEMAPHORES_1_4
+#define TBI_SEMAPHORES_1_4
+#endif
+#ifndef TBI_ASYNC_SWITCH_1_4
+#define TBI_ASYNC_SWITCH_1_4
+#endif
+#ifndef TBI_FASTINT_1_4
+#define TBI_FASTINT_1_4
+#endif
+#endif
+
+
+/* Id values in the TBI system describe a segment using an arbitrary
+   integer value and flags in the bottom 8 bits, the SIGPOLL value is
+   used in cases where control over blocking or polling behaviour is
+   needed. */
+#define TBID_SIGPOLL_BIT    0x02 /* Set bit in an Id value to poll vs block */
+/* Extended segment identifiers use strings in the string table */
+#define TBID_IS_SEGSTR( Id ) (((Id) & (TBID_SEGTYPE_BITS>>1)) == 0)
+
+/* Segment identifiers contain the following related bit-fields */
+#define TBID_SEGTYPE_BITS   0x0F /* One of the predefined segment types */
+#define TBID_SEGTYPE_S      0
+#define TBID_SEGSCOPE_BITS  0x30 /* Indicates the scope of the segment */
+#define TBID_SEGSCOPE_S     4
+#define TBID_SEGGADDR_BITS  0xC0 /* Indicates access possible via pGAddr */
+#define TBID_SEGGADDR_S     6
+
+/* Segments of memory can only really contain a few types of data */
+#define TBID_SEGTYPE_TEXT   0x02 /* Code segment */
+#define TBID_SEGTYPE_DATA   0x04 /* Data segment */
+#define TBID_SEGTYPE_STACK  0x06 /* Stack segment */
+#define TBID_SEGTYPE_HEAP   0x0A /* Heap segment */
+#define TBID_SEGTYPE_ROOT   0x0C /* Root block segments */
+#define TBID_SEGTYPE_STRING 0x0E /* String table segment */
+
+/* Segments have one of three possible scopes */
+#define TBID_SEGSCOPE_INIT     0 /* Temporary area for initialisation phase */
+#define TBID_SEGSCOPE_LOCAL    1 /* Private to this thread */
+#define TBID_SEGSCOPE_GLOBAL   2 /* Shared globally throughout the system */
+#define TBID_SEGSCOPE_SHARED   3 /* Limited sharing between local/global */
+
+/* For segment specifier a further field in two of the remaining bits
+   indicates the usefulness of the pGAddr field in the segment descriptor
+   descriptor. */
+#define TBID_SEGGADDR_NULL     0 /* pGAddr is NULL -> SEGSCOPE_(LOCAL|INIT) */
+#define TBID_SEGGADDR_READ     1 /* Only read    via pGAddr */
+#define TBID_SEGGADDR_WRITE    2 /* Full access  via pGAddr */
+#define TBID_SEGGADDR_EXEC     3 /* Only execute via pGAddr */
+
+/* The following values are common to both segment and signal Id value and
+   live in the top 8 bits of the Id values. */
+
+/* The ISTAT bit indicates if segments are related to interrupt vs
+   background level interfaces a thread can still handle all triggers at
+   either level, but can also split these up if it wants to. */
+#define TBID_ISTAT_BIT    0x01000000
+#define TBID_ISTAT_S      24
+
+/* Privilege needed to access a segment is indicated by the next bit.
+   
+   This bit is set to mirror the current privilege level when starting a
+   search for a segment - setting it yourself toggles the automatically
+   generated state which is only useful to emulate unprivileged behaviour
+   or access unprivileged areas of memory while at privileged level. */
+#define TBID_PSTAT_BIT    0x02000000
+#define TBID_PSTAT_S      25
+
+/* The top six bits of a signal/segment specifier identifies a thread within
+   the system. This represents a segments owner. */
+#define TBID_THREAD_BITS  0xFC000000
+#define TBID_THREAD_S     26
+
+/* Special thread id values */
+#define TBID_THREAD_NULL   (-32) /* Never matches any thread/segment id used */
+#define TBID_THREAD_GLOBAL (-31) /* Things global to all threads */
+#define TBID_THREAD_HOST   ( -1) /* Host interface */
+#define TBID_THREAD_EXTIO  (TBID_THREAD_HOST)   /* Host based ExtIO i/f */
+
+/* Virtual Id's are used for external thread interface structures or the
+   above special Id's */
+#define TBID_IS_VIRTTHREAD( Id ) ((Id) < 0)
+
+/* Real Id's are used for actual hardware threads that are local */
+#define TBID_IS_REALTHREAD( Id ) ((Id) >= 0)
+
+/* Generate a segment Id given Thread, Scope, and Type */
+#define TBID_SEG( Thread, Scope, Type )                           (\
+    ((Thread)<<TBID_THREAD_S) + ((Scope)<<TBID_SEGSCOPE_S) + (Type))
+
+/* Generate a signal Id given Thread and SigNum */
+#define TBID_SIG( Thread, SigNum )                                        (\
+    ((Thread)<<TBID_THREAD_S) + ((SigNum)<<TBID_SIGNUM_S) + TBID_SIGNAL_BIT)
+
+/* Generate an Id that solely represents a thread - useful for cache ops */
+#define TBID_THD( Thread ) ((Thread)<<TBID_THREAD_S)
+#define TBID_THD_NULL      ((TBID_THREAD_NULL)  <<TBID_THREAD_S)
+#define TBID_THD_GLOBAL    ((TBID_THREAD_GLOBAL)<<TBID_THREAD_S)
+
+/* Common exception handler (see TBID_SIGNUM_XXF below) receives hardware
+   generated fault codes TBIXXF_SIGNUM_xxF in it's SigNum parameter */
+#define TBIXXF_SIGNUM_IIF   0x01 /* General instruction fault */
+#define TBIXXF_SIGNUM_PGF   0x02 /* Privilege general fault */
+#define TBIXXF_SIGNUM_DHF   0x03 /* Data access watchpoint HIT */
+#define TBIXXF_SIGNUM_IGF   0x05 /* Code fetch general read failure */
+#define TBIXXF_SIGNUM_DGF   0x07 /* Data access general read/write fault */
+#define TBIXXF_SIGNUM_IPF   0x09 /* Code fetch page fault */
+#define TBIXXF_SIGNUM_DPF   0x0B /* Data access page fault */
+#define TBIXXF_SIGNUM_IHF   0x0D /* Instruction breakpoint HIT */
+#define TBIXXF_SIGNUM_DWF   0x0F /* Data access read-only fault */
+
+/* Hardware signals communicate events between processing levels within a
+   single thread all the _xxF cases are exceptions and are routed via a
+   common exception handler, _SWx are software trap events and kicks including
+   __TBISignal generated kicks, and finally _TRx are hardware triggers */
+#define TBID_SIGNUM_SW0     0x00 /* SWITCH GROUP 0 - Per thread user */
+#define TBID_SIGNUM_SW1     0x01 /* SWITCH GROUP 1 - Per thread system */
+#define TBID_SIGNUM_SW2     0x02 /* SWITCH GROUP 2 - Internal global request */
+#define TBID_SIGNUM_SW3     0x03 /* SWITCH GROUP 3 - External global request */
+#ifdef TBI_1_4
+#define TBID_SIGNUM_FPE     0x04 /* Deferred exception - Any IEEE 754 exception */
+#define TBID_SIGNUM_FPD     0x05 /* Deferred exception - Denormal exception */
+/* Reserved 0x6 for a reserved deferred exception */
+#define TBID_SIGNUM_BUS     0x07 /* Deferred exception - Bus Error */
+/* Reserved 0x08-0x09 */
+#else
+/* Reserved 0x04-0x09 */
+#endif
+#define TBID_SIGNUM_SWS     0x0A /* KICK received with SigMask != 0 */
+#define TBID_SIGNUM_SWK     0x0B /* KICK received with SigMask == 0 */
+/* Reserved 0x0C-0x0F */
+#define TBID_SIGNUM_TRT     0x10 /* Timer trigger */
+#define TBID_SIGNUM_LWK     0x11 /* Low level kick (handler provided by TBI) */
+#define TBID_SIGNUM_XXF     0x12 /* Fault handler - receives ALL _xxF sigs */
+#ifdef TBI_1_4
+#define TBID_SIGNUM_DFR     0x13 /* Deferred Exception handler */
+#else
+#define TBID_SIGNUM_FPE     0x13 /* FPE Exception handler */
+#endif
+/* External trigger one group 0x14 to 0x17 - per thread */
+#define TBID_SIGNUM_TR1(Thread) (0x14+(Thread))
+#define TBID_SIGNUM_T10     0x14
+#define TBID_SIGNUM_T11     0x15
+#define TBID_SIGNUM_T12     0x16
+#define TBID_SIGNUM_T13     0x17
+/* External trigger two group 0x18 to 0x1b - per thread */
+#define TBID_SIGNUM_TR2(Thread) (0x18+(Thread))
+#define TBID_SIGNUM_T20     0x18
+#define TBID_SIGNUM_T21     0x19
+#define TBID_SIGNUM_T22     0x1A
+#define TBID_SIGNUM_T23     0x1B
+#define TBID_SIGNUM_TR3     0x1C /* External trigger N-4 (global) */
+#define TBID_SIGNUM_TR4     0x1D /* External trigger N-3 (global) */
+#define TBID_SIGNUM_TR5     0x1E /* External trigger N-2 (global) */
+#define TBID_SIGNUM_TR6     0x1F /* External trigger N-1 (global) */
+#define TBID_SIGNUM_MAX     0x1F
+
+/* Return the trigger register(TXMASK[I]/TXSTAT[I]) bits related to
+   each hardware signal, sometimes this is a many-to-one relationship. */
+#define TBI_TRIG_BIT(SigNum)                                      (\
+    ((SigNum) >= TBID_SIGNUM_TRT) ? 1<<((SigNum)-TBID_SIGNUM_TRT) :\
+    ( ((SigNum) == TBID_SIGNUM_SWS) ||                             \
+      ((SigNum) == TBID_SIGNUM_SWK)    ) ?                         \
+                         TXSTAT_KICK_BIT : TXSTATI_BGNDHALT_BIT    )
+
+/* Return the hardware trigger vector number for entries in the
+   HWVEC0EXT table that will generate the required internal trigger. */
+#define TBI_TRIG_VEC(SigNum)                                      (\
+    ((SigNum) >= TBID_SIGNUM_T10) ? ((SigNum)-TBID_SIGNUM_TRT) : -1)
+
+/* Default trigger masks for each thread at background/interrupt level */
+#define TBI_TRIGS_INIT( Thread )                           (\
+    TXSTAT_KICK_BIT + TBI_TRIG_BIT(TBID_SIGNUM_TR1(Thread)) )
+#define TBI_INTS_INIT( Thread )                            (\
+    TXSTAT_KICK_BIT + TXSTATI_BGNDHALT_BIT                  \
+                    + TBI_TRIG_BIT(TBID_SIGNUM_TR2(Thread)) )
+
+#ifndef __ASSEMBLY__
+/* A spin-lock location is a zero-initialised location in memory */
+typedef volatile int TBISPIN, *PTBISPIN;
+
+/* A kick location is a hardware location you can write to
+ * in order to cause a kick
+ */
+typedef volatile int *PTBIKICK;
+
+#if defined(METAC_1_0) || defined(METAC_1_1)
+/* Macro to perform a kick */
+#define TBI_KICK( pKick ) do { pKick[0] = 1; } while (0)
+#else
+/* #define METAG_LIN_VALUES before including machine.h if required */
+#ifdef LINSYSEVENT_WR_COMBINE_FLUSH
+/* Macro to perform a kick - write combiners must be flushed */
+#define TBI_KICK( pKick )                                                do {\
+    volatile int *pFlush = (volatile int *) LINSYSEVENT_WR_COMBINE_FLUSH;    \
+    pFlush[0] = 0;                                                           \
+    pKick[0]  = 1;                                                } while (0)
+#endif
+#endif /* if defined(METAC_1_0) || defined(METAC_1_1) */
+#endif /* ifndef __ASSEMBLY__ */
+
+#ifndef __ASSEMBLY__
+/* 64-bit dual unit state value */
+typedef struct _tbidual_tag_ {
+    /* 32-bit value from a pair of registers in data or address units */
+    int U0, U1;
+} TBIDUAL, *PTBIDUAL;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Byte offsets of fields within TBIDUAL */
+#define TBIDUAL_U0      (0)
+#define TBIDUAL_U1      (4)
+
+#define TBIDUAL_BYTES   (8)
+
+#define TBICTX_CRIT_BIT 0x0001  /* ASync state saved in TBICTX */
+#define TBICTX_SOFT_BIT 0x0002  /* Sync state saved in TBICTX (other bits 0) */
+#ifdef TBI_FASTINT_1_4
+#define TBICTX_FINT_BIT 0x0004  /* Using Fast Interrupts */
+#endif
+#define TBICTX_FPAC_BIT 0x0010  /* FPU state in TBICTX, FPU active on entry */
+#define TBICTX_XMCC_BIT 0x0020  /* Bit to identify a MECC task */
+#define TBICTX_CBUF_BIT 0x0040  /* Hardware catch buffer flag from TXSTATUS */
+#define TBICTX_CBRP_BIT 0x0080  /* Read pipeline dirty from TXDIVTIME */
+#define TBICTX_XDX8_BIT 0x0100  /* Saved DX.8 to DX.15 too */
+#define TBICTX_XAXX_BIT 0x0200  /* Save remaining AX registers to AX.7 */
+#define TBICTX_XHL2_BIT 0x0400  /* Saved hardware loop registers too */
+#define TBICTX_XTDP_BIT 0x0800  /* Saved DSP registers too */
+#define TBICTX_XEXT_BIT 0x1000  /* Set if TBICTX.Ext.Ctx contains extended
+                                   state save area, otherwise TBICTX.Ext.AX2
+                                   just holds normal A0.2 and A1.2 states */
+#define TBICTX_WAIT_BIT 0x2000  /* Causes wait for trigger - sticky toggle */
+#define TBICTX_XCBF_BIT 0x4000  /* Catch buffer or RD extracted into TBICTX */
+#define TBICTX_PRIV_BIT 0x8000  /* Set if system uses 'privileged' model */
+
+#ifdef METAC_1_0
+#define TBICTX_XAX3_BIT 0x0200  /* Saved AX.5 to AX.7 for XAXX */
+#define TBICTX_AX_REGS  5       /* Ax.0 to Ax.4 are core GP regs on CHORUS */
+#else
+#define TBICTX_XAX4_BIT 0x0200  /* Saved AX.4 to AX.7 for XAXX */
+#define TBICTX_AX_REGS  4       /* Default is Ax.0 to Ax.3 */
+#endif
+
+#ifdef TBI_1_4
+#define TBICTX_CFGFPU_FX16_BIT  0x00010000               /* Save FX.8 to FX.15 too */
+
+/* The METAC_CORE_ID_CONFIG field indicates omitted DSP resources */
+#define METAC_COREID_CFGXCTX_MASK( Value )                                 (\
+	( (((Value & METAC_COREID_CFGDSP_BITS)>>                                \
+	             METAC_COREID_CFGDSP_S      ) == METAC_COREID_CFGDSP_MIN) ? \
+	         ~(TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+                             \
+	           TBICTX_XAXX_BIT+TBICTX_XDX8_BIT ) : ~0U )                    )
+#endif
+
+/* Extended context state provides a standardised method for registering the
+   arguments required by __TBICtxSave to save the additional register states
+   currently in use by non general purpose code. The state of the __TBIExtCtx
+   variable in the static space of the thread forms an extension of the base
+   context of the thread.
+   
+   If ( __TBIExtCtx.Ctx.SaveMask == 0 ) then pExt is assumed to be NULL and
+   the empty state of  __TBIExtCtx is represented by the fact that
+   TBICTX.SaveMask does not have the bit TBICTX_XEXT_BIT set.
+   
+   If ( __TBIExtCtx.Ctx.SaveMask != 0 ) then pExt should point at a suitably
+   sized extended context save area (usually at the end of the stack space
+   allocated by the current routine). This space should allow for the
+   displaced state of A0.2 and A1.2 to be saved along with the other extended
+   states indicated via __TBIExtCtx.Ctx.SaveMask. */
+#ifndef __ASSEMBLY__
+typedef union _tbiextctx_tag_ {
+    long long Val;
+    TBIDUAL AX2;
+    struct _tbiextctxext_tag {
+#ifdef TBI_1_4
+        short DspramSizes;      /* DSPRAM sizes. Encoding varies between
+                                   TBICtxAlloc and the ECH scheme. */
+#else
+        short Reserved0;
+#endif
+        short SaveMask;         /* Flag bits for state saved */
+        PTBIDUAL pExt;          /* AX[2] state saved first plus Xxxx state */
+    
+    } Ctx;
+    
+} TBIEXTCTX, *PTBIEXTCTX;
+
+/* Automatic registration of extended context save for __TBINestInts */
+extern TBIEXTCTX __TBIExtCtx;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Byte offsets of fields within TBIEXTCTX */
+#define TBIEXTCTX_AX2           (0)
+#define TBIEXTCTX_Ctx           (0)
+#define TBIEXTCTX_Ctx_SaveMask  (TBIEXTCTX_Ctx + 2)
+#define TBIEXTCTX_Ctx_pExt      (TBIEXTCTX_Ctx + 2 + 2)
+
+/* Extended context data size calculation constants */
+#define TBICTXEXT_BYTES          (8)
+#define TBICTXEXTBB8_BYTES     (8*8)
+#define TBICTXEXTAX3_BYTES     (3*8)
+#define TBICTXEXTAX4_BYTES     (4*8)
+#ifdef METAC_1_0
+#define TBICTXEXTAXX_BYTES     TBICTXEXTAX3_BYTES
+#else
+#define TBICTXEXTAXX_BYTES     TBICTXEXTAX4_BYTES
+#endif
+#define TBICTXEXTHL2_BYTES     (3*8)
+#define TBICTXEXTTDR_BYTES    (27*8)
+#define TBICTXEXTTDP_BYTES TBICTXEXTTDR_BYTES
+
+#ifdef TBI_1_4
+#define TBICTXEXTFX8_BYTES	(4*8)
+#define TBICTXEXTFPAC_BYTES	(1*4 + 2*2 + 4*8)
+#define TBICTXEXTFACF_BYTES	(3*8)
+#endif
+
+/* Maximum flag bits to be set via the TBICTX_EXTSET macro */
+#define TBICTXEXT_MAXBITS  (TBICTX_XEXT_BIT|                \
+                            TBICTX_XDX8_BIT|TBICTX_XAXX_BIT|\
+                            TBICTX_XHL2_BIT|TBICTX_XTDP_BIT )
+
+/* Maximum size of the extended context save area for current variant */
+#define TBICTXEXT_MAXBYTES (TBICTXEXT_BYTES+TBICTXEXTBB8_BYTES+\
+                         TBICTXEXTAXX_BYTES+TBICTXEXTHL2_BYTES+\
+                                            TBICTXEXTTDP_BYTES )
+
+#ifdef TBI_FASTINT_1_4
+/* Maximum flag bits to be set via the TBICTX_EXTSET macro */
+#define TBICTX2EXT_MAXBITS (TBICTX_XDX8_BIT|TBICTX_XAXX_BIT|\
+                            TBICTX_XHL2_BIT|TBICTX_XTDP_BIT )
+
+/* Maximum size of the extended context save area for current variant */
+#define TBICTX2EXT_MAXBYTES (TBICTXEXTBB8_BYTES+TBICTXEXTAXX_BYTES\
+                             +TBICTXEXTHL2_BYTES+TBICTXEXTTDP_BYTES )
+#endif
+
+/* Specify extended resources being used by current routine, code must be
+   assembler generated to utilise extended resources-
+
+        MOV     D0xxx,A0StP             ; Perform alloca - routine should
+        ADD     A0StP,A0StP,#SaveSize   ; setup/use A0FrP to access locals
+        MOVT    D1xxx,#SaveMask         ; TBICTX_XEXT_BIT MUST be set
+        SETL    [A1GbP+#OG(___TBIExtCtx)],D0xxx,D1xxx
+        
+    NB: OG(___TBIExtCtx) is a special case supported for SETL/GETL operations
+        on 64-bit sizes structures only, other accesses must be based on use
+        of OGA(___TBIExtCtx). 
+
+   At exit of routine-
+   
+        MOV     D0xxx,#0                ; Clear extended context save state
+        MOV     D1xxx,#0
+        SETL    [A1GbP+#OG(___TBIExtCtx)],D0xxx,D1xxx
+        SUB     A0StP,A0StP,#SaveSize   ; If original A0StP required
+        
+    NB: Both the setting and clearing of the whole __TBIExtCtx MUST be done
+        atomically in one 64-bit write operation.
+
+   For simple interrupt handling only via __TBINestInts there should be no
+   impact of the __TBIExtCtx system. If pre-emptive scheduling is being
+   performed however (assuming __TBINestInts has already been called earlier
+   on) then the following logic will correctly call __TBICtxSave if required
+   and clear out the currently selected background task-
+   
+        if ( __TBIExtCtx.Ctx.SaveMask & TBICTX_XEXT_BIT )
+        {
+            / * Store extended states in pCtx * /
+            State.Sig.SaveMask |= __TBIExtCtx.Ctx.SaveMask;
+        
+            (void) __TBICtxSave( State, (void *) __TBIExtCtx.Ctx.pExt );
+            __TBIExtCtx.Val   = 0;
+        }
+        
+    and when restoring task states call __TBICtxRestore-
+    
+        / * Restore state from pCtx * /
+        State.Sig.pCtx     = pCtx;
+        State.Sig.SaveMask = pCtx->SaveMask;
+
+        if ( State.Sig.SaveMask & TBICTX_XEXT_BIT )
+        {
+            / * Restore extended states from pCtx * /
+            __TBIExtCtx.Val = pCtx->Ext.Val;
+            
+            (void) __TBICtxRestore( State, (void *) __TBIExtCtx.Ctx.pExt );
+        }   
+   
+ */
+
+/* Critical thread state save area */
+#ifndef __ASSEMBLY__
+typedef struct _tbictx_tag_ {
+    /* TXSTATUS_FLAG_BITS and TXSTATUS_LSM_STEP_BITS from TXSTATUS */
+    short Flags;
+    /* Mask indicates any extended context state saved; 0 -> Never run */
+    short SaveMask;
+    /* Saved PC value */
+    int CurrPC;
+    /* Saved critical register states */
+    TBIDUAL DX[8];
+    /* Background control register states - for cores without catch buffer
+       base in DIVTIME the TXSTATUS bits RPVALID and RPMASK are stored with
+       the real state TXDIVTIME in CurrDIVTIME */
+    int CurrRPT, CurrBPOBITS, CurrMODE, CurrDIVTIME;
+    /* Saved AX register states */
+    TBIDUAL AX[2];
+    TBIEXTCTX Ext;
+    TBIDUAL AX3[TBICTX_AX_REGS-3];
+    
+    /* Any CBUF state to be restored by a handler return must be stored here.
+       Other extended state can be stored anywhere - see __TBICtxSave and
+       __TBICtxRestore. */
+    
+} TBICTX, *PTBICTX;
+
+#ifdef TBI_FASTINT_1_4
+typedef struct _tbictx2_tag_ {
+    TBIDUAL AX[2];    /* AU.0, AU.1 */
+    TBIDUAL DX[2];    /* DU.0, DU.4 */
+    int     CurrMODE;
+    int     CurrRPT;
+    int     CurrSTATUS;
+    void   *CurrPC;   /* PC in PC address space */
+} TBICTX2, *PTBICTX2;
+/* TBICTX2 is followed by:
+ *   TBICTXEXTCB0                if TXSTATUS.CBMarker
+ *   TBIDUAL * TXSTATUS.IRPCount if TXSTATUS.IRPCount > 0
+ *   TBICTXGP                    if using __TBIStdRootIntHandler or __TBIStdCtxSwitchRootIntHandler
+ */
+
+typedef struct _tbictxgp_tag_ {
+    short    DspramSizes;
+    short    SaveMask;
+    void    *pExt;
+    TBIDUAL  DX[6]; /* DU.1-DU.3, DU.5-DU.7 */
+    TBIDUAL  AX[2]; /* AU.2-AU.3 */
+} TBICTXGP, *PTBICTXGP;
+
+#define TBICTXGP_DspramSizes (0)
+#define TBICTXGP_SaveMask    (TBICTXGP_DspramSizes + 2)
+#define TBICTXGP_MAX_BYTES   (2 + 2 + 4 + 8*(6+2))
+
+#endif
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Byte offsets of fields within TBICTX */
+#define TBICTX_Flags            (0)
+#define TBICTX_SaveMask         (2)
+#define TBICTX_CurrPC           (4)
+#define TBICTX_DX               (2 + 2 + 4)
+#define TBICTX_CurrRPT          (2 + 2 + 4 + 8 * 8)
+#define TBICTX_CurrMODE         (2 + 2 + 4 + 8 * 8 + 4 + 4)
+#define TBICTX_AX               (2 + 2 + 4 + 8 * 8 + 4 + 4 + 4 + 4)
+#define TBICTX_Ext              (2 + 2 + 4 + 8 * 8 + 4 + 4 + 4 + 4 + 2 * 8)
+#define TBICTX_Ext_AX2          (TBICTX_Ext + TBIEXTCTX_AX2)
+#define TBICTX_Ext_AX2_U0       (TBICTX_Ext + TBIEXTCTX_AX2 + TBIDUAL_U0)
+#define TBICTX_Ext_AX2_U1       (TBICTX_Ext + TBIEXTCTX_AX2 + TBIDUAL_U1)
+#define TBICTX_Ext_Ctx_pExt     (TBICTX_Ext + TBIEXTCTX_Ctx_pExt)
+#define TBICTX_Ext_Ctx_SaveMask (TBICTX_Ext + TBIEXTCTX_Ctx_SaveMask)
+
+#ifdef TBI_FASTINT_1_4
+#define TBICTX2_BYTES (8 * 2 + 8 * 2 + 4 + 4 + 4 + 4)
+#define TBICTXEXTCB0_BYTES (4 + 4 + 8)
+
+#define TBICTX2_CRIT_MAX_BYTES (TBICTX2_BYTES + TBICTXEXTCB0_BYTES + 6 * TBIDUAL_BYTES)
+#define TBI_SWITCH_NEXT_PC(PC, EXTRA) ((PC) + (EXTRA & 1) ? 8 : 4)
+#endif
+
+#ifndef __ASSEMBLY__
+/* Extended thread state save areas - catch buffer state element */
+typedef struct _tbictxextcb0_tag_ {
+    /* Flags data and address value - see METAC_CATCH_VALUES in machine.h */
+    unsigned long CBFlags, CBAddr;
+    /* 64-bit data */
+    TBIDUAL CBData;
+    
+} TBICTXEXTCB0, *PTBICTXEXTCB0;
+
+/* Read pipeline state saved on later cores after single catch buffer slot */
+typedef struct _tbictxextrp6_tag_ {
+    /* RPMask is TXSTATUS_RPMASK_BITS only, reserved is undefined */
+    unsigned long RPMask, Reserved0;
+    TBIDUAL CBData[6];
+    
+} TBICTXEXTRP6, *PTBICTXEXTRP6;
+
+/* Extended thread state save areas - 8 DU register pairs */
+typedef struct _tbictxextbb8_tag_ {
+    /* Remaining Data unit registers in 64-bit pairs */
+    TBIDUAL UX[8];
+    
+} TBICTXEXTBB8, *PTBICTXEXTBB8;
+
+/* Extended thread state save areas - 3 AU register pairs */
+typedef struct _tbictxextbb3_tag_ {
+    /* Remaining Address unit registers in 64-bit pairs */
+    TBIDUAL UX[3];
+    
+} TBICTXEXTBB3, *PTBICTXEXTBB3;
+
+/* Extended thread state save areas - 4 AU register pairs or 4 FX pairs */
+typedef struct _tbictxextbb4_tag_ {
+    /* Remaining Address unit or FPU registers in 64-bit pairs */
+    TBIDUAL UX[4];
+    
+} TBICTXEXTBB4, *PTBICTXEXTBB4;
+
+/* Extended thread state save areas - Hardware loop states (max 2) */
+typedef struct _tbictxexthl2_tag_ {
+    /* Hardware looping register states */
+    TBIDUAL Start, End, Count;
+    
+} TBICTXEXTHL2, *PTBICTXEXTHL2;
+
+/* Extended thread state save areas - DSP register states */
+typedef struct _tbictxexttdp_tag_ {
+    /* DSP 32-bit accumulator register state (Bits 31:0 of ACX.0) */
+    TBIDUAL Acc32[1];
+    /* DSP > 32-bit accumulator bits 63:32 of ACX.0 (zero-extended) */
+    TBIDUAL Acc64[1];
+    /* Twiddle register state, and three phase increment states */
+    TBIDUAL PReg[4];
+    /* Modulo region size, padded to 64-bits */
+    int CurrMRSIZE, Reserved0;
+    
+} TBICTXEXTTDP, *PTBICTXEXTTDP;
+
+/* Extended thread state save areas - DSP register states including DSP RAM */
+typedef struct _tbictxexttdpr_tag_ {
+    /* DSP 32-bit accumulator register state (Bits 31:0 of ACX.0) */
+    TBIDUAL Acc32[1];
+    /* DSP 40-bit accumulator register state (Bits 39:8 of ACX.0) */
+    TBIDUAL Acc40[1];
+    /* DSP RAM Pointers */
+    TBIDUAL RP0[2],  WP0[2],  RP1[2],  WP1[2];
+    /* DSP RAM Increments */
+    TBIDUAL RPI0[2], WPI0[2], RPI1[2], WPI1[2];
+    /* Template registers */
+    unsigned long Tmplt[16];
+    /* Modulo address region size and DSP RAM module region sizes */
+    int CurrMRSIZE, CurrDRSIZE;
+    
+} TBICTXEXTTDPR, *PTBICTXEXTTDPR;
+
+#ifdef TBI_1_4
+/* The METAC_ID_CORE register state is a marker for the FPU
+   state that is then stored after this core header structure.  */
+#define TBICTXEXTFPU_CONFIG_MASK  ( (METAC_COREID_NOFPACC_BIT+     \
+                                     METAC_COREID_CFGFPU_BITS ) << \
+                                     METAC_COREID_CONFIG_BITS       )
+
+/* Recorded FPU exception state from TXDEFR in DefrFpu */
+#define TBICTXEXTFPU_DEFRFPU_MASK (TXDEFR_FPU_FE_BITS)
+
+/* Extended thread state save areas - FPU register states */
+typedef struct _tbictxextfpu_tag_ {
+    /* Stored METAC_CORE_ID CONFIG */
+    int CfgFpu;
+    /* Stored deferred TXDEFR bits related to FPU
+     *
+     * This is encoded as follows in order to fit into 16-bits:
+     * DefrFPU:15 - 14 <= 0
+     *        :13 -  8 <= TXDEFR:21-16
+     *        : 7 -  6 <= 0
+     *        : 5 -  0 <= TXDEFR:5-0
+     */
+    short DefrFpu;
+
+    /* TXMODE bits related to FPU */
+    short ModeFpu;
+    
+    /* FPU Even/Odd register states */
+    TBIDUAL FX[4];
+   
+    /* if CfgFpu & TBICTX_CFGFPU_FX16_BIT  -> 1 then TBICTXEXTBB4 holds FX.8-15 */
+    /* if CfgFpu & TBICTX_CFGFPU_NOACF_BIT -> 0 then TBICTXEXTFPACC holds state */
+} TBICTXEXTFPU, *PTBICTXEXTFPU;
+
+/* Extended thread state save areas - FPU accumulator state */
+typedef struct _tbictxextfpacc_tag_ {
+    /* FPU accumulator register state - three 64-bit parts */
+    TBIDUAL FAcc32[3];
+    
+} TBICTXEXTFPACC, *PTBICTXEXTFPACC;
+#endif
+
+/* Prototype TBI structure */
+struct _tbi_tag_ ;
+
+/* A 64-bit return value used commonly in the TBI APIs */
+typedef union _tbires_tag_ {
+    /* Save and load this value to get/set the whole result quickly */
+    long long Val;
+
+    /* Parameter of a fnSigs or __TBICtx* call */
+    struct _tbires_sig_tag_ { 
+        /* TXMASK[I] bits zeroed upto and including current trigger level */
+        unsigned short TrigMask;
+        /* Control bits for handlers - see PTBIAPIFN documentation below */
+        unsigned short SaveMask;
+        /* Pointer to the base register context save area of the thread */
+        PTBICTX pCtx;
+    } Sig;
+
+    /* Result of TBIThrdPrivId call */
+    struct _tbires_thrdprivid_tag_ {
+        /* Basic thread identifier; just TBID_THREAD_BITS */
+        int Id;
+        /* None thread number bits; TBID_ISTAT_BIT+TBID_PSTAT_BIT */
+        int Priv;
+    } Thrd;
+
+    /* Parameter and Result of a __TBISwitch call */
+    struct _tbires_switch_tag_ { 
+        /* Parameter passed across context switch */
+        void *pPara;
+        /* Thread context of other Thread includng restore flags */
+        PTBICTX pCtx;
+    } Switch;
+    
+    /* For extended S/W events only */
+    struct _tbires_ccb_tag_ {
+        void *pCCB;
+        int COff;
+    } CCB;
+
+    struct _tbires_tlb_tag_ {
+        int Leaf;  /* TLB Leaf data */
+        int Flags; /* TLB Flags */
+    } Tlb;
+
+#ifdef TBI_FASTINT_1_4
+    struct _tbires_intr_tag_ {
+      short    TrigMask;
+      short    SaveMask;
+      PTBICTX2 pCtx;
+    } Intr;
+#endif
+
+} TBIRES, *PTBIRES;
+#endif /* ifndef __ASSEMBLY__ */
+
+#ifndef __ASSEMBLY__
+/* Prototype for all signal handler functions, called via ___TBISyncTrigger or
+   ___TBIASyncTrigger.
+   
+   State.Sig.TrigMask will indicate the bits set within TXMASKI at
+          the time of the handler call that have all been cleared to prevent
+          nested interrupt occuring immediately.
+   
+   State.Sig.SaveMask is a bit-mask which will be set to Zero when a trigger
+          occurs at background level and TBICTX_CRIT_BIT and optionally
+          TBICTX_CBUF_BIT when a trigger occurs at interrupt level.
+          
+          TBICTX_CBUF_BIT reflects the state of TXSTATUS_CBMARKER_BIT for
+          the interrupted background thread.
+   
+   State.Sig.pCtx will point at a TBICTX structure generated to hold the
+          critical state of the interrupted thread at interrupt level and
+          should be set to NULL when called at background level.
+        
+   Triggers will indicate the status of TXSTAT or TXSTATI sampled by the
+          code that called the handler.
+          
+   InstOrSWSId is defined firstly as 'Inst' if the SigNum is TBID_SIGNUM_SWx
+          and hold the actual SWITCH instruction detected, secondly if SigNum
+          is TBID_SIGNUM_SWS the 'SWSId' is defined to hold the Id of the
+          software signal detected, in other cases the value of this
+          parameter is undefined.
+   
+   pTBI   points at the PTBI structure related to the thread and processing
+          level involved.
+
+   TBIRES return value at both processing levels is similar in terms of any
+          changes that the handler makes. By default the State argument value
+          passed in should be returned.
+          
+      Sig.TrigMask value is bits to OR back into TXMASKI when the handler
+          completes to enable currently disabled interrupts.
+          
+      Sig.SaveMask value is ignored.
+   
+      Sig.pCtx is ignored.
+
+ */
+typedef TBIRES (*PTBIAPIFN)( TBIRES State, int SigNum,
+                             int Triggers, int InstOrSWSId,
+                             volatile struct _tbi_tag_ *pTBI );
+#endif /* ifndef __ASSEMBLY__ */
+
+#ifndef __ASSEMBLY__
+/* The global memory map is described by a list of segment descriptors */
+typedef volatile struct _tbiseg_tag_ {
+    volatile struct _tbiseg_tag_ *pLink;
+    int Id;                           /* Id of the segment */
+    TBISPIN Lock;                     /* Spin-lock for struct (normally 0) */
+    unsigned int Bytes;               /* Size of region in bytes */
+    void *pGAddr;                     /* Base addr of region in global space */
+    void *pLAddr;                     /* Base addr of region in local space */
+    int Data[2];                      /* Segment specific data (may be extended) */
+
+} TBISEG, *PTBISEG;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Offsets of fields in TBISEG structure */
+#define TBISEG_pLink    ( 0)
+#define TBISEG_Id       ( 4)
+#define TBISEG_Lock     ( 8)
+#define TBISEG_Bytes    (12)
+#define TBISEG_pGAddr   (16)
+#define TBISEG_pLAddr   (20)
+#define TBISEG_Data     (24)
+
+#ifndef __ASSEMBLY__
+typedef volatile struct _tbi_tag_ {
+    int SigMask;                      /* Bits set to represent S/W events */
+    PTBIKICK pKick;                   /* Kick addr for S/W events */
+    void *pCCB;                       /* Extended S/W events */
+    PTBISEG pSeg;                     /* Related segment structure */
+    PTBIAPIFN fnSigs[TBID_SIGNUM_MAX+1];/* Signal handler API table */
+} *PTBI, TBI;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Byte offsets of fields within TBI */
+#define TBI_SigMask     (0)
+#define TBI_pKick       (4)
+#define TBI_pCCB        (8)
+#define TBI_pSeg       (12)
+#define TBI_fnSigs     (16)
+
+#ifdef TBI_1_4
+#ifndef __ASSEMBLY__
+/* This handler should be used for TBID_SIGNUM_DFR */
+extern TBIRES __TBIHandleDFR ( TBIRES State, int SigNum,
+                               int Triggers, int InstOrSWSId,
+                               volatile struct _tbi_tag_ *pTBI );
+#endif
+#endif
+
+/* String table entry - special values */
+#define METAG_TBI_STRS (0x5300) /* Tag      : If entry is valid */
+#define METAG_TBI_STRE (0x4500) /* Tag      : If entry is end of table */
+#define METAG_TBI_STRG (0x4700) /* Tag      : If entry is a gap */
+#define METAG_TBI_STRX (0x5A00) /* TransLen : If no translation present */
+
+#ifndef __ASSEMBLY__
+typedef volatile struct _tbistr_tag_ {
+    short Bytes;                      /* Length of entry in Bytes */
+    short Tag;                        /* Normally METAG_TBI_STRS(0x5300) */
+    short Len;                        /* Length of the string entry (incl null) */
+    short TransLen;                   /* Normally METAG_TBI_STRX(0x5A00) */
+    char String[8];                   /* Zero terminated (may-be bigger) */
+
+} TBISTR, *PTBISTR;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Cache size information - available as fields of Data[1] of global heap
+   segment */
+#define METAG_TBI_ICACHE_SIZE_S    0             /* see comments below */
+#define METAG_TBI_ICACHE_SIZE_BITS 0x0000000F
+#define METAG_TBI_ICACHE_FILL_S    4
+#define METAG_TBI_ICACHE_FILL_BITS 0x000000F0
+#define METAG_TBI_DCACHE_SIZE_S    8
+#define METAG_TBI_DCACHE_SIZE_BITS 0x00000F00
+#define METAG_TBI_DCACHE_FILL_S    12
+#define METAG_TBI_DCACHE_FILL_BITS 0x0000F000
+
+/* METAG_TBI_xCACHE_SIZE
+   Describes the physical cache size rounded up to the next power of 2
+   relative to a 16K (2^14) cache. These sizes are encoded as a signed addend
+   to this base power of 2, for example
+      4K -> 2^12 -> -2  (i.e. 12-14)
+      8K -> 2^13 -> -1
+     16K -> 2^14 ->  0
+     32K -> 2^15 -> +1
+     64K -> 2^16 -> +2
+    128K -> 2^17 -> +3
+
+   METAG_TBI_xCACHE_FILL
+   Describes the physical cache size within the power of 2 area given by
+   the value above. For example a 10K cache may be represented as having
+   nearest size 16K with a fill of 10 sixteenths. This is encoded as the
+   number of unused 1/16ths, for example
+     0000 ->  0 -> 16/16
+     0001 ->  1 -> 15/16
+     0010 ->  2 -> 14/16
+     ...
+     1111 -> 15 ->  1/16
+ */
+
+#define METAG_TBI_CACHE_SIZE_BASE_LOG2 14
+
+/* Each declaration made by this macro generates a TBISTR entry */
+#ifndef __ASSEMBLY__
+#define TBISTR_DECL( Name, Str )                                       \
+    __attribute__ ((__section__ (".tbistr") )) const char Name[] = #Str
+#endif
+
+/* META timer values - see below for Timer support routines */
+#define TBI_TIMERWAIT_MIN (-16)         /* Minimum 'recommended' period */
+#define TBI_TIMERWAIT_MAX (-0x7FFFFFFF) /* Maximum 'recommended' period */
+
+#ifndef __ASSEMBLY__
+/* These macros allow direct access from C to any register known to the
+   assembler or defined in machine.h. Example candidates are TXTACTCYC,
+   TXIDLECYC, and TXPRIVEXT. Note that when higher level macros and routines
+   like the timer and trigger handling features below these should be used in
+   preference to this direct low-level access mechanism. */
+#define TBI_GETREG( Reg )                                  __extension__ ({\
+   int __GRValue;                                                          \
+   __asm__ volatile ("MOV\t%0," #Reg "\t/* (*TBI_GETREG OK) */" :          \
+                     "=r" (__GRValue) );                                   \
+    __GRValue;                                                            })
+
+#define TBI_SETREG( Reg, Value )                                       do {\
+   int __SRValue = Value;                                                  \
+   __asm__ volatile ("MOV\t" #Reg ",%0\t/* (*TBI_SETREG OK) */" :          \
+                     : "r" (__SRValue) );                       } while (0)
+
+#define TBI_SWAPREG( Reg, Value )                                      do {\
+   int __XRValue = (Value);                                                \
+   __asm__ volatile ("SWAP\t" #Reg ",%0\t/* (*TBI_SWAPREG OK) */" :        \
+                     "=r" (__XRValue) : "0" (__XRValue) );                 \
+   Value = __XRValue;                                           } while (0)
+
+/* Obtain and/or release global critical section lock given that interrupts
+   are already disabled and/or should remain disabled. */
+#define TBI_NOINTSCRITON                                             do {\
+   __asm__ volatile ("LOCK1\t\t/* (*TBI_NOINTSCRITON OK) */");} while (0)
+#define TBI_NOINTSCRITOFF                                             do {\
+   __asm__ volatile ("LOCK0\t\t/* (*TBI_NOINTSCRITOFF OK) */");} while (0)
+/* Optimised in-lining versions of the above macros */
+
+#define TBI_LOCK( TrigState )                                          do {\
+   int __TRValue;                                                          \
+   int __ALOCKHI = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000;                \
+   __asm__ volatile ("MOV %0,#0\t\t/* (*TBI_LOCK ... */\n\t"               \
+                     "SWAP\t%0,TXMASKI\t/* ... */\n\t"                     \
+                     "LOCK2\t\t/* ... */\n\t"                              \
+                     "SETD\t[%1+#0x40],D1RtP /* ... OK) */" :              \
+                     "=r&" (__TRValue) : "u" (__ALOCKHI) );                \
+   TrigState = __TRValue;                                       } while (0)
+#define TBI_CRITON( TrigState )                                        do {\
+   int __TRValue;                                                          \
+   __asm__ volatile ("MOV %0,#0\t\t/* (*TBI_CRITON ... */\n\t"             \
+                     "SWAP\t%0,TXMASKI\t/* ... */\n\t"                     \
+                     "LOCK1\t\t/* ... OK) */" :                            \
+                     "=r" (__TRValue) );                                   \
+   TrigState = __TRValue;                                       } while (0)
+
+#define TBI_INTSX( TrigState )                                         do {\
+   int __TRValue = TrigState;                                              \
+   __asm__ volatile ("SWAP\t%0,TXMASKI\t/* (*TBI_INTSX OK) */" :           \
+                     "=r" (__TRValue) : "0" (__TRValue) );                 \
+   TrigState = __TRValue;                                       } while (0)
+
+#define TBI_UNLOCK( TrigState )                                        do {\
+   int __TRValue = TrigState;                                              \
+   int __ALOCKHI = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000;                \
+   __asm__ volatile ("SETD\t[%1+#0x00],D1RtP\t/* (*TBI_UNLOCK ... */\n\t"  \
+                     "LOCK0\t\t/* ... */\n\t"                              \
+                     "MOV\tTXMASKI,%0\t/* ... OK) */" :                    \
+                     : "r" (__TRValue), "u" (__ALOCKHI) );      } while (0)
+
+#define TBI_CRITOFF( TrigState )                                       do {\
+   int __TRValue = TrigState;                                              \
+   __asm__ volatile ("LOCK0\t\t/* (*TBI_CRITOFF ... */\n\t"                \
+                     "MOV\tTXMASKI,%0\t/* ... OK) */" :                    \
+                     : "r" (__TRValue) );                       } while (0)
+
+#define TBI_TRIGSX( SrcDst ) do { TBI_SWAPREG( TXMASK, SrcDst );} while (0)
+
+/* Composite macros to perform logic ops on INTS or TRIGS masks */
+#define TBI_INTSOR( Bits )                                              do {\
+    int __TT = 0; TBI_INTSX(__TT);                                          \
+    __TT |= (Bits); TBI_INTSX(__TT);                             } while (0)
+    
+#define TBI_INTSAND( Bits )                                             do {\
+    int __TT = 0; TBI_INTSX(__TT);                                          \
+    __TT &= (Bits); TBI_INTSX(__TT);                             } while (0)
+
+#ifdef TBI_1_4
+#define TBI_DEFRICTRLSOR( Bits )                                        do {\
+    int __TT = TBI_GETREG( CT.20 );                                         \
+    __TT |= (Bits); TBI_SETREG( CT.20, __TT);                    } while (0)
+    
+#define TBI_DEFRICTRLSAND( Bits )                                       do {\
+    int __TT = TBI_GETREG( TXDEFR );                                        \
+    __TT &= (Bits); TBI_SETREG( CT.20, __TT);                    } while (0)
+#endif
+
+#define TBI_TRIGSOR( Bits )                                             do {\
+    int __TT = TBI_GETREG( TXMASK );                                        \
+    __TT |= (Bits); TBI_SETREG( TXMASK, __TT);                   } while (0)
+    
+#define TBI_TRIGSAND( Bits )                                            do {\
+    int __TT = TBI_GETREG( TXMASK );                                        \
+    __TT &= (Bits); TBI_SETREG( TXMASK, __TT);                   } while (0)
+
+/* Macros to disable and re-enable interrupts using TBI_INTSX, deliberate
+   traps and exceptions can still be handled within the critical section. */
+#define TBI_STOPINTS( Value )                                           do {\
+    int __TT = TBI_GETREG( TXMASKI );                                       \
+    __TT &= TXSTATI_BGNDHALT_BIT; TBI_INTSX( __TT );                        \
+    Value = __TT;                                                } while (0)
+#define TBI_RESTINTS( Value )                                           do {\
+    int __TT = Value; TBI_INTSX( __TT );                         } while (0)
+
+/* Return pointer to segment list at current privilege level */
+PTBISEG __TBISegList( void );
+
+/* Search the segment list for a match given Id, pStart can be NULL */
+PTBISEG __TBIFindSeg( PTBISEG pStart, int Id );
+
+/* Prepare a new segment structure using space from within another */
+PTBISEG __TBINewSeg( PTBISEG pFromSeg, int Id, unsigned int Bytes );
+
+/* Prepare a new segment using any global or local heap segments available */
+PTBISEG __TBIMakeNewSeg( int Id, unsigned int Bytes );
+
+/* Insert a new segment into the segment list so __TBIFindSeg can locate it */
+void __TBIAddSeg( PTBISEG pSeg );
+#define __TBIADDSEG_DEF     /* Some versions failed to define this */
+
+/* Return Id of current thread; TBID_ISTAT_BIT+TBID_THREAD_BITS */
+int __TBIThreadId( void );
+
+/* Return TBIRES.Thrd data for current thread */
+TBIRES __TBIThrdPrivId( void );
+
+/* Return pointer to current threads TBI root block.
+   Id implies whether Int or Background root block is required */
+PTBI __TBI( int Id );
+
+/* Try to set Mask bit using the spin-lock protocol, return 0 if fails and 
+   new state if succeeds */
+int __TBIPoll( PTBISPIN pLock, int Mask );
+
+/* Set Mask bits via the spin-lock protocol in *pLock, return new state */
+int __TBISpin( PTBISPIN pLock, int Mask );
+
+/* Default handler set up for all TBI.fnSigs entries during initialisation */
+TBIRES __TBIUnExpXXX( TBIRES State, int SigNum,
+                   int Triggers, int Inst, PTBI pTBI );
+
+/* Call this routine to service triggers at background processing level. The
+   TBID_POLL_BIT of the Id parameter value will be used to indicate that the
+   routine should return if no triggers need to be serviced initially. If this
+   bit is not set the routine will block until one trigger handler is serviced
+   and then behave like the poll case servicing any remaining triggers
+   actually outstanding before returning. Normally the State parameter should
+   be simply initialised to zero and the result should be ignored, other
+   values/options are for internal use only. */
+TBIRES __TBISyncTrigger( TBIRES State, int Id );
+
+/* Call this routine to enable processing of triggers by signal handlers at
+   interrupt level. The State parameter value passed is returned by this
+   routine. The State.Sig.TrigMask field also specifies the initial
+   state of the interrupt mask register TXMASKI to be setup by the call.
+   The other parts of the State parameter are ignored unless the PRIV bit is
+   set in the SaveMask field. In this case the State.Sig.pCtx field specifies
+   the base of the stack to which the interrupt system should switch into
+   as it saves the state of the previously executing code. In the case the
+   thread will be unprivileged as it continues execution at the return
+   point of this routine and it's future state will be effectively never
+   trusted to be valid. */
+TBIRES __TBIASyncTrigger( TBIRES State );
+
+/* Call this to swap soft threads executing at the background processing level.
+   The TBIRES returned to the new thread will be the same as the NextThread
+   value specified to the call. The NextThread.Switch.pCtx value specifies
+   which thread context to restore and the NextThread.Switch.Para value can
+   hold an arbitrary expression to be passed between the threads. The saved
+   state of the previous thread will be stored in a TBICTX descriptor created
+   on it's stack and the address of this will be stored into the *rpSaveCtx
+   location specified. */
+TBIRES __TBISwitch( TBIRES NextThread, PTBICTX *rpSaveCtx );
+
+/* Call this to initialise a stack frame ready for further use, up to four
+   32-bit arguments may be specified after the fixed args to be passed via
+   the new stack pStack to the routine specified via fnMain. If the
+   main-line routine ever returns the thread will operate as if main itself
+   had returned and terminate with the return code given. */
+typedef int (*PTBIMAINFN)( TBIRES Arg /*, <= 4 additional 32-bit args */ );
+PTBICTX __TBISwitchInit( void *pStack, PTBIMAINFN fnMain, ... );
+
+/* Call this to resume a thread from a saved synchronous TBICTX state.
+   The TBIRES returned to the new thread will be the same as the NextThread
+   value specified to the call. The NextThread.Switch.pCtx value specifies
+   which thread context to restore and the NextThread.Switch.Para value can
+   hold an arbitrary expression to be passed between the threads. The context
+   of the calling thread is lost and this routine never returns to the
+   caller. The TrigsMask value supplied is ored into TXMASKI to enable
+   interrupts after the context of the new thread is established. */
+void __TBISyncResume( TBIRES NextThread, int TrigsMask );
+
+/* Call these routines to save and restore the extended states of
+   scheduled tasks. */
+void *__TBICtxSave( TBIRES State, void *pExt );
+void *__TBICtxRestore( TBIRES State, void *pExt );
+
+#ifdef TBI_1_4
+#ifdef TBI_FASTINT_1_4
+/* Call these routines to copy the GP state to a separate buffer
+ * Only necessary for context switching.
+ */
+PTBICTXGP __TBICtx2SaveCrit( PTBICTX2 pCurrentCtx, PTBICTX2 pSaveCtx );
+void *__TBICtx2SaveGP( PTBICTXGP pCurrentCtxGP, PTBICTXGP pSaveCtxGP );
+
+/* Call these routines to save and restore the extended states of
+   scheduled tasks. */
+void *__TBICtx2Save( PTBICTXGP pCtxGP, short SaveMask, void *pExt );
+void *__TBICtx2Restore( PTBICTX2 pCtx, short SaveMask, void *pExt );
+#endif
+
+/* If FPAC flag is set then significant FPU context exists. Call these routine
+   to save and restore it */
+void *__TBICtxFPUSave( TBIRES State, void *pExt );
+void *__TBICtxFPURestore( TBIRES State, void *pExt );
+
+#ifdef TBI_FASTINT_1_4
+extern void *__TBICtx2FPUSave (PTBICTXGP, short, void*);
+extern void *__TBICtx2FPURestore (PTBICTXGP, short, void*);
+#endif
+#endif
+
+#ifdef TBI_1_4
+/* Call these routines to save and restore DSPRAM. */
+void *__TBIDspramSaveA (short DspramSizes, void *pExt);
+void *__TBIDspramSaveB (short DspramSizes, void *pExt);
+void *__TBIDspramRestoreA (short DspramSizes, void *pExt);
+void *__TBIDspramRestoreB (short DspramSizes, void *pExt);
+#endif
+
+/* This routine should be used at the entrypoint of interrupt handlers to
+   re-enable higher priority interrupts and/or save state from the previously
+   executing background code. State is a TBIRES.Sig parameter with NoNestMask
+   indicating the triggers (if any) that should remain disabled and SaveMask
+   CBUF bit indicating the if the hardware catch buffer is dirty. Optionally
+   any number of extended state bits X??? including XCBF can be specified to
+   force a nested state save call to __TBICtxSave before the current routine
+   continues. (In the latter case __TBICtxRestore should be called to restore
+   any extended states before the background thread of execution is resumed) 
+   
+   By default (no X??? bits specified in SaveMask) this routine performs a
+   sub-call to __TBICtxSave with the pExt and State parameters specified IF
+   some triggers could be serviced while the current interrupt handler
+   executes and the hardware catch buffer is actually dirty. In this case
+   this routine provides the XCBF bit in State.Sig.SaveMask to force the
+   __TBICtxSave to extract the current catch state.
+   
+   The NoNestMask parameter should normally indicate that the same or lower
+   triggers than those provoking the current handler call should not be
+   serviced in nested calls, zero may be specified if all possible interrupts
+   are to be allowed.
+   
+   The TBIRES.Sig value returned will be similar to the State parameter
+   specified with the XCBF bit ORed into it's SaveMask if a context save was
+   required and fewer bits set in it's TrigMask corresponding to the same/lower
+   priority interrupt triggers still not enabled. */
+TBIRES __TBINestInts( TBIRES State, void *pExt, int NoNestMask );
+
+/* This routine causes the TBICTX structure specified in State.Sig.pCtx to
+   be restored. This implies that execution will not return to the caller.
+   The State.Sig.TrigMask field will be restored during the context switch
+   such that any immediately occuring interrupts occur in the context of the
+   newly specified task. The State.Sig.SaveMask parameter is ignored. */
+void __TBIASyncResume( TBIRES State );
+
+/* Call this routine to enable fastest possible processing of one or more
+   interrupt triggers via a unified signal handler. The handler concerned
+   must simple return after servicing the related hardware.
+   The State.Sig.TrigMask parameter indicates the interrupt triggers to be
+   enabled and the Thin.Thin.fnHandler specifies the routine to call and
+   the whole Thin parameter value will be passed to this routine unaltered as
+   it's first parameter. */
+void __TBIASyncThin( TBIRES State, TBIRES Thin );
+
+/* Do this before performing your own direct spin-lock access - use TBI_LOCK */
+int __TBILock( void );
+
+/* Do this after performing your own direct spin-lock access - use TBI_UNLOCK */
+void __TBIUnlock( int TrigState );
+
+/* Obtain and release global critical section lock - only stops execution
+   of interrupts on this thread and similar critical section code on other
+   local threads - use TBI_CRITON or TBI_CRITOFF */
+int __TBICritOn( void );
+void __TBICritOff( int TrigState );
+
+/* Change INTS (TXMASKI) - return old state - use TBI_INTSX */
+int __TBIIntsX( int NewMask );
+
+/* Change TRIGS (TXMASK) - return old state - use TBI_TRIGSX */
+int __TBITrigsX( int NewMask );
+
+/* This function initialises a timer for first use, only the TBID_ISTAT_BIT
+   of the Id parameter is used to indicate which timer is to be modified. The
+   Wait value should either be zero to disable the timer concerned or be in
+   the recommended TBI_TIMERWAIT_* range to specify the delay required before
+   the first timer trigger occurs.
+      
+   The TBID_ISTAT_BIT of the Id parameter similar effects all other timer
+   support functions (see below). */
+void __TBITimerCtrl( int Id, int Wait );
+
+/* This routine returns a 64-bit time stamp value that is initialised to zero
+   via a __TBITimerCtrl timer enabling call. */
+long long __TBITimeStamp( int Id );
+
+/* To manage a periodic timer each period elapsed should be subracted from
+   the current timer value to attempt to set up the next timer trigger. The
+   Wait parameter should be a value in the recommended TBI_TIMERWAIT_* range.
+   The return value is the new aggregate value that the timer was updated to,
+   if this is less than zero then a timer trigger is guaranteed to be
+   generated after the number of ticks implied, if a positive result is
+   returned either itterative or step-wise corrective action must be taken to
+   resynchronise the timer and hence provoke a future timer trigger. */
+int __TBITimerAdd( int Id, int Wait );
+
+/* String table search function, pStart is first entry to check or NULL,
+   pStr is string data to search for and MatchLen is either length of string
+   to compare for an exact match or negative length to compare for partial
+   match. */
+const TBISTR *__TBIFindStr( const TBISTR *pStart,
+                            const char *pStr, int MatchLen );
+
+/* String table translate function, pStr is text to translate and Len is
+   it's length. Value returned may not be a string pointer if the
+   translation value is really some other type, 64-bit alignment of the return
+   pointer is guaranteed so almost any type including a structure could be
+   located with this routine. */ 
+const void *__TBITransStr( const char *pStr, int Len );
+
+
+
+/* Arbitrary physical memory access windows, use different Channels to avoid
+   conflict/thrashing within a single piece of code. */
+void *__TBIPhysAccess( int Channel, int PhysAddr, int Bytes );
+void __TBIPhysRelease( int Channel, void *pLinAddr );
+
+#ifdef METAC_1_0
+/* Data cache function nullified because data cache is off */
+#define TBIDCACHE_FLUSH( pAddr )
+#define TBIDCACHE_PRELOAD( Type, pAddr ) ((Type) (pAddr))
+#define TBIDCACHE_REFRESH( Type, pAddr ) ((Type) (pAddr))
+#endif
+#ifdef METAC_1_1
+/* To flush a single cache line from the data cache using a linear address */
+#define TBIDCACHE_FLUSH( pAddr )          ((volatile char *) \
+                 (((unsigned int) (pAddr))>>LINSYSLFLUSH_S))[0] = 0
+
+extern void * __builtin_dcache_preload (void *);
+
+/* Try to ensure that the data at the address concerned is in the cache */
+#define TBIDCACHE_PRELOAD( Type, Addr )                                    \
+  ((Type) __builtin_dcache_preload ((void *)(Addr)))
+
+extern void * __builtin_dcache_refresh (void *);
+
+/* Flush any old version of data from address and re-load a new copy */
+#define TBIDCACHE_REFRESH( Type, Addr )                   __extension__ ({ \
+  Type __addr = (Type)(Addr);                                              \
+  (void)__builtin_dcache_refresh ((void *)(((unsigned int)(__addr))>>6));  \
+  __addr; })
+
+#endif
+#ifndef METAC_1_0
+#ifndef METAC_1_1
+/* Support for DCACHE builtin */
+extern void __builtin_dcache_flush (void *);
+
+/* To flush a single cache line from the data cache using a linear address */
+#define TBIDCACHE_FLUSH( Addr )                                            \
+  __builtin_dcache_flush ((void *)(Addr))
+
+extern void * __builtin_dcache_preload (void *);
+
+/* Try to ensure that the data at the address concerned is in the cache */
+#define TBIDCACHE_PRELOAD( Type, Addr )                                    \
+  ((Type) __builtin_dcache_preload ((void *)(Addr)))
+
+extern void * __builtin_dcache_refresh (void *);
+
+/* Flush any old version of data from address and re-load a new copy */
+#define TBIDCACHE_REFRESH( Type, Addr )                                    \
+  ((Type) __builtin_dcache_refresh ((void *)(Addr)))
+
+#endif
+#endif
+
+/* Flush the MMCU cache */
+#define TBIMCACHE_FLUSH() { ((volatile int *) LINSYSCFLUSH_MMCU)[0] = 0; }
+
+#ifdef METAC_2_1
+/* Obtain the MMU table entry for the specified address */
+#define TBIMTABLE_LEAFDATA(ADDR) TBIXCACHE_RD((int)(ADDR) & (-1<<6))
+
+#ifndef __ASSEMBLY__
+/* Obtain the full MMU table entry for the specified address */
+#define TBIMTABLE_DATA(ADDR) __extension__ ({ TBIRES __p;                     \
+                                              __p.Val = TBIXCACHE_RL((int)(ADDR) & (-1<<6));   \
+                                              __p; })
+#endif
+#endif
+
+/* Combine a physical base address, and a linear address
+ * Internal use only
+ */
+#define _TBIMTABLE_LIN2PHYS(PHYS, LIN, LMASK) (void*)(((int)(PHYS)&0xFFFFF000)\
+                                               +((int)(LIN)&(LMASK)))
+
+/* Convert a linear to a physical address */
+#define TBIMTABLE_LIN2PHYS(LEAFDATA, ADDR)                                    \
+          (((LEAFDATA) & CRLINPHY0_VAL_BIT)                                   \
+              ? _TBIMTABLE_LIN2PHYS(LEAFDATA, ADDR, 0x00000FFF)               \
+              : 0)
+
+/* Debug support - using external debugger or host */
+void __TBIDumpSegListEntries( void );
+void __TBILogF( const char *pFmt, ... );
+void __TBIAssert( const char *pFile, int LineNum, const char *pExp );
+void __TBICont( const char *pMsg, ... ); /* TBIAssert -> 'wait for continue' */
+
+/* Array of signal name data for debug messages */
+extern const char __TBISigNames[];
+#endif /* ifndef __ASSEMBLY__ */
+
+
+
+/* Scale of sub-strings in the __TBISigNames string list */
+#define TBI_SIGNAME_SCALE   4
+#define TBI_SIGNAME_SCALE_S 2
+
+#define TBI_1_3 
+
+#ifdef TBI_1_3
+
+#ifndef __ASSEMBLY__
+#define TBIXCACHE_RD(ADDR)                                 __extension__ ({\
+    void * __Addr = (void *)(ADDR);                                        \
+    int __Data;                                                            \
+    __asm__ volatile ( "CACHERD\t%0,[%1+#0]" :                             \
+                       "=r" (__Data) : "r" (__Addr) );                     \
+    __Data;                                                               })
+
+#define TBIXCACHE_RL(ADDR)                                 __extension__ ({\
+    void * __Addr = (void *)(ADDR);                                        \
+    long long __Data;                                                      \
+    __asm__ volatile ( "CACHERL\t%0,%t0,[%1+#0]" :                         \
+                       "=d" (__Data) : "r" (__Addr) );                     \
+    __Data;                                                               })
+
+#define TBIXCACHE_WD(ADDR, DATA)                                      do {\
+    void * __Addr = (void *)(ADDR);                                       \
+    int __Data = DATA;                                                    \
+    __asm__ volatile ( "CACHEWD\t[%0+#0],%1" :                            \
+                       : "r" (__Addr), "r" (__Data) );          } while(0)
+
+#define TBIXCACHE_WL(ADDR, DATA)                                      do {\
+    void * __Addr = (void *)(ADDR);                                       \
+    long long __Data = DATA;                                              \
+    __asm__ volatile ( "CACHEWL\t[%0+#0],%1,%t1" :                        \
+                       : "r" (__Addr), "r" (__Data) );          } while(0)
+
+#ifdef TBI_4_0
+
+#define TBICACHE_FLUSH_L1D_L2(ADDR)                                       \
+  TBIXCACHE_WD(ADDR, CACHEW_FLUSH_L1D_L2)
+#define TBICACHE_WRITEBACK_L1D_L2(ADDR)                                   \
+  TBIXCACHE_WD(ADDR, CACHEW_WRITEBACK_L1D_L2)
+#define TBICACHE_INVALIDATE_L1D(ADDR)                                     \
+  TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1D)
+#define TBICACHE_INVALIDATE_L1D_L2(ADDR)                                  \
+  TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1D_L2)
+#define TBICACHE_INVALIDATE_L1DTLB(ADDR)                                  \
+  TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1DTLB)
+#define TBICACHE_INVALIDATE_L1I(ADDR)                                     \
+  TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1I)
+#define TBICACHE_INVALIDATE_L1ITLB(ADDR)                                  \
+  TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1ITLB)
+
+#endif /* TBI_4_0 */
+#endif /* ifndef __ASSEMBLY__ */
+
+/* 
+ * Calculate linear PC value from real PC and Minim mode control, the LSB of
+ * the result returned indicates if address compression has occured.
+ */
+#ifndef __ASSEMBLY__
+#define METAG_LINPC( PCVal )                                              (\
+    ( (TBI_GETREG(TXPRIVEXT) & TXPRIVEXT_MINIMON_BIT) != 0 ) ?           ( \
+        ( ((PCVal) & 0x00900000) == 0x00900000 ) ?                         \
+          (((PCVal) & 0xFFE00000) + (((PCVal) & 0x001FFFFC)>>1) + 1) :     \
+        ( ((PCVal) & 0x00800000) == 0x00000000 ) ?                         \
+          (((PCVal) & 0xFF800000) + (((PCVal) & 0x007FFFFC)>>1) + 1) :     \
+                                                             (PCVal)   )   \
+                                                                 : (PCVal) )
+#define METAG_LINPC_X2BIT 0x00000001       /* Make (Size>>1) if compressed */
+
+/* Convert an arbitrary Linear address into a valid Minim PC or return 0 */
+#define METAG_PCMINIM( LinVal )                                           (\
+        (((LinVal) & 0x00980000) == 0x00880000) ?                          \
+            (((LinVal) & 0xFFE00000) + (((LinVal) & 0x000FFFFE)<<1)) :     \
+        (((LinVal) & 0x00C00000) == 0x00000000) ?                          \
+            (((LinVal) & 0xFF800000) + (((LinVal) & 0x003FFFFE)<<1)) : 0   )
+
+/* Reverse a METAG_LINPC conversion step to return the original PCVal */
+#define METAG_PCLIN( LinVal )                              ( 0xFFFFFFFC & (\
+        ( (LinVal & METAG_LINPC_X2BIT) != 0 ) ? METAG_PCMINIM( LinVal ) :  \
+                                                               (LinVal)   ))
+
+/*
+ * Flush the MMCU Table cache privately for each thread. On cores that do not
+ * support per-thread flushing it will flush all threads mapping data.
+ */
+#define TBIMCACHE_TFLUSH(Thread)                                   do {\
+    ((volatile int *)( LINSYSCFLUSH_TxMMCU_BASE            +           \
+                      (LINSYSCFLUSH_TxMMCU_STRIDE*(Thread)) ))[0] = 0; \
+                                                             } while(0)
+
+/*
+ * To flush a single linear-matched cache line from the code cache. In
+ * cases where Minim is possible the METAC_LINPC operation must be used
+ * to pre-process the address being flushed.
+ */
+#define TBIICACHE_FLUSH( pAddr ) TBIXCACHE_WD (pAddr, CACHEW_ICACHE_BIT)
+
+/* To flush a single linear-matched mapping from code/data MMU table cache */
+#define TBIMCACHE_AFLUSH( pAddr, SegType )                                \
+    TBIXCACHE_WD(pAddr, CACHEW_TLBFLUSH_BIT + (                           \
+                 ((SegType) == TBID_SEGTYPE_TEXT) ? CACHEW_ICACHE_BIT : 0 ))
+
+/*
+ * To flush translation data corresponding to a range of addresses without
+ * using TBITCACHE_FLUSH to flush all of this threads translation data. It
+ * is necessary to know what stride (>= 4K) must be used to flush a specific
+ * region.
+ *
+ * For example direct mapped regions use the maximum page size (512K) which may
+ * mean that only one flush is needed to cover the sub-set of the direct
+ * mapped area used since it was setup.
+ *
+ * The function returns the stride on which flushes should be performed.
+ *
+ * If 0 is returned then the region is not subject to MMU caching, if -1 is
+ * returned then this indicates that only TBIMCACHE_TFLUSH can be used to
+ * flush the region concerned rather than TBIMCACHE_AFLUSH which this
+ * function is designed to support.
+ */
+int __TBIMMUCacheStride( const void *pStart, int Bytes );
+
+/*
+ * This function will use the above lower level functions to achieve a MMU
+ * table data flush in an optimal a fashion as possible. On a system that
+ * supports linear address based caching this function will also call the
+ * code or data cache flush functions to maintain address/data coherency.
+ *
+ * SegType should be TBID_SEGTYPE_TEXT if the address range is for code or
+ * any other value such as TBID_SEGTYPE_DATA for data. If an area is
+ * used in both ways then call this function twice; once for each.
+ */
+void __TBIMMUCacheFlush( const void *pStart, int Bytes, int SegType );
+
+/*
+ * Cached Core mode setup and flush functions allow one code and one data
+ * region of the corresponding global or local cache partion size to be
+ * locked into the corresponding cache memory. This prevents normal LRU
+ * logic discarding the code or data and avoids write-thru bandwidth in
+ * data areas. Code mappings are selected by specifying TBID_SEGTYPE_TEXT
+ * for SegType, otherwise data mappings are created.
+ * 
+ * Mode supplied should always contain the VALID bit and WINx selection data.
+ * Data areas will be mapped read-only if the WRITE bit is not added.
+ *
+ * The address returned by the Opt function will either be the same as that
+ * passed in (if optimisation cannot be supported) or the base of the new core
+ * cached region in linear address space. The returned address must be passed
+ * into the End function to remove the mapping when required. If a non-core
+ * cached memory address is passed into it the End function has no effect.
+ * Note that the region accessed MUST be flushed from the appropriate cache
+ * before the End function is called to deliver correct operation.
+ */
+void *__TBICoreCacheOpt( const void *pStart, int Bytes, int SegType, int Mode );
+void __TBICoreCacheEnd( const void *pOpt, int Bytes, int SegType );
+
+/*
+ * Optimise physical access channel and flush side effects before releasing
+ * the channel. If pStart is NULL the whole region must be flushed and this is
+ * done automatically by the channel release function if optimisation is
+ * enabled. Flushing the specific region that may have been accessed before
+ * release should optimises this process. On physically cached systems we do
+ * not flush the code/data caches only the MMU table data needs flushing.
+ */
+void __TBIPhysOptim( int Channel, int IMode, int DMode );
+void __TBIPhysFlush( int Channel, const void *pStart, int Bytes );
+#endif
+#endif /* ifdef TBI_1_3 */
+
+#endif /* _ASM_METAG_TBX_H_ */
diff --git a/arch/metag/include/asm/tcm.h b/arch/metag/include/asm/tcm.h
new file mode 100644
index 0000000..7711c31
--- /dev/null
+++ b/arch/metag/include/asm/tcm.h
@@ -0,0 +1,30 @@
+#ifndef __ASM_TCM_H__
+#define __ASM_TCM_H__
+
+#include <linux/ioport.h>
+#include <linux/list.h>
+
+struct tcm_allocation {
+	struct list_head list;
+	unsigned int tag;
+	unsigned long addr;
+	unsigned long size;
+};
+
+/*
+ * TCM memory region descriptor.
+ */
+struct tcm_region {
+	unsigned int tag;
+	struct resource res;
+};
+
+#define TCM_INVALID_TAG	0xffffffff
+
+unsigned long tcm_alloc(unsigned int tag, size_t len);
+void tcm_free(unsigned int tag, unsigned long addr, size_t len);
+unsigned int tcm_lookup_tag(unsigned long p);
+
+int tcm_add_region(struct tcm_region *reg);
+
+#endif
diff --git a/arch/metag/include/asm/thread_info.h b/arch/metag/include/asm/thread_info.h
new file mode 100644
index 0000000..0ecd34d
--- /dev/null
+++ b/arch/metag/include/asm/thread_info.h
@@ -0,0 +1,155 @@
+/* thread_info.h: Meta low-level thread information
+ *
+ * Copyright (C) 2002  David Howells (dhowells@redhat.com)
+ * - Incorporating suggestions made by Linus Torvalds and Dave Miller
+ *
+ * Meta port by Imagination Technologies
+ */
+
+#ifndef _ASM_THREAD_INFO_H
+#define _ASM_THREAD_INFO_H
+
+#include <linux/compiler.h>
+#include <asm/page.h>
+
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#endif
+
+/*
+ * low level task data that entry.S needs immediate access to
+ * - this struct should fit entirely inside of one cache line
+ * - this struct shares the supervisor stack pages
+ * - if the contents of this structure are changed, the assembly constants must
+ *   also be changed
+ */
+#ifndef __ASSEMBLY__
+
+/* This must be 8 byte aligned so we can ensure stack alignment. */
+struct thread_info {
+	struct task_struct *task;	/* main task structure */
+	struct exec_domain *exec_domain;	/* execution domain */
+	unsigned long flags;	/* low level flags */
+	unsigned long status;	/* thread-synchronous flags */
+	u32 cpu;		/* current CPU */
+	int preempt_count;	/* 0 => preemptable, <0 => BUG */
+
+	mm_segment_t addr_limit;	/* thread address space */
+	struct restart_block restart_block;
+
+	u8 supervisor_stack[0];
+};
+
+#else /* !__ASSEMBLY__ */
+
+#include <generated/asm-offsets.h>
+
+#endif
+
+#define PREEMPT_ACTIVE		0x10000000
+
+#ifdef CONFIG_4KSTACKS
+#define THREAD_SHIFT		12
+#else
+#define THREAD_SHIFT		13
+#endif
+
+#if THREAD_SHIFT >= PAGE_SHIFT
+#define THREAD_SIZE_ORDER	(THREAD_SHIFT - PAGE_SHIFT)
+#else
+#define THREAD_SIZE_ORDER	0
+#endif
+
+#define THREAD_SIZE		(PAGE_SIZE << THREAD_SIZE_ORDER)
+
+#define STACK_WARN		(THREAD_SIZE/8)
+/*
+ * macros/functions for gaining access to the thread information structure
+ */
+#ifndef __ASSEMBLY__
+
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.task		= &tsk,			\
+	.exec_domain	= &default_exec_domain,	\
+	.flags		= 0,			\
+	.cpu		= 0,			\
+	.preempt_count	= INIT_PREEMPT_COUNT,	\
+	.addr_limit	= KERNEL_DS,		\
+	.restart_block = {			\
+		.fn = do_no_restart_syscall,	\
+	},					\
+}
+
+#define init_thread_info	(init_thread_union.thread_info)
+#define init_stack		(init_thread_union.stack)
+
+/* how to get the current stack pointer from C */
+register unsigned long current_stack_pointer asm("A0StP") __used;
+
+/* how to get the thread information struct from C */
+static inline struct thread_info *current_thread_info(void)
+{
+	return (struct thread_info *)(current_stack_pointer &
+				      ~(THREAD_SIZE - 1));
+}
+
+#define __HAVE_ARCH_KSTACK_END
+static inline int kstack_end(void *addr)
+{
+	return addr == (void *) (((unsigned long) addr & ~(THREAD_SIZE - 1))
+				 + sizeof(struct thread_info));
+}
+
+#endif
+
+/*
+ * thread information flags
+ * - these are process state flags that various assembly files may need to
+ *   access
+ * - pending work-to-be-done flags are in LSW
+ * - other flags in MSW
+ */
+#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
+#define TIF_SIGPENDING		1	/* signal pending */
+#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
+#define TIF_SINGLESTEP		3	/* restore singlestep on return to user
+					   mode */
+#define TIF_SYSCALL_AUDIT	4	/* syscall auditing active */
+#define TIF_SECCOMP		5	/* secure computing */
+#define TIF_RESTORE_SIGMASK	6	/* restore signal mask in do_signal() */
+#define TIF_NOTIFY_RESUME	7	/* callback before returning to user */
+#define TIF_POLLING_NRFLAG      8	/* true if poll_idle() is polling
+					   TIF_NEED_RESCHED */
+#define TIF_MEMDIE		9	/* is terminating due to OOM killer */
+#define TIF_SYSCALL_TRACEPOINT  10	/* syscall tracepoint instrumentation */
+
+
+#define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
+#define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
+#define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
+#define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP		(1<<TIF_SECCOMP)
+#define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
+#define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
+#define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
+
+/* work to do in syscall trace */
+#define _TIF_WORK_SYSCALL_MASK	(_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | \
+				 _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+				 _TIF_SYSCALL_TRACEPOINT)
+
+/* work to do on any return to u-space */
+#define _TIF_ALLWORK_MASK	(_TIF_SYSCALL_TRACE | _TIF_SIGPENDING      | \
+				 _TIF_NEED_RESCHED  | _TIF_SYSCALL_AUDIT   | \
+				 _TIF_SINGLESTEP    | _TIF_RESTORE_SIGMASK | \
+				 _TIF_NOTIFY_RESUME)
+
+/* work to do on interrupt/exception return */
+#define _TIF_WORK_MASK		(_TIF_ALLWORK_MASK & ~(_TIF_SYSCALL_TRACE | \
+				 _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP))
+
+#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
+
+#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/metag/include/asm/tlb.h b/arch/metag/include/asm/tlb.h
new file mode 100644
index 0000000..048282f1
--- /dev/null
+++ b/arch/metag/include/asm/tlb.h
@@ -0,0 +1,36 @@
+#ifndef __ASM_METAG_TLB_H
+#define __ASM_METAG_TLB_H
+
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+
+/* Note, read http://lkml.org/lkml/2004/1/15/6 */
+
+#ifdef CONFIG_METAG_META12
+
+#define tlb_start_vma(tlb, vma)						      \
+	do {								      \
+		if (!tlb->fullmm)					      \
+			flush_cache_range(vma, vma->vm_start, vma->vm_end);   \
+	} while (0)
+
+#define tlb_end_vma(tlb, vma)						      \
+	do {								      \
+		if (!tlb->fullmm)					      \
+			flush_tlb_range(vma, vma->vm_start, vma->vm_end);     \
+	} while (0)
+
+
+#else
+
+#define tlb_start_vma(tlb, vma)			do { } while (0)
+#define tlb_end_vma(tlb, vma)			do { } while (0)
+
+#endif
+
+#define __tlb_remove_tlb_entry(tlb, pte, addr)	do { } while (0)
+#define tlb_flush(tlb)				flush_tlb_mm((tlb)->mm)
+
+#include <asm-generic/tlb.h>
+
+#endif
diff --git a/arch/metag/include/asm/tlbflush.h b/arch/metag/include/asm/tlbflush.h
new file mode 100644
index 0000000..566acf9
--- /dev/null
+++ b/arch/metag/include/asm/tlbflush.h
@@ -0,0 +1,77 @@
+#ifndef __ASM_METAG_TLBFLUSH_H
+#define __ASM_METAG_TLBFLUSH_H
+
+#include <linux/io.h>
+#include <linux/sched.h>
+#include <asm/metag_mem.h>
+#include <asm/pgalloc.h>
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb() flushes the current mm struct TLBs
+ *  - flush_tlb_all() flushes all processes TLBs
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(mm, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ *
+ * FIXME: Meta 2 can flush single TLB entries.
+ *
+ */
+
+#if defined(CONFIG_METAG_META21) && !defined(CONFIG_SMP)
+static inline void __flush_tlb(void)
+{
+	/* flush TLB entries for just the current hardware thread */
+	int thread = hard_processor_id();
+	metag_out32(0, (LINSYSCFLUSH_TxMMCU_BASE +
+			LINSYSCFLUSH_TxMMCU_STRIDE * thread));
+}
+#else
+static inline void __flush_tlb(void)
+{
+	/* flush TLB entries for all hardware threads */
+	metag_out32(0, LINSYSCFLUSH_MMCU);
+}
+#endif /* defined(CONFIG_METAG_META21) && !defined(CONFIG_SMP) */
+
+#define flush_tlb() __flush_tlb()
+
+#define flush_tlb_all() __flush_tlb()
+
+#define local_flush_tlb_all() __flush_tlb()
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	if (mm == current->active_mm)
+		__flush_tlb();
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long addr)
+{
+	flush_tlb_mm(vma->vm_mm);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+	flush_tlb_mm(vma->vm_mm);
+}
+
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+				      unsigned long start, unsigned long end)
+{
+	flush_tlb_mm(mm);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+					  unsigned long end)
+{
+	flush_tlb_all();
+}
+
+#endif /* __ASM_METAG_TLBFLUSH_H */
+
diff --git a/arch/metag/include/asm/topology.h b/arch/metag/include/asm/topology.h
new file mode 100644
index 0000000..23f5118
--- /dev/null
+++ b/arch/metag/include/asm/topology.h
@@ -0,0 +1,53 @@
+#ifndef _ASM_METAG_TOPOLOGY_H
+#define _ASM_METAG_TOPOLOGY_H
+
+#ifdef CONFIG_NUMA
+
+/* sched_domains SD_NODE_INIT for Meta machines */
+#define SD_NODE_INIT (struct sched_domain) {		\
+	.parent			= NULL,			\
+	.child			= NULL,			\
+	.groups			= NULL,			\
+	.min_interval		= 8,			\
+	.max_interval		= 32,			\
+	.busy_factor		= 32,			\
+	.imbalance_pct		= 125,			\
+	.cache_nice_tries	= 2,			\
+	.busy_idx		= 3,			\
+	.idle_idx		= 2,			\
+	.newidle_idx		= 0,			\
+	.wake_idx		= 0,			\
+	.forkexec_idx		= 0,			\
+	.flags			= SD_LOAD_BALANCE	\
+				| SD_BALANCE_FORK	\
+				| SD_BALANCE_EXEC	\
+				| SD_BALANCE_NEWIDLE	\
+				| SD_SERIALIZE,		\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
+	.nr_balance_failed	= 0,			\
+}
+
+#define cpu_to_node(cpu)	((void)(cpu), 0)
+#define parent_node(node)	((void)(node), 0)
+
+#define cpumask_of_node(node)	((void)node, cpu_online_mask)
+
+#define pcibus_to_node(bus)	((void)(bus), -1)
+#define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ? \
+					cpu_all_mask : \
+					cpumask_of_node(pcibus_to_node(bus)))
+
+#endif
+
+#define mc_capable()    (1)
+
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
+
+extern cpumask_t cpu_core_map[NR_CPUS];
+
+#define topology_core_cpumask(cpu)	(&cpu_core_map[cpu])
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_METAG_TOPOLOGY_H */
diff --git a/arch/metag/include/asm/traps.h b/arch/metag/include/asm/traps.h
new file mode 100644
index 0000000..ac80874
--- /dev/null
+++ b/arch/metag/include/asm/traps.h
@@ -0,0 +1,48 @@
+/*
+ *  Copyright (C) 2005,2008 Imagination Technologies
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef _METAG_TBIVECTORS_H
+#define _METAG_TBIVECTORS_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/tbx.h>
+
+typedef TBIRES (*kick_irq_func_t)(TBIRES, int, int, int, PTBI, int *);
+
+extern TBIRES kick_handler(TBIRES, int, int, int, PTBI);
+struct kick_irq_handler {
+	struct list_head list;
+	kick_irq_func_t func;
+};
+
+extern void kick_register_func(struct kick_irq_handler *);
+extern void kick_unregister_func(struct kick_irq_handler *);
+
+extern void head_end(TBIRES, unsigned long);
+extern void restart_critical_section(TBIRES State);
+extern TBIRES tail_end_sys(TBIRES, int, int *);
+static inline TBIRES tail_end(TBIRES state)
+{
+	return tail_end_sys(state, -1, NULL);
+}
+
+DECLARE_PER_CPU(PTBI, pTBI);
+extern PTBI pTBI_get(unsigned int);
+
+extern int ret_from_fork(TBIRES arg);
+
+extern int do_page_fault(struct pt_regs *regs, unsigned long address,
+			 unsigned int write_access, unsigned int trapno);
+
+extern TBIRES __TBIUnExpXXX(TBIRES State, int SigNum, int Triggers, int Inst,
+			    PTBI pTBI);
+
+#endif
+
+#endif /* _METAG_TBIVECTORS_H */
diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h
new file mode 100644
index 0000000..0748b0a
--- /dev/null
+++ b/arch/metag/include/asm/uaccess.h
@@ -0,0 +1,241 @@
+#ifndef __METAG_UACCESS_H
+#define __METAG_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <linux/sched.h>
+
+#define VERIFY_READ	0
+#define VERIFY_WRITE	1
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not.  If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(s)  ((mm_segment_t) { (s) })
+
+#define KERNEL_DS       MAKE_MM_SEG(0xFFFFFFFF)
+#define USER_DS		MAKE_MM_SEG(PAGE_OFFSET)
+
+#define get_ds()	(KERNEL_DS)
+#define get_fs()        (current_thread_info()->addr_limit)
+#define set_fs(x)       (current_thread_info()->addr_limit = (x))
+
+#define segment_eq(a, b)	((a).seg == (b).seg)
+
+#define __kernel_ok (segment_eq(get_fs(), KERNEL_DS))
+/*
+ * Explicitly allow NULL pointers here. Parts of the kernel such
+ * as readv/writev use access_ok to validate pointers, but want
+ * to allow NULL pointers for various reasons. NULL pointers are
+ * safe to allow through because the first page is not mappable on
+ * Meta.
+ *
+ * We also wish to avoid letting user code access the system area
+ * and the kernel half of the address space.
+ */
+#define __user_bad(addr, size) (((addr) > 0 && (addr) < META_MEMORY_BASE) || \
+				((addr) > PAGE_OFFSET &&		\
+				 (addr) < LINCORE_BASE))
+
+static inline int __access_ok(unsigned long addr, unsigned long size)
+{
+	return __kernel_ok || !__user_bad(addr, size);
+}
+
+#define access_ok(type, addr, size) __access_ok((unsigned long)(addr),	\
+						(unsigned long)(size))
+
+static inline int verify_area(int type, const void *addr, unsigned long size)
+{
+	return access_ok(type, addr, size) ? 0 : -EFAULT;
+}
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+struct exception_table_entry {
+	unsigned long insn, fixup;
+};
+
+extern int fixup_exception(struct pt_regs *regs);
+
+/*
+ * These are the main single-value transfer routines.  They automatically
+ * use the right size if we just have the right pointer type.
+ */
+
+#define put_user(x, ptr) \
+	__put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
+#define __put_user(x, ptr) \
+	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
+
+extern void __put_user_bad(void);
+
+#define __put_user_nocheck(x, ptr, size)		\
+({                                                      \
+	long __pu_err;                                  \
+	__put_user_size((x), (ptr), (size), __pu_err);	\
+	__pu_err;                                       \
+})
+
+#define __put_user_check(x, ptr, size)				\
+({                                                              \
+	long __pu_err = -EFAULT;                                \
+	__typeof__(*(ptr)) __user *__pu_addr = (ptr);           \
+	if (access_ok(VERIFY_WRITE, __pu_addr, size))		\
+		__put_user_size((x), __pu_addr, (size), __pu_err);	\
+	__pu_err;                                               \
+})
+
+extern long __put_user_asm_b(unsigned int x, void __user *addr);
+extern long __put_user_asm_w(unsigned int x, void __user *addr);
+extern long __put_user_asm_d(unsigned int x, void __user *addr);
+extern long __put_user_asm_l(unsigned long long x, void __user *addr);
+
+#define __put_user_size(x, ptr, size, retval)			\
+do {                                                            \
+	retval = 0;                                             \
+	switch (size) {                                         \
+	case 1:								\
+		retval = __put_user_asm_b((unsigned int)x, ptr); break;	\
+	case 2:								\
+		retval = __put_user_asm_w((unsigned int)x, ptr); break;	\
+	case 4:								\
+		retval = __put_user_asm_d((unsigned int)x, ptr); break;	\
+	case 8:								\
+		retval = __put_user_asm_l((unsigned long long)x, ptr); break; \
+	default:							\
+		__put_user_bad();					\
+	}								\
+} while (0)
+
+#define get_user(x, ptr) \
+	__get_user_check((x), (ptr), sizeof(*(ptr)))
+#define __get_user(x, ptr) \
+	__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
+
+extern long __get_user_bad(void);
+
+#define __get_user_nocheck(x, ptr, size)			\
+({                                                              \
+	long __gu_err, __gu_val;                                \
+	__get_user_size(__gu_val, (ptr), (size), __gu_err);	\
+	(x) = (__typeof__(*(ptr)))__gu_val;                     \
+	__gu_err;                                               \
+})
+
+#define __get_user_check(x, ptr, size)					\
+({                                                                      \
+	long __gu_err = -EFAULT, __gu_val = 0;                          \
+	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
+	if (access_ok(VERIFY_READ, __gu_addr, size))			\
+		__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
+	(x) = (__typeof__(*(ptr)))__gu_val;                             \
+	__gu_err;                                                       \
+})
+
+extern unsigned char __get_user_asm_b(const void __user *addr, long *err);
+extern unsigned short __get_user_asm_w(const void __user *addr, long *err);
+extern unsigned int __get_user_asm_d(const void __user *addr, long *err);
+
+#define __get_user_size(x, ptr, size, retval)			\
+do {                                                            \
+	retval = 0;                                             \
+	switch (size) {                                         \
+	case 1:							\
+		x = __get_user_asm_b(ptr, &retval); break;	\
+	case 2:							\
+		x = __get_user_asm_w(ptr, &retval); break;	\
+	case 4:							\
+		x = __get_user_asm_d(ptr, &retval); break;	\
+	default:						\
+		(x) = __get_user_bad();				\
+	}                                                       \
+} while (0)
+
+/*
+ * Copy a null terminated string from userspace.
+ *
+ * Must return:
+ * -EFAULT		for an exception
+ * count		if we hit the buffer limit
+ * bytes copied		if we hit a null byte
+ * (without the null byte)
+ */
+
+extern long __must_check __strncpy_from_user(char *dst, const char __user *src,
+					     long count);
+
+#define strncpy_from_user(dst, src, count) __strncpy_from_user(dst, src, count)
+
+/*
+ * Return the size of a string (including the ending 0)
+ *
+ * Return 0 on exception, a value greater than N if too long
+ */
+extern long __must_check strnlen_user(const char __user *src, long count);
+
+#define strlen_user(str) strnlen_user(str, 32767)
+
+extern unsigned long __must_check __copy_user_zeroing(void *to,
+						      const void __user *from,
+						      unsigned long n);
+
+static inline unsigned long
+copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	if (access_ok(VERIFY_READ, from, n))
+		return __copy_user_zeroing(to, from, n);
+	return n;
+}
+
+#define __copy_from_user(to, from, n) __copy_user_zeroing(to, from, n)
+#define __copy_from_user_inatomic __copy_from_user
+
+extern unsigned long __must_check __copy_user(void __user *to,
+					      const void *from,
+					      unsigned long n);
+
+static inline unsigned long copy_to_user(void __user *to, const void *from,
+					 unsigned long n)
+{
+	if (access_ok(VERIFY_WRITE, to, n))
+		return __copy_user(to, from, n);
+	return n;
+}
+
+#define __copy_to_user(to, from, n) __copy_user(to, from, n)
+#define __copy_to_user_inatomic __copy_to_user
+
+/*
+ * Zero Userspace
+ */
+
+extern unsigned long __must_check __do_clear_user(void __user *to,
+						  unsigned long n);
+
+static inline unsigned long clear_user(void __user *to, unsigned long n)
+{
+	if (access_ok(VERIFY_WRITE, to, n))
+		return __do_clear_user(to, n);
+	return n;
+}
+
+#define __clear_user(to, n)            __do_clear_user(to, n)
+
+#endif /* _METAG_UACCESS_H */
diff --git a/arch/metag/include/asm/unistd.h b/arch/metag/include/asm/unistd.h
new file mode 100644
index 0000000..32955a1
--- /dev/null
+++ b/arch/metag/include/asm/unistd.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <uapi/asm/unistd.h>
+
+#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/metag/include/asm/user_gateway.h b/arch/metag/include/asm/user_gateway.h
new file mode 100644
index 0000000..e404c09
--- /dev/null
+++ b/arch/metag/include/asm/user_gateway.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2010 Imagination Technologies
+ */
+
+#ifndef __ASM_METAG_USER_GATEWAY_H
+#define __ASM_METAG_USER_GATEWAY_H
+
+#include <asm/page.h>
+
+/* Page of kernel code accessible to userspace. */
+#define USER_GATEWAY_PAGE	0x6ffff000
+/* Offset of TLS pointer array in gateway page. */
+#define USER_GATEWAY_TLS	0x100
+
+#ifndef __ASSEMBLY__
+
+extern char __user_gateway_start;
+extern char __user_gateway_end;
+
+/* Kernel mapping of the gateway page. */
+extern void *gateway_page;
+
+static inline void set_gateway_tls(void __user *tls_ptr)
+{
+	void **gateway_tls = (void **)(gateway_page + USER_GATEWAY_TLS +
+				       hard_processor_id() * 4);
+
+	*gateway_tls = (__force void *)tls_ptr;
+#ifdef CONFIG_METAG_META12
+	/* Avoid cache aliases on virtually tagged cache. */
+	__builtin_dcache_flush((void *)USER_GATEWAY_PAGE + USER_GATEWAY_TLS +
+				       hard_processor_id() * sizeof(void *));
+#endif
+}
+
+extern int __kuser_get_tls(void);
+extern char *__kuser_get_tls_end[];
+
+extern int __kuser_cmpxchg(int, int, unsigned long *);
+extern char *__kuser_cmpxchg_end[];
+
+#endif
+
+#endif
diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild
new file mode 100644
index 0000000..876c71f
--- /dev/null
+++ b/arch/metag/include/uapi/asm/Kbuild
@@ -0,0 +1,13 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
+header-y += byteorder.h
+header-y += ptrace.h
+header-y += resource.h
+header-y += sigcontext.h
+header-y += siginfo.h
+header-y += swab.h
+header-y += unistd.h
+
+generic-y += mman.h
+generic-y += setup.h
diff --git a/arch/metag/include/uapi/asm/byteorder.h b/arch/metag/include/uapi/asm/byteorder.h
new file mode 100644
index 0000000..9558416
--- /dev/null
+++ b/arch/metag/include/uapi/asm/byteorder.h
@@ -0,0 +1 @@
+#include <linux/byteorder/little_endian.h>
diff --git a/arch/metag/include/uapi/asm/ptrace.h b/arch/metag/include/uapi/asm/ptrace.h
new file mode 100644
index 0000000..45d9780
--- /dev/null
+++ b/arch/metag/include/uapi/asm/ptrace.h
@@ -0,0 +1,113 @@
+#ifndef _UAPI_METAG_PTRACE_H
+#define _UAPI_METAG_PTRACE_H
+
+#ifndef __ASSEMBLY__
+
+/*
+ * These are the layouts of the regsets returned by the GETREGSET ptrace call
+ */
+
+/* user_gp_regs::status */
+
+/* CBMarker bit (indicates catch state / catch replay) */
+#define USER_GP_REGS_STATUS_CATCH_BIT		(1 << 22)
+#define USER_GP_REGS_STATUS_CATCH_S		22
+/* LSM_STEP field (load/store multiple step) */
+#define USER_GP_REGS_STATUS_LSM_STEP_BITS	(0x7 << 8)
+#define USER_GP_REGS_STATUS_LSM_STEP_S		8
+/* SCC bit (indicates split 16x16 condition flags) */
+#define USER_GP_REGS_STATUS_SCC_BIT		(1 << 4)
+#define USER_GP_REGS_STATUS_SCC_S		4
+
+/* normal condition flags */
+/* CF_Z bit (Zero flag) */
+#define USER_GP_REGS_STATUS_CF_Z_BIT		(1 << 3)
+#define USER_GP_REGS_STATUS_CF_Z_S		3
+/* CF_N bit (Negative flag) */
+#define USER_GP_REGS_STATUS_CF_N_BIT		(1 << 2)
+#define USER_GP_REGS_STATUS_CF_N_S		2
+/* CF_V bit (oVerflow flag) */
+#define USER_GP_REGS_STATUS_CF_V_BIT		(1 << 1)
+#define USER_GP_REGS_STATUS_CF_V_S		1
+/* CF_C bit (Carry flag) */
+#define USER_GP_REGS_STATUS_CF_C_BIT		(1 << 0)
+#define USER_GP_REGS_STATUS_CF_C_S		0
+
+/* split 16x16 condition flags */
+/* SCF_LZ bit (Low Zero flag) */
+#define USER_GP_REGS_STATUS_SCF_LZ_BIT		(1 << 3)
+#define USER_GP_REGS_STATUS_SCF_LZ_S		3
+/* SCF_HZ bit (High Zero flag) */
+#define USER_GP_REGS_STATUS_SCF_HZ_BIT		(1 << 2)
+#define USER_GP_REGS_STATUS_SCF_HZ_S		2
+/* SCF_HC bit (High Carry flag) */
+#define USER_GP_REGS_STATUS_SCF_HC_BIT		(1 << 1)
+#define USER_GP_REGS_STATUS_SCF_HC_S		1
+/* SCF_LC bit (Low Carry flag) */
+#define USER_GP_REGS_STATUS_SCF_LC_BIT		(1 << 0)
+#define USER_GP_REGS_STATUS_SCF_LC_S		0
+
+/**
+ * struct user_gp_regs - User general purpose registers
+ * @dx:		GP data unit regs (dx[reg][unit] = D{unit:0-1}.{reg:0-7})
+ * @ax:		GP address unit regs (ax[reg][unit] = A{unit:0-1}.{reg:0-3})
+ * @pc:		PC register
+ * @status:	TXSTATUS register (condition flags, LSM_STEP etc)
+ * @rpt:	TXRPT registers (branch repeat counter)
+ * @bpobits:	TXBPOBITS register ("branch prediction other" bits)
+ * @mode:	TXMODE register
+ * @_pad1:	Reserved padding to make sizeof obviously 64bit aligned
+ *
+ * This is the user-visible general purpose register state structure.
+ *
+ * It can be accessed through PTRACE_GETREGSET with NT_PRSTATUS.
+ *
+ * It is also used in the signal context.
+ */
+struct user_gp_regs {
+	unsigned long dx[8][2];
+	unsigned long ax[4][2];
+	unsigned long pc;
+	unsigned long status;
+	unsigned long rpt;
+	unsigned long bpobits;
+	unsigned long mode;
+	unsigned long _pad1;
+};
+
+/**
+ * struct user_cb_regs - User catch buffer registers
+ * @flags:	TXCATCH0 register (fault flags)
+ * @addr:	TXCATCH1 register (fault address)
+ * @data:	TXCATCH2 and TXCATCH3 registers (low and high data word)
+ *
+ * This is the user-visible catch buffer register state structure containing
+ * information about a failed memory access, and allowing the access to be
+ * modified and replayed.
+ *
+ * It can be accessed through PTRACE_GETREGSET with NT_METAG_CBUF.
+ */
+struct user_cb_regs {
+	unsigned long flags;
+	unsigned long addr;
+	unsigned long long data;
+};
+
+/**
+ * struct user_rp_state - User read pipeline state
+ * @entries:	Read pipeline entries
+ * @mask:	Mask of valid pipeline entries (RPMask from TXDIVTIME register)
+ *
+ * This is the user-visible read pipeline state structure containing the entries
+ * currently in the read pipeline and the mask of valid entries.
+ *
+ * It can be accessed through PTRACE_GETREGSET with NT_METAG_RPIPE.
+ */
+struct user_rp_state {
+	unsigned long long entries[6];
+	unsigned long mask;
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_METAG_PTRACE_H */
diff --git a/arch/metag/include/uapi/asm/resource.h b/arch/metag/include/uapi/asm/resource.h
new file mode 100644
index 0000000..526d23c
--- /dev/null
+++ b/arch/metag/include/uapi/asm/resource.h
@@ -0,0 +1,7 @@
+#ifndef _UAPI_METAG_RESOURCE_H
+#define _UAPI_METAG_RESOURCE_H
+
+#define _STK_LIM_MAX    (1 << 28)
+#include <asm-generic/resource.h>
+
+#endif /* _UAPI_METAG_RESOURCE_H */
diff --git a/arch/metag/include/uapi/asm/sigcontext.h b/arch/metag/include/uapi/asm/sigcontext.h
new file mode 100644
index 0000000..ef79a91
--- /dev/null
+++ b/arch/metag/include/uapi/asm/sigcontext.h
@@ -0,0 +1,31 @@
+#ifndef _ASM_METAG_SIGCONTEXT_H
+#define _ASM_METAG_SIGCONTEXT_H
+
+#include <asm/ptrace.h>
+
+/*
+ * In a sigcontext structure we need to store the active state of the
+ * user process so that it does not get trashed when we call the signal
+ * handler. That not really the same as a user context that we are
+ * going to store on syscall etc.
+ */
+struct sigcontext {
+	struct user_gp_regs regs;	/* needs to be first */
+
+	/*
+	 * Catch registers describing a memory fault.
+	 * If USER_GP_REGS_STATUS_CATCH_BIT is set in regs.status then catch
+	 * buffers have been saved and will be replayed on sigreturn.
+	 * Clear that bit to discard the catch state instead of replaying it.
+	 */
+	struct user_cb_regs cb;
+
+	/*
+	 * Read pipeline state. This will get restored on sigreturn.
+	 */
+	struct user_rp_state rp;
+
+	unsigned long oldmask;
+};
+
+#endif
diff --git a/arch/metag/include/uapi/asm/siginfo.h b/arch/metag/include/uapi/asm/siginfo.h
new file mode 100644
index 0000000..b2e0c8b
--- /dev/null
+++ b/arch/metag/include/uapi/asm/siginfo.h
@@ -0,0 +1,8 @@
+#ifndef _METAG_SIGINFO_H
+#define _METAG_SIGINFO_H
+
+#define __ARCH_SI_TRAPNO
+
+#include <asm-generic/siginfo.h>
+
+#endif
diff --git a/arch/metag/include/uapi/asm/swab.h b/arch/metag/include/uapi/asm/swab.h
new file mode 100644
index 0000000..1076b3a
--- /dev/null
+++ b/arch/metag/include/uapi/asm/swab.h
@@ -0,0 +1,26 @@
+#ifndef __ASM_METAG_SWAB_H
+#define __ASM_METAG_SWAB_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm-generic/swab.h>
+
+static inline __attribute_const__ __u16 __arch_swab16(__u16 x)
+{
+	return __builtin_metag_bswaps(x);
+}
+#define __arch_swab16 __arch_swab16
+
+static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
+{
+	return __builtin_metag_bswap(x);
+}
+#define __arch_swab32 __arch_swab32
+
+static inline __attribute_const__ __u64 __arch_swab64(__u64 x)
+{
+	return __builtin_metag_bswapll(x);
+}
+#define __arch_swab64 __arch_swab64
+
+#endif /* __ASM_METAG_SWAB_H */
diff --git a/arch/metag/include/uapi/asm/unistd.h b/arch/metag/include/uapi/asm/unistd.h
new file mode 100644
index 0000000..b80b8e8
--- /dev/null
+++ b/arch/metag/include/uapi/asm/unistd.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+/* Use the standard ABI for syscalls. */
+#include <asm-generic/unistd.h>
+
+/* metag-specific syscalls. */
+#define __NR_metag_setglobalbit		(__NR_arch_specific_syscall + 1)
+__SYSCALL(__NR_metag_setglobalbit, sys_metag_setglobalbit)
+#define __NR_metag_set_fpu_flags	(__NR_arch_specific_syscall + 2)
+__SYSCALL(__NR_metag_set_fpu_flags, sys_metag_set_fpu_flags)
+#define __NR_metag_set_tls		(__NR_arch_specific_syscall + 3)
+__SYSCALL(__NR_metag_set_tls, sys_metag_set_tls)
+#define __NR_metag_get_tls		(__NR_arch_specific_syscall + 4)
+__SYSCALL(__NR_metag_get_tls, sys_metag_get_tls)
diff --git a/arch/metag/kernel/.gitignore b/arch/metag/kernel/.gitignore
new file mode 100644
index 0000000..c5f676c
--- /dev/null
+++ b/arch/metag/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/metag/kernel/Makefile b/arch/metag/kernel/Makefile
new file mode 100644
index 0000000..d7675f4
--- /dev/null
+++ b/arch/metag/kernel/Makefile
@@ -0,0 +1,39 @@
+#
+# Makefile for the Linux/Meta kernel.
+#
+
+extra-y	+= head.o
+extra-y	+= vmlinux.lds
+
+obj-y	+= cachepart.o
+obj-y	+= clock.o
+obj-y	+= core_reg.o
+obj-y	+= devtree.o
+obj-y	+= dma.o
+obj-y	+= irq.o
+obj-y	+= kick.o
+obj-y	+= machines.o
+obj-y	+= process.o
+obj-y	+= ptrace.o
+obj-y	+= setup.o
+obj-y	+= signal.o
+obj-y	+= stacktrace.o
+obj-y	+= sys_metag.o
+obj-y	+= tbiunexp.o
+obj-y	+= time.o
+obj-y	+= topology.o
+obj-y	+= traps.o
+obj-y	+= user_gateway.o
+
+obj-$(CONFIG_PERF_EVENTS)		+= perf/
+
+obj-$(CONFIG_METAG_COREMEM)		+= coremem.o
+obj-$(CONFIG_METAG_DA)			+= da.o
+obj-$(CONFIG_DYNAMIC_FTRACE)		+= ftrace.o
+obj-$(CONFIG_FUNCTION_TRACER)		+= ftrace_stub.o
+obj-$(CONFIG_MODULES)			+= metag_ksyms.o
+obj-$(CONFIG_MODULES)			+= module.o
+obj-$(CONFIG_PERF_EVENTS)		+= perf_callchain.o
+obj-$(CONFIG_SMP)			+= smp.o
+obj-$(CONFIG_METAG_SUSPEND_MEM)		+= suspend.o
+obj-$(CONFIG_METAG_USER_TCM)		+= tcm.o
diff --git a/arch/metag/kernel/asm-offsets.c b/arch/metag/kernel/asm-offsets.c
new file mode 100644
index 0000000..bfc9205
--- /dev/null
+++ b/arch/metag/kernel/asm-offsets.c
@@ -0,0 +1,14 @@
+/*
+ * This program is used to generate definitions needed by
+ * assembly language modules.
+ *
+ */
+
+#include <linux/kbuild.h>
+#include <linux/thread_info.h>
+
+int main(void)
+{
+	DEFINE(THREAD_INFO_SIZE, sizeof(struct thread_info));
+	return 0;
+}
diff --git a/arch/metag/kernel/cachepart.c b/arch/metag/kernel/cachepart.c
new file mode 100644
index 0000000..3a589dfb
--- /dev/null
+++ b/arch/metag/kernel/cachepart.c
@@ -0,0 +1,124 @@
+/*
+ * Meta cache partition manipulation.
+ *
+ * Copyright 2010 Imagination Technologies Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/errno.h>
+#include <asm/processor.h>
+#include <asm/cachepart.h>
+#include <asm/metag_isa.h>
+#include <asm/metag_mem.h>
+
+#define SYSC_DCPART(n)	(SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n))
+#define SYSC_ICPART(n)	(SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n))
+
+#define CACHE_ASSOCIATIVITY 4 /* 4 way set-assosiative */
+#define ICACHE 0
+#define DCACHE 1
+
+/* The CORE_CONFIG2 register is not available on Meta 1 */
+#ifdef CONFIG_METAG_META21
+unsigned int get_dcache_size(void)
+{
+	unsigned int config2 = metag_in32(METAC_CORE_CONFIG2);
+	return 0x1000 << ((config2 & METAC_CORECFG2_DCSZ_BITS)
+				>> METAC_CORECFG2_DCSZ_S);
+}
+
+unsigned int get_icache_size(void)
+{
+	unsigned int config2 = metag_in32(METAC_CORE_CONFIG2);
+	return 0x1000 << ((config2 & METAC_CORE_C2ICSZ_BITS)
+				>> METAC_CORE_C2ICSZ_S);
+}
+
+unsigned int get_global_dcache_size(void)
+{
+	unsigned int cpart = metag_in32(SYSC_DCPART(hard_processor_id()));
+	unsigned int temp = cpart & SYSC_xCPARTG_AND_BITS;
+	return (get_dcache_size() * ((temp >> SYSC_xCPARTG_AND_S) + 1)) >> 4;
+}
+
+unsigned int get_global_icache_size(void)
+{
+	unsigned int cpart = metag_in32(SYSC_ICPART(hard_processor_id()));
+	unsigned int temp = cpart & SYSC_xCPARTG_AND_BITS;
+	return (get_icache_size() * ((temp >> SYSC_xCPARTG_AND_S) + 1)) >> 4;
+}
+
+static unsigned int get_thread_cache_size(unsigned int cache, int thread_id)
+{
+	unsigned int cache_size;
+	unsigned int t_cache_part;
+	unsigned int isEnabled;
+	unsigned int offset = 0;
+	isEnabled = (cache == DCACHE ? metag_in32(MMCU_DCACHE_CTRL_ADDR) & 0x1 :
+		metag_in32(MMCU_ICACHE_CTRL_ADDR) & 0x1);
+	if (!isEnabled)
+		return 0;
+#if PAGE_OFFSET >= LINGLOBAL_BASE
+	/* Checking for global cache */
+	cache_size = (cache == DCACHE ? get_global_dache_size() :
+		get_global_icache_size());
+	offset = 8;
+#else
+	cache_size = (cache == DCACHE ? get_dcache_size() :
+		get_icache_size());
+#endif
+	t_cache_part = (cache == DCACHE ?
+		(metag_in32(SYSC_DCPART(thread_id)) >> offset) & 0xF :
+		(metag_in32(SYSC_ICPART(thread_id)) >> offset) & 0xF);
+	switch (t_cache_part) {
+	case 0xF:
+		return cache_size;
+	case 0x7:
+		return cache_size / 2;
+	case 0x3:
+		return cache_size / 4;
+	case 0x1:
+		return cache_size / 8;
+	case 0:
+		return cache_size / 16;
+	}
+	return -1;
+}
+
+void check_for_cache_aliasing(int thread_id)
+{
+	unsigned int thread_cache_size;
+	unsigned int cache_type;
+	for (cache_type = ICACHE; cache_type <= DCACHE; cache_type++) {
+		thread_cache_size =
+				get_thread_cache_size(cache_type, thread_id);
+		if (thread_cache_size < 0)
+			pr_emerg("Can't read %s cache size", \
+				 cache_type ? "DCACHE" : "ICACHE");
+		else if (thread_cache_size == 0)
+			/* Cache is off. No need to check for aliasing */
+			continue;
+		if (thread_cache_size / CACHE_ASSOCIATIVITY > PAGE_SIZE) {
+			pr_emerg("Cache aliasing detected in %s on Thread %d",
+				 cache_type ? "DCACHE" : "ICACHE", thread_id);
+			pr_warn("Total %s size: %u bytes",
+				cache_type ? "DCACHE" : "ICACHE ",
+				cache_type ? get_dcache_size()
+				: get_icache_size());
+			pr_warn("Thread %s size: %d bytes",
+				cache_type ? "CACHE" : "ICACHE",
+				thread_cache_size);
+			pr_warn("Page Size: %lu bytes", PAGE_SIZE);
+		}
+	}
+}
+
+#else
+
+void check_for_cache_aliasing(int thread_id)
+{
+	return;
+}
+
+#endif
diff --git a/arch/metag/kernel/clock.c b/arch/metag/kernel/clock.c
new file mode 100644
index 0000000..defc840
--- /dev/null
+++ b/arch/metag/kernel/clock.c
@@ -0,0 +1,53 @@
+/*
+ * arch/metag/kernel/clock.c
+ *
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+
+#include <asm/param.h>
+#include <asm/clock.h>
+
+struct meta_clock_desc _meta_clock;
+
+/* Default machine get_core_freq callback. */
+static unsigned long get_core_freq_default(void)
+{
+#ifdef CONFIG_METAG_META21
+	/*
+	 * Meta 2 cores divide down the core clock for the Meta timers, so we
+	 * can estimate the core clock from the divider.
+	 */
+	return (metag_in32(EXPAND_TIMER_DIV) + 1) * 1000000;
+#else
+	/*
+	 * On Meta 1 we don't know the core clock, but assuming the Meta timer
+	 * is correct it can be estimated based on loops_per_jiffy.
+	 */
+	return (loops_per_jiffy * HZ * 5) >> 1;
+#endif
+}
+
+/**
+ * setup_meta_clocks() - Set up the Meta clock.
+ * @desc:	Clock descriptor usually provided by machine description
+ *
+ * Ensures all callbacks are valid.
+ */
+void __init setup_meta_clocks(struct meta_clock_desc *desc)
+{
+	/* copy callbacks */
+	if (desc)
+		_meta_clock = *desc;
+
+	/* set fallback functions */
+	if (!_meta_clock.get_core_freq)
+		_meta_clock.get_core_freq = get_core_freq_default;
+}
+
diff --git a/arch/metag/kernel/core_reg.c b/arch/metag/kernel/core_reg.c
new file mode 100644
index 0000000..671cce8
--- /dev/null
+++ b/arch/metag/kernel/core_reg.c
@@ -0,0 +1,117 @@
+/*
+ *  Support for reading and writing Meta core internal registers.
+ *
+ *  Copyright (C) 2011 Imagination Technologies Ltd.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/export.h>
+
+#include <asm/core_reg.h>
+#include <asm/global_lock.h>
+#include <asm/hwthread.h>
+#include <asm/io.h>
+#include <asm/metag_mem.h>
+#include <asm/metag_regs.h>
+
+#define UNIT_BIT_MASK		TXUXXRXRQ_UXX_BITS
+#define REG_BIT_MASK		TXUXXRXRQ_RX_BITS
+#define THREAD_BIT_MASK		TXUXXRXRQ_TX_BITS
+
+#define UNIT_SHIFTS		TXUXXRXRQ_UXX_S
+#define REG_SHIFTS		TXUXXRXRQ_RX_S
+#define THREAD_SHIFTS		TXUXXRXRQ_TX_S
+
+#define UNIT_VAL(x)		(((x) << UNIT_SHIFTS) & UNIT_BIT_MASK)
+#define REG_VAL(x)		(((x) << REG_SHIFTS) & REG_BIT_MASK)
+#define THREAD_VAL(x)		(((x) << THREAD_SHIFTS) & THREAD_BIT_MASK)
+
+/*
+ * core_reg_write() - modify the content of a register in a core unit.
+ * @unit:	The unit to be modified.
+ * @reg:	Register number within the unit.
+ * @thread:	The thread we want to access.
+ * @val:	The new value to write.
+ *
+ * Check asm/metag_regs.h for a list/defines of supported units (ie: TXUPC_ID,
+ * TXUTR_ID, etc), and regnums within the units (ie: TXMASKI_REGNUM,
+ * TXPOLLI_REGNUM, etc).
+ */
+void core_reg_write(int unit, int reg, int thread, unsigned int val)
+{
+	unsigned long flags;
+
+	/* TXUCT_ID has its own memory mapped registers */
+	if (unit == TXUCT_ID) {
+		void __iomem *cu_reg = __CU_addr(thread, reg);
+		metag_out32(val, cu_reg);
+		return;
+	}
+
+	__global_lock2(flags);
+
+	/* wait for ready */
+	while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT))
+		udelay(10);
+
+	/* set the value to write */
+	metag_out32(val, TXUXXRXDT);
+
+	/* set the register to write */
+	val = UNIT_VAL(unit) | REG_VAL(reg) | THREAD_VAL(thread);
+	metag_out32(val, TXUXXRXRQ);
+
+	/* wait for finish */
+	while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT))
+		udelay(10);
+
+	__global_unlock2(flags);
+}
+EXPORT_SYMBOL(core_reg_write);
+
+/*
+ * core_reg_read() - read the content of a register in a core unit.
+ * @unit:	The unit to be modified.
+ * @reg:	Register number within the unit.
+ * @thread:	The thread we want to access.
+ *
+ * Check asm/metag_regs.h for a list/defines of supported units (ie: TXUPC_ID,
+ * TXUTR_ID, etc), and regnums within the units (ie: TXMASKI_REGNUM,
+ * TXPOLLI_REGNUM, etc).
+ */
+unsigned int core_reg_read(int unit, int reg, int thread)
+{
+	unsigned long flags;
+	unsigned int val;
+
+	/* TXUCT_ID has its own memory mapped registers */
+	if (unit == TXUCT_ID) {
+		void __iomem *cu_reg = __CU_addr(thread, reg);
+		val = metag_in32(cu_reg);
+		return val;
+	}
+
+	__global_lock2(flags);
+
+	/* wait for ready */
+	while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT))
+		udelay(10);
+
+	/* set the register to read */
+	val = (UNIT_VAL(unit) | REG_VAL(reg) | THREAD_VAL(thread) |
+							TXUXXRXRQ_RDnWR_BIT);
+	metag_out32(val, TXUXXRXRQ);
+
+	/* wait for finish */
+	while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT))
+		udelay(10);
+
+	/* read the register value */
+	val = metag_in32(TXUXXRXDT);
+
+	__global_unlock2(flags);
+
+	return val;
+}
+EXPORT_SYMBOL(core_reg_read);
diff --git a/arch/metag/kernel/da.c b/arch/metag/kernel/da.c
new file mode 100644
index 0000000..52aabb6
--- /dev/null
+++ b/arch/metag/kernel/da.c
@@ -0,0 +1,23 @@
+/*
+ * Meta DA JTAG debugger control.
+ *
+ * Copyright 2012 Imagination Technologies Ltd.
+ */
+
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <asm/da.h>
+#include <asm/metag_mem.h>
+
+bool _metag_da_present;
+
+int __init metag_da_probe(void)
+{
+	_metag_da_present = (metag_in32(T0VECINT_BHALT) == 1);
+	if (_metag_da_present)
+		pr_info("DA present\n");
+	else
+		pr_info("DA not present\n");
+	return 0;
+}
diff --git a/arch/metag/kernel/devtree.c b/arch/metag/kernel/devtree.c
new file mode 100644
index 0000000..7cd0252
--- /dev/null
+++ b/arch/metag/kernel/devtree.c
@@ -0,0 +1,114 @@
+/*
+ *  linux/arch/metag/kernel/devtree.c
+ *
+ *  Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ *  Based on ARM version:
+ *  Copyright (C) 2009 Canonical Ltd. <jeremy.kerr@canonical.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/mach/arch.h>
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+	pr_err("%s(%llx, %llx)\n",
+	       __func__, base, size);
+}
+
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+	return alloc_bootmem_align(size, align);
+}
+
+/**
+ * setup_machine_fdt - Machine setup when an dtb was passed to the kernel
+ * @dt:		virtual address pointer to dt blob
+ *
+ * If a dtb was passed to the kernel, then use it to choose the correct
+ * machine_desc and to setup the system.
+ */
+struct machine_desc * __init setup_machine_fdt(void *dt)
+{
+	struct boot_param_header *devtree = dt;
+	struct machine_desc *mdesc, *mdesc_best = NULL;
+	unsigned int score, mdesc_score = ~1;
+	unsigned long dt_root;
+	const char *model;
+
+	/* check device tree validity */
+	if (be32_to_cpu(devtree->magic) != OF_DT_HEADER)
+		return NULL;
+
+	/* Search the mdescs for the 'best' compatible value match */
+	initial_boot_params = devtree;
+	dt_root = of_get_flat_dt_root();
+
+	for_each_machine_desc(mdesc) {
+		score = of_flat_dt_match(dt_root, mdesc->dt_compat);
+		if (score > 0 && score < mdesc_score) {
+			mdesc_best = mdesc;
+			mdesc_score = score;
+		}
+	}
+	if (!mdesc_best) {
+		const char *prop;
+		long size;
+
+		pr_err("\nError: unrecognized/unsupported device tree compatible list:\n[ ");
+
+		prop = of_get_flat_dt_prop(dt_root, "compatible", &size);
+		if (prop) {
+			while (size > 0) {
+				printk("'%s' ", prop);
+				size -= strlen(prop) + 1;
+				prop += strlen(prop) + 1;
+			}
+		}
+		printk("]\n\n");
+
+		dump_machine_table(); /* does not return */
+	}
+
+	model = of_get_flat_dt_prop(dt_root, "model", NULL);
+	if (!model)
+		model = of_get_flat_dt_prop(dt_root, "compatible", NULL);
+	if (!model)
+		model = "<unknown>";
+	pr_info("Machine: %s, model: %s\n", mdesc_best->name, model);
+
+	/* Retrieve various information from the /chosen node */
+	of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line);
+
+	return mdesc_best;
+}
+
+/**
+ * copy_fdt - Copy device tree into non-init memory.
+ *
+ * We must copy the flattened device tree blob into non-init memory because the
+ * unflattened device tree will reference the strings in it directly.
+ */
+void __init copy_fdt(void)
+{
+	void *alloc = early_init_dt_alloc_memory_arch(
+			be32_to_cpu(initial_boot_params->totalsize), 0x40);
+	if (alloc) {
+		memcpy(alloc, initial_boot_params,
+		       be32_to_cpu(initial_boot_params->totalsize));
+		initial_boot_params = alloc;
+	}
+}
diff --git a/arch/metag/kernel/dma.c b/arch/metag/kernel/dma.c
new file mode 100644
index 0000000..8c00ded
--- /dev/null
+++ b/arch/metag/kernel/dma.c
@@ -0,0 +1,507 @@
+/*
+ *  Meta version derived from arch/powerpc/lib/dma-noncoherent.c
+ *    Copyright (C) 2008 Imagination Technologies Ltd.
+ *
+ *  PowerPC version derived from arch/arm/mm/consistent.c
+ *    Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
+ *
+ *  Copyright (C) 2000 Russell King
+ *
+ * Consistent memory allocators.  Used for DMA devices that want to
+ * share uncached memory with the processor core.  The function return
+ * is the virtual address and 'dma_handle' is the physical address.
+ * Mostly stolen from the ARM port, with some changes for PowerPC.
+ *						-- Dan
+ *
+ * Reorganized to get rid of the arch-specific consistent_* functions
+ * and provide non-coherent implementations for the DMA API. -Matt
+ *
+ * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
+ * implementation. This is pulled straight from ARM and barely
+ * modified. -Matt
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/highmem.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+
+#include <asm/tlbflush.h>
+#include <asm/mmu.h>
+
+#define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - CONSISTENT_START) \
+					>> PAGE_SHIFT)
+
+static u64 get_coherent_dma_mask(struct device *dev)
+{
+	u64 mask = ~0ULL;
+
+	if (dev) {
+		mask = dev->coherent_dma_mask;
+
+		/*
+		 * Sanity check the DMA mask - it must be non-zero, and
+		 * must be able to be satisfied by a DMA allocation.
+		 */
+		if (mask == 0) {
+			dev_warn(dev, "coherent DMA mask is unset\n");
+			return 0;
+		}
+	}
+
+	return mask;
+}
+/*
+ * This is the page table (2MB) covering uncached, DMA consistent allocations
+ */
+static pte_t *consistent_pte;
+static DEFINE_SPINLOCK(consistent_lock);
+
+/*
+ * VM region handling support.
+ *
+ * This should become something generic, handling VM region allocations for
+ * vmalloc and similar (ioremap, module space, etc).
+ *
+ * I envisage vmalloc()'s supporting vm_struct becoming:
+ *
+ *  struct vm_struct {
+ *    struct metag_vm_region	region;
+ *    unsigned long	flags;
+ *    struct page	**pages;
+ *    unsigned int	nr_pages;
+ *    unsigned long	phys_addr;
+ *  };
+ *
+ * get_vm_area() would then call metag_vm_region_alloc with an appropriate
+ * struct metag_vm_region head (eg):
+ *
+ *  struct metag_vm_region vmalloc_head = {
+ *	.vm_list	= LIST_HEAD_INIT(vmalloc_head.vm_list),
+ *	.vm_start	= VMALLOC_START,
+ *	.vm_end		= VMALLOC_END,
+ *  };
+ *
+ * However, vmalloc_head.vm_start is variable (typically, it is dependent on
+ * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
+ * would have to initialise this each time prior to calling
+ * metag_vm_region_alloc().
+ */
+struct metag_vm_region {
+	struct list_head vm_list;
+	unsigned long vm_start;
+	unsigned long vm_end;
+	struct page		*vm_pages;
+	int			vm_active;
+};
+
+static struct metag_vm_region consistent_head = {
+	.vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
+	.vm_start = CONSISTENT_START,
+	.vm_end = CONSISTENT_END,
+};
+
+static struct metag_vm_region *metag_vm_region_alloc(struct metag_vm_region
+						     *head, size_t size,
+						     gfp_t gfp)
+{
+	unsigned long addr = head->vm_start, end = head->vm_end - size;
+	unsigned long flags;
+	struct metag_vm_region *c, *new;
+
+	new = kmalloc(sizeof(struct metag_vm_region), gfp);
+	if (!new)
+		goto out;
+
+	spin_lock_irqsave(&consistent_lock, flags);
+
+	list_for_each_entry(c, &head->vm_list, vm_list) {
+		if ((addr + size) < addr)
+			goto nospc;
+		if ((addr + size) <= c->vm_start)
+			goto found;
+		addr = c->vm_end;
+		if (addr > end)
+			goto nospc;
+	}
+
+found:
+	/*
+	 * Insert this entry _before_ the one we found.
+	 */
+	list_add_tail(&new->vm_list, &c->vm_list);
+	new->vm_start = addr;
+	new->vm_end = addr + size;
+	new->vm_active = 1;
+
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	return new;
+
+nospc:
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	kfree(new);
+out:
+	return NULL;
+}
+
+static struct metag_vm_region *metag_vm_region_find(struct metag_vm_region
+						    *head, unsigned long addr)
+{
+	struct metag_vm_region *c;
+
+	list_for_each_entry(c, &head->vm_list, vm_list) {
+		if (c->vm_active && c->vm_start == addr)
+			goto out;
+	}
+	c = NULL;
+out:
+	return c;
+}
+
+/*
+ * Allocate DMA-coherent memory space and return both the kernel remapped
+ * virtual and bus address for that space.
+ */
+void *dma_alloc_coherent(struct device *dev, size_t size,
+			 dma_addr_t *handle, gfp_t gfp)
+{
+	struct page *page;
+	struct metag_vm_region *c;
+	unsigned long order;
+	u64 mask = get_coherent_dma_mask(dev);
+	u64 limit;
+
+	if (!consistent_pte) {
+		pr_err("%s: not initialised\n", __func__);
+		dump_stack();
+		return NULL;
+	}
+
+	if (!mask)
+		goto no_page;
+	size = PAGE_ALIGN(size);
+	limit = (mask + 1) & ~mask;
+	if ((limit && size >= limit)
+	    || size >= (CONSISTENT_END - CONSISTENT_START)) {
+		pr_warn("coherent allocation too big (requested %#x mask %#Lx)\n",
+			size, mask);
+		return NULL;
+	}
+
+	order = get_order(size);
+
+	if (mask != 0xffffffff)
+		gfp |= GFP_DMA;
+
+	page = alloc_pages(gfp, order);
+	if (!page)
+		goto no_page;
+
+	/*
+	 * Invalidate any data that might be lurking in the
+	 * kernel direct-mapped region for device DMA.
+	 */
+	{
+		void *kaddr = page_address(page);
+		memset(kaddr, 0, size);
+		flush_dcache_region(kaddr, size);
+	}
+
+	/*
+	 * Allocate a virtual address in the consistent mapping region.
+	 */
+	c = metag_vm_region_alloc(&consistent_head, size,
+				  gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
+	if (c) {
+		unsigned long vaddr = c->vm_start;
+		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
+		struct page *end = page + (1 << order);
+
+		c->vm_pages = page;
+		split_page(page, order);
+
+		/*
+		 * Set the "dma handle"
+		 */
+		*handle = page_to_bus(page);
+
+		do {
+			BUG_ON(!pte_none(*pte));
+
+			SetPageReserved(page);
+			set_pte_at(&init_mm, vaddr,
+				   pte, mk_pte(page,
+					       pgprot_writecombine
+					       (PAGE_KERNEL)));
+			page++;
+			pte++;
+			vaddr += PAGE_SIZE;
+		} while (size -= PAGE_SIZE);
+
+		/*
+		 * Free the otherwise unused pages.
+		 */
+		while (page < end) {
+			__free_page(page);
+			page++;
+		}
+
+		return (void *)c->vm_start;
+	}
+
+	if (page)
+		__free_pages(page, order);
+no_page:
+	return NULL;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+/*
+ * free a page as defined by the above mapping.
+ */
+void dma_free_coherent(struct device *dev, size_t size,
+		       void *vaddr, dma_addr_t dma_handle)
+{
+	struct metag_vm_region *c;
+	unsigned long flags, addr;
+	pte_t *ptep;
+
+	size = PAGE_ALIGN(size);
+
+	spin_lock_irqsave(&consistent_lock, flags);
+
+	c = metag_vm_region_find(&consistent_head, (unsigned long)vaddr);
+	if (!c)
+		goto no_area;
+
+	c->vm_active = 0;
+	if ((c->vm_end - c->vm_start) != size) {
+		pr_err("%s: freeing wrong coherent size (%ld != %d)\n",
+		       __func__, c->vm_end - c->vm_start, size);
+		dump_stack();
+		size = c->vm_end - c->vm_start;
+	}
+
+	ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
+	addr = c->vm_start;
+	do {
+		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
+		unsigned long pfn;
+
+		ptep++;
+		addr += PAGE_SIZE;
+
+		if (!pte_none(pte) && pte_present(pte)) {
+			pfn = pte_pfn(pte);
+
+			if (pfn_valid(pfn)) {
+				struct page *page = pfn_to_page(pfn);
+				ClearPageReserved(page);
+
+				__free_page(page);
+				continue;
+			}
+		}
+
+		pr_crit("%s: bad page in kernel page table\n",
+			__func__);
+	} while (size -= PAGE_SIZE);
+
+	flush_tlb_kernel_range(c->vm_start, c->vm_end);
+
+	list_del(&c->vm_list);
+
+	spin_unlock_irqrestore(&consistent_lock, flags);
+
+	kfree(c);
+	return;
+
+no_area:
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	pr_err("%s: trying to free invalid coherent area: %p\n",
+	       __func__, vaddr);
+	dump_stack();
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+
+static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		    void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+	int ret = -ENXIO;
+
+	unsigned long flags, user_size, kern_size;
+	struct metag_vm_region *c;
+
+	user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+	spin_lock_irqsave(&consistent_lock, flags);
+	c = metag_vm_region_find(&consistent_head, (unsigned long)cpu_addr);
+	spin_unlock_irqrestore(&consistent_lock, flags);
+
+	if (c) {
+		unsigned long off = vma->vm_pgoff;
+
+		kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
+
+		if (off < kern_size &&
+		    user_size <= (kern_size - off)) {
+			ret = remap_pfn_range(vma, vma->vm_start,
+					      page_to_pfn(c->vm_pages) + off,
+					      user_size << PAGE_SHIFT,
+					      vma->vm_page_prot);
+		}
+	}
+
+
+	return ret;
+}
+
+int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+		      void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+EXPORT_SYMBOL(dma_mmap_coherent);
+
+int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
+			  void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+EXPORT_SYMBOL(dma_mmap_writecombine);
+
+
+
+
+/*
+ * Initialise the consistent memory allocation.
+ */
+static int __init dma_alloc_init(void)
+{
+	pgd_t *pgd, *pgd_k;
+	pud_t *pud, *pud_k;
+	pmd_t *pmd, *pmd_k;
+	pte_t *pte;
+	int ret = 0;
+
+	do {
+		int offset = pgd_index(CONSISTENT_START);
+		pgd = pgd_offset(&init_mm, CONSISTENT_START);
+		pud = pud_alloc(&init_mm, pgd, CONSISTENT_START);
+		pmd = pmd_alloc(&init_mm, pud, CONSISTENT_START);
+		if (!pmd) {
+			pr_err("%s: no pmd tables\n", __func__);
+			ret = -ENOMEM;
+			break;
+		}
+		WARN_ON(!pmd_none(*pmd));
+
+		pte = pte_alloc_kernel(pmd, CONSISTENT_START);
+		if (!pte) {
+			pr_err("%s: no pte tables\n", __func__);
+			ret = -ENOMEM;
+			break;
+		}
+
+		pgd_k = ((pgd_t *) mmu_get_base()) + offset;
+		pud_k = pud_offset(pgd_k, CONSISTENT_START);
+		pmd_k = pmd_offset(pud_k, CONSISTENT_START);
+		set_pmd(pmd_k, *pmd);
+
+		consistent_pte = pte;
+	} while (0);
+
+	return ret;
+}
+early_initcall(dma_alloc_init);
+
+/*
+ * make an area consistent to devices.
+ */
+void dma_sync_for_device(void *vaddr, size_t size, int dma_direction)
+{
+	/*
+	 * Ensure any writes get through the write combiner. This is necessary
+	 * even with DMA_FROM_DEVICE, or the write may dirty the cache after
+	 * we've invalidated it and get written back during the DMA.
+	 */
+
+	barrier();
+
+	switch (dma_direction) {
+	case DMA_BIDIRECTIONAL:
+		/*
+		 * Writeback to ensure the device can see our latest changes and
+		 * so that we have no dirty lines, and invalidate the cache
+		 * lines too in preparation for receiving the buffer back
+		 * (dma_sync_for_cpu) later.
+		 */
+		flush_dcache_region(vaddr, size);
+		break;
+	case DMA_TO_DEVICE:
+		/*
+		 * Writeback to ensure the device can see our latest changes.
+		 * There's no need to invalidate as the device shouldn't write
+		 * to the buffer.
+		 */
+		writeback_dcache_region(vaddr, size);
+		break;
+	case DMA_FROM_DEVICE:
+		/*
+		 * Invalidate to ensure we have no dirty lines that could get
+		 * written back during the DMA. It's also safe to flush
+		 * (writeback) here if necessary.
+		 */
+		invalidate_dcache_region(vaddr, size);
+		break;
+	case DMA_NONE:
+		BUG();
+	}
+
+	wmb();
+}
+EXPORT_SYMBOL(dma_sync_for_device);
+
+/*
+ * make an area consistent to the core.
+ */
+void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction)
+{
+	/*
+	 * Hardware L2 cache prefetch doesn't occur across 4K physical
+	 * boundaries, however according to Documentation/DMA-API-HOWTO.txt
+	 * kmalloc'd memory is DMA'able, so accesses in nearby memory could
+	 * trigger a cache fill in the DMA buffer.
+	 *
+	 * This should never cause dirty lines, so a flush or invalidate should
+	 * be safe to allow us to see data from the device.
+	 */
+	if (_meta_l2c_pf_is_enabled()) {
+		switch (dma_direction) {
+		case DMA_BIDIRECTIONAL:
+		case DMA_FROM_DEVICE:
+			invalidate_dcache_region(vaddr, size);
+			break;
+		case DMA_TO_DEVICE:
+			/* The device shouldn't have written to the buffer */
+			break;
+		case DMA_NONE:
+			BUG();
+		}
+	}
+
+	rmb();
+}
+EXPORT_SYMBOL(dma_sync_for_cpu);
diff --git a/arch/metag/kernel/ftrace.c b/arch/metag/kernel/ftrace.c
new file mode 100644
index 0000000..a774f32
--- /dev/null
+++ b/arch/metag/kernel/ftrace.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2008 Imagination Technologies Ltd.
+ * Licensed under the GPL
+ *
+ * Dynamic ftrace support.
+ */
+
+#include <linux/ftrace.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+
+#define D04_MOVT_TEMPLATE	0x02200005
+#define D04_CALL_TEMPLATE	0xAC200005
+#define D1RTP_MOVT_TEMPLATE	0x03200005
+#define D1RTP_CALL_TEMPLATE	0xAC200006
+
+static const unsigned long NOP[2] = {0xa0fffffe, 0xa0fffffe};
+static unsigned long movt_and_call_insn[2];
+
+static unsigned char *ftrace_nop_replace(void)
+{
+	return (char *)&NOP[0];
+}
+
+static unsigned char *ftrace_call_replace(unsigned long pc, unsigned long addr)
+{
+	unsigned long hi16, low16;
+
+	hi16 = (addr & 0xffff0000) >> 13;
+	low16 = (addr & 0x0000ffff) << 3;
+
+	/*
+	 * The compiler makes the call to mcount_wrapper()
+	 * (Meta's wrapper around mcount()) through the register
+	 * D0.4. So whenever we're patching one of those compiler-generated
+	 * calls we also need to go through D0.4. Otherwise use D1RtP.
+	 */
+	if (pc == (unsigned long)&ftrace_call) {
+		writel(D1RTP_MOVT_TEMPLATE | hi16, &movt_and_call_insn[0]);
+		writel(D1RTP_CALL_TEMPLATE | low16, &movt_and_call_insn[1]);
+	} else {
+		writel(D04_MOVT_TEMPLATE | hi16, &movt_and_call_insn[0]);
+		writel(D04_CALL_TEMPLATE | low16, &movt_and_call_insn[1]);
+	}
+
+	return (unsigned char *)&movt_and_call_insn[0];
+}
+
+static int ftrace_modify_code(unsigned long pc, unsigned char *old_code,
+			      unsigned char *new_code)
+{
+	unsigned char replaced[MCOUNT_INSN_SIZE];
+
+	/*
+	 * Note: Due to modules and __init, code can
+	 *  disappear and change, we need to protect against faulting
+	 *  as well as code changing.
+	 *
+	 * No real locking needed, this code is run through
+	 * kstop_machine.
+	 */
+
+	/* read the text we want to modify */
+	if (probe_kernel_read(replaced, (void *)pc, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* Make sure it is what we expect it to be */
+	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+		return -EINVAL;
+
+	/* replace the text with the new text */
+	if (probe_kernel_write((void *)pc, new_code, MCOUNT_INSN_SIZE))
+		return -EPERM;
+
+	flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
+
+	return 0;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	int ret;
+	unsigned long pc;
+	unsigned char old[MCOUNT_INSN_SIZE], *new;
+
+	pc = (unsigned long)&ftrace_call;
+	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
+	new = ftrace_call_replace(pc, (unsigned long)func);
+	ret = ftrace_modify_code(pc, old, new);
+
+	return ret;
+}
+
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned char *new, *old;
+	unsigned long ip = rec->ip;
+
+	old = ftrace_call_replace(ip, addr);
+	new = ftrace_nop_replace();
+
+	return ftrace_modify_code(ip, old, new);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned char *new, *old;
+	unsigned long ip = rec->ip;
+
+	old = ftrace_nop_replace();
+	new = ftrace_call_replace(ip, addr);
+
+	return ftrace_modify_code(ip, old, new);
+}
+
+/* run from kstop_machine */
+int __init ftrace_dyn_arch_init(void *data)
+{
+	/* The return code is returned via data */
+	writel(0, data);
+
+	return 0;
+}
diff --git a/arch/metag/kernel/ftrace_stub.S b/arch/metag/kernel/ftrace_stub.S
new file mode 100644
index 0000000..e70bff7
--- /dev/null
+++ b/arch/metag/kernel/ftrace_stub.S
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2008 Imagination Technologies Ltd.
+ * Licensed under the GPL
+ *
+ */
+
+#include <asm/ftrace.h>
+
+	.text
+#ifdef CONFIG_DYNAMIC_FTRACE
+	.global	_mcount_wrapper
+	.type	_mcount_wrapper,function
+_mcount_wrapper:
+	MOV	PC,D0.4
+
+	.global _ftrace_caller
+	.type	_ftrace_caller,function
+_ftrace_caller:
+	MOVT    D0Re0,#HI(_function_trace_stop)
+	ADD	D0Re0,D0Re0,#LO(_function_trace_stop)
+	GETD	D0Re0,[D0Re0]
+	CMP	D0Re0,#0
+	BEQ	$Lcall_stub
+	MOV	PC,D0.4
+$Lcall_stub:
+	MSETL   [A0StP], D0Ar6, D0Ar4, D0Ar2, D0.4
+	MOV     D1Ar1, D0.4
+	MOV     D0Ar2, D1RtP
+	SUB	D1Ar1,D1Ar1,#MCOUNT_INSN_SIZE
+
+	.global _ftrace_call
+_ftrace_call:
+	MOVT	D1RtP,#HI(_ftrace_stub)
+	CALL	D1RtP,#LO(_ftrace_stub)
+	GETL    D0.4,  D1RtP, [A0StP++#(-8)]
+	GETL    D0Ar2, D1Ar1, [A0StP++#(-8)]
+	GETL    D0Ar4, D1Ar3, [A0StP++#(-8)]
+	GETL    D0Ar6, D1Ar5, [A0StP++#(-8)]
+	MOV     PC, D0.4
+#else
+
+	.global	_mcount_wrapper
+	.type	_mcount_wrapper,function
+_mcount_wrapper:
+	MOVT    D0Re0,#HI(_function_trace_stop)
+	ADD	D0Re0,D0Re0,#LO(_function_trace_stop)
+	GETD	D0Re0,[D0Re0]
+	CMP	D0Re0,#0
+	BEQ	$Lcall_mcount
+	MOV	PC,D0.4
+$Lcall_mcount:
+	MSETL   [A0StP], D0Ar6, D0Ar4, D0Ar2, D0.4
+	MOV     D1Ar1, D0.4
+	MOV     D0Ar2, D1RtP
+	MOVT    D0Re0,#HI(_ftrace_trace_function)
+	ADD	D0Re0,D0Re0,#LO(_ftrace_trace_function)
+	GET	D1Ar3,[D0Re0]
+	MOVT	D1Re0,#HI(_ftrace_stub)
+	ADD	D1Re0,D1Re0,#LO(_ftrace_stub)
+	CMP	D1Ar3,D1Re0
+	BEQ	$Ltrace_exit
+	MOV	D1RtP,D1Ar3
+	SUB	D1Ar1,D1Ar1,#MCOUNT_INSN_SIZE
+	SWAP	PC,D1RtP
+$Ltrace_exit:
+	GETL    D0.4,  D1RtP, [A0StP++#(-8)]
+	GETL    D0Ar2, D1Ar1, [A0StP++#(-8)]
+	GETL    D0Ar4, D1Ar3, [A0StP++#(-8)]
+	GETL    D0Ar6, D1Ar5, [A0StP++#(-8)]
+	MOV     PC, D0.4
+
+#endif	/* CONFIG_DYNAMIC_FTRACE */
+
+	.global _ftrace_stub
+_ftrace_stub:
+	MOV 	PC,D1RtP
diff --git a/arch/metag/kernel/head.S b/arch/metag/kernel/head.S
new file mode 100644
index 0000000..969dffa
--- /dev/null
+++ b/arch/metag/kernel/head.S
@@ -0,0 +1,57 @@
+	! Copyright 2005,2006,2007,2009 Imagination Technologies
+
+#include <linux/init.h>
+#include <generated/asm-offsets.h>
+#undef __exit
+
+	__HEAD
+	! Setup the stack and get going into _metag_start_kernel
+	.global	__start
+	.type   __start,function
+__start:
+	! D1Ar1 contains pTBI (ISTAT)
+	! D0Ar2 contains pTBI
+	! D1Ar3 contains __pTBISegs
+	! D0Ar4 contains kernel arglist pointer
+
+	MOVT    D0Re0,#HI(___pTBIs)
+	ADD     D0Re0,D0Re0,#LO(___pTBIs)
+	SETL    [D0Re0],D0Ar2,D1Ar1
+	MOVT    D0Re0,#HI(___pTBISegs)
+	ADD     D0Re0,D0Re0,#LO(___pTBISegs)
+	SETD    [D0Re0],D1Ar3
+	MOV	A0FrP,#0
+	MOV	D0Re0,#0
+	MOV	D1Re0,#0
+	MOV	D1Ar3,#0
+	MOV	D1Ar1,D0Ar4			!Store kernel boot params
+	MOV	D1Ar5,#0
+	MOV	D0Ar6,#0
+#ifdef CONFIG_METAG_DSP
+	MOV	D0.8,#0
+#endif
+	MOVT    A0StP,#HI(_init_thread_union)
+	ADD	A0StP,A0StP,#LO(_init_thread_union)
+	ADD     A0StP,A0StP,#THREAD_INFO_SIZE
+	MOVT	D1RtP,#HI(_metag_start_kernel)
+	CALL	D1RtP,#LO(_metag_start_kernel)
+	.size	__start,.-__start
+
+	!! Needed by TBX
+	.global	__exit
+	.type   __exit,function
+__exit:
+	XOR     TXENABLE,D0Re0,D0Re0
+	.size	__exit,.-__exit
+
+#ifdef CONFIG_SMP
+	.global _secondary_startup
+	.type _secondary_startup,function
+_secondary_startup:
+	MOVT	A0StP,#HI(_secondary_data_stack)
+	ADD	A0StP,A0StP,#LO(_secondary_data_stack)
+	GETD	A0StP,[A0StP]
+	ADD	A0StP,A0StP,#THREAD_INFO_SIZE
+	B	_secondary_start_kernel
+	.size	_secondary_startup,.-_secondary_startup
+#endif
diff --git a/arch/metag/kernel/irq.c b/arch/metag/kernel/irq.c
new file mode 100644
index 0000000..87707ef
--- /dev/null
+++ b/arch/metag/kernel/irq.c
@@ -0,0 +1,323 @@
+/*
+ * Linux/Meta general interrupt handling code
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/irqchip/metag-ext.h>
+#include <linux/irqchip/metag.h>
+#include <linux/irqdomain.h>
+#include <linux/ratelimit.h>
+
+#include <asm/core_reg.h>
+#include <asm/mach/arch.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_4KSTACKS
+union irq_ctx {
+	struct thread_info      tinfo;
+	u32                     stack[THREAD_SIZE/sizeof(u32)];
+};
+
+static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
+static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
+#endif
+
+struct irq_domain *root_domain;
+
+static unsigned int startup_meta_irq(struct irq_data *data)
+{
+	tbi_startup_interrupt(data->hwirq);
+	return 0;
+}
+
+static void shutdown_meta_irq(struct irq_data *data)
+{
+	tbi_shutdown_interrupt(data->hwirq);
+}
+
+void do_IRQ(int irq, struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+#ifdef CONFIG_4KSTACKS
+	struct irq_desc *desc;
+	union irq_ctx *curctx, *irqctx;
+	u32 *isp;
+#endif
+
+	irq_enter();
+
+	irq = irq_linear_revmap(root_domain, irq);
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	/* Debugging check for stack overflow: is there less than 1KB free? */
+	{
+		unsigned long sp;
+
+		sp = __core_reg_get(A0StP);
+		sp &= THREAD_SIZE - 1;
+
+		if (unlikely(sp > (THREAD_SIZE - 1024)))
+			pr_err("Stack overflow in do_IRQ: %ld\n", sp);
+	}
+#endif
+
+
+#ifdef CONFIG_4KSTACKS
+	curctx = (union irq_ctx *) current_thread_info();
+	irqctx = hardirq_ctx[smp_processor_id()];
+
+	/*
+	 * this is where we switch to the IRQ stack. However, if we are
+	 * already using the IRQ stack (because we interrupted a hardirq
+	 * handler) we can't do that and just have to keep using the
+	 * current stack (which is the irq stack already after all)
+	 */
+	if (curctx != irqctx) {
+		/* build the stack frame on the IRQ stack */
+		isp = (u32 *) ((char *)irqctx + sizeof(struct thread_info));
+		irqctx->tinfo.task = curctx->tinfo.task;
+
+		/*
+		 * Copy the softirq bits in preempt_count so that the
+		 * softirq checks work in the hardirq context.
+		 */
+		irqctx->tinfo.preempt_count =
+			(irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
+			(curctx->tinfo.preempt_count & SOFTIRQ_MASK);
+
+		desc = irq_to_desc(irq);
+
+		asm volatile (
+			"MOV   D0.5,%0\n"
+			"MOV   D1Ar1,%1\n"
+			"MOV   D1RtP,%2\n"
+			"MOV   D0Ar2,%3\n"
+			"SWAP  A0StP,D0.5\n"
+			"SWAP  PC,D1RtP\n"
+			"MOV   A0StP,D0.5\n"
+			:
+			: "r" (isp), "r" (irq), "r" (desc->handle_irq),
+			  "r" (desc)
+			: "memory", "cc", "D1Ar1", "D0Ar2", "D1Ar3", "D0Ar4",
+			  "D1Ar5", "D0Ar6", "D0Re0", "D1Re0", "D0.4", "D1RtP",
+			  "D0.5"
+			);
+	} else
+#endif
+		generic_handle_irq(irq);
+
+	irq_exit();
+
+	set_irq_regs(old_regs);
+}
+
+#ifdef CONFIG_4KSTACKS
+
+static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+
+static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+
+/*
+ * allocate per-cpu stacks for hardirq and for softirq processing
+ */
+void irq_ctx_init(int cpu)
+{
+	union irq_ctx *irqctx;
+
+	if (hardirq_ctx[cpu])
+		return;
+
+	irqctx = (union irq_ctx *) &hardirq_stack[cpu * THREAD_SIZE];
+	irqctx->tinfo.task              = NULL;
+	irqctx->tinfo.exec_domain       = NULL;
+	irqctx->tinfo.cpu               = cpu;
+	irqctx->tinfo.preempt_count     = HARDIRQ_OFFSET;
+	irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
+
+	hardirq_ctx[cpu] = irqctx;
+
+	irqctx = (union irq_ctx *) &softirq_stack[cpu * THREAD_SIZE];
+	irqctx->tinfo.task              = NULL;
+	irqctx->tinfo.exec_domain       = NULL;
+	irqctx->tinfo.cpu               = cpu;
+	irqctx->tinfo.preempt_count     = 0;
+	irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
+
+	softirq_ctx[cpu] = irqctx;
+
+	pr_info("CPU %u irqstacks, hard=%p soft=%p\n",
+		cpu, hardirq_ctx[cpu], softirq_ctx[cpu]);
+}
+
+void irq_ctx_exit(int cpu)
+{
+	hardirq_ctx[smp_processor_id()] = NULL;
+}
+
+extern asmlinkage void __do_softirq(void);
+
+asmlinkage void do_softirq(void)
+{
+	unsigned long flags;
+	struct thread_info *curctx;
+	union irq_ctx *irqctx;
+	u32 *isp;
+
+	if (in_interrupt())
+		return;
+
+	local_irq_save(flags);
+
+	if (local_softirq_pending()) {
+		curctx = current_thread_info();
+		irqctx = softirq_ctx[smp_processor_id()];
+		irqctx->tinfo.task = curctx->task;
+
+		/* build the stack frame on the softirq stack */
+		isp = (u32 *) ((char *)irqctx + sizeof(struct thread_info));
+
+		asm volatile (
+			"MOV   D0.5,%0\n"
+			"SWAP  A0StP,D0.5\n"
+			"CALLR D1RtP,___do_softirq\n"
+			"MOV   A0StP,D0.5\n"
+			:
+			: "r" (isp)
+			: "memory", "cc", "D1Ar1", "D0Ar2", "D1Ar3", "D0Ar4",
+			  "D1Ar5", "D0Ar6", "D0Re0", "D1Re0", "D0.4", "D1RtP",
+			  "D0.5"
+			);
+		/*
+		 * Shouldn't happen, we returned above if in_interrupt():
+		 */
+		WARN_ON_ONCE(softirq_count());
+	}
+
+	local_irq_restore(flags);
+}
+#endif
+
+static struct irq_chip meta_irq_type = {
+	.name = "META-IRQ",
+	.irq_startup = startup_meta_irq,
+	.irq_shutdown = shutdown_meta_irq,
+};
+
+/**
+ * tbisig_map() - Map a TBI signal number to a virtual IRQ number.
+ * @hw:		Number of the TBI signal. Must be in range.
+ *
+ * Returns:	The virtual IRQ number of the TBI signal number IRQ specified by
+ *		@hw.
+ */
+int tbisig_map(unsigned int hw)
+{
+	return irq_create_mapping(root_domain, hw);
+}
+
+/**
+ * metag_tbisig_map() - map a tbi signal to a Linux virtual IRQ number
+ * @d:		root irq domain
+ * @irq:	virtual irq number
+ * @hw:		hardware irq number (TBI signal number)
+ *
+ * This sets up a virtual irq for a specified TBI signal number.
+ */
+static int metag_tbisig_map(struct irq_domain *d, unsigned int irq,
+			    irq_hw_number_t hw)
+{
+#ifdef CONFIG_SMP
+	irq_set_chip_and_handler(irq, &meta_irq_type, handle_percpu_irq);
+#else
+	irq_set_chip_and_handler(irq, &meta_irq_type, handle_simple_irq);
+#endif
+	return 0;
+}
+
+static const struct irq_domain_ops metag_tbisig_domain_ops = {
+	.map = metag_tbisig_map,
+};
+
+/*
+ * void init_IRQ(void)
+ *
+ * Parameters:	None
+ *
+ * Returns:	Nothing
+ *
+ * This function should be called during kernel startup to initialize
+ * the IRQ handling routines.
+ */
+void __init init_IRQ(void)
+{
+	root_domain = irq_domain_add_linear(NULL, 32,
+					    &metag_tbisig_domain_ops, NULL);
+	if (unlikely(!root_domain))
+		panic("init_IRQ: cannot add root IRQ domain");
+
+	irq_ctx_init(smp_processor_id());
+
+	init_internal_IRQ();
+	init_external_IRQ();
+
+	if (machine_desc->init_irq)
+		machine_desc->init_irq();
+}
+
+int __init arch_probe_nr_irqs(void)
+{
+	if (machine_desc->nr_irqs)
+		nr_irqs = machine_desc->nr_irqs;
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void route_irq(struct irq_data *data, unsigned int irq, unsigned int cpu)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_chip *chip = irq_data_get_irq_chip(data);
+
+	raw_spin_lock_irq(&desc->lock);
+	if (chip->irq_set_affinity)
+		chip->irq_set_affinity(data, cpumask_of(cpu), false);
+	raw_spin_unlock_irq(&desc->lock);
+}
+
+/*
+ * The CPU has been marked offline.  Migrate IRQs off this CPU.  If
+ * the affinity settings do not allow other CPUs, force them onto any
+ * available CPU.
+ */
+void migrate_irqs(void)
+{
+	unsigned int i, cpu = smp_processor_id();
+	struct irq_desc *desc;
+
+	for_each_irq_desc(i, desc) {
+		struct irq_data *data = irq_desc_get_irq_data(desc);
+		unsigned int newcpu;
+
+		if (irqd_is_per_cpu(data))
+			continue;
+
+		if (!cpumask_test_cpu(cpu, data->affinity))
+			continue;
+
+		newcpu = cpumask_any_and(data->affinity, cpu_online_mask);
+
+		if (newcpu >= nr_cpu_ids) {
+			pr_info_ratelimited("IRQ%u no longer affine to CPU%u\n",
+					    i, cpu);
+
+			cpumask_setall(data->affinity);
+			newcpu = cpumask_any_and(data->affinity,
+						 cpu_online_mask);
+		}
+
+		route_irq(data, i, newcpu);
+	}
+}
+#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/metag/kernel/kick.c b/arch/metag/kernel/kick.c
new file mode 100644
index 0000000..50fcbec
--- /dev/null
+++ b/arch/metag/kernel/kick.c
@@ -0,0 +1,101 @@
+/*
+ *  Copyright (C) 2009 Imagination Technologies
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ * The Meta KICK interrupt mechanism is generally a useful feature, so
+ * we provide an interface for registering multiple interrupt
+ * handlers. All the registered interrupt handlers are "chained". When
+ * a KICK interrupt is received the first function in the list is
+ * called. If that interrupt handler cannot handle the KICK the next
+ * one is called, then the next until someone handles it (or we run
+ * out of functions). As soon as one function handles the interrupt no
+ * other handlers are called.
+ *
+ * The only downside of chaining interrupt handlers is that each
+ * handler must be able to detect whether the KICK was intended for it
+ * or not.  For example, when the IPI handler runs and it sees that
+ * there are no IPI messages it must not signal that the KICK was
+ * handled, thereby giving the other handlers a chance to run.
+ *
+ * The reason that we provide our own interface for calling KICK
+ * handlers instead of using the generic kernel infrastructure is that
+ * the KICK handlers require access to a CPU's pTBI structure. So we
+ * pass it as an argument.
+ */
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+
+#include <asm/traps.h>
+
+/*
+ * All accesses/manipulations of kick_handlers_list should be
+ * performed while holding kick_handlers_lock.
+ */
+static DEFINE_SPINLOCK(kick_handlers_lock);
+static LIST_HEAD(kick_handlers_list);
+
+void kick_register_func(struct kick_irq_handler *kh)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kick_handlers_lock, flags);
+
+	list_add_tail(&kh->list, &kick_handlers_list);
+
+	spin_unlock_irqrestore(&kick_handlers_lock, flags);
+}
+EXPORT_SYMBOL(kick_register_func);
+
+void kick_unregister_func(struct kick_irq_handler *kh)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kick_handlers_lock, flags);
+
+	list_del(&kh->list);
+
+	spin_unlock_irqrestore(&kick_handlers_lock, flags);
+}
+EXPORT_SYMBOL(kick_unregister_func);
+
+TBIRES
+kick_handler(TBIRES State, int SigNum, int Triggers, int Inst, PTBI pTBI)
+{
+	struct kick_irq_handler *kh;
+	struct list_head *lh;
+	int handled = 0;
+	TBIRES ret;
+
+	head_end(State, ~INTS_OFF_MASK);
+
+	/* If we interrupted user code handle any critical sections. */
+	if (State.Sig.SaveMask & TBICTX_PRIV_BIT)
+		restart_critical_section(State);
+
+	trace_hardirqs_off();
+
+	/*
+	 * There is no need to disable interrupts here because we
+	 * can't nest KICK interrupts in a KICK interrupt handler.
+	 */
+	spin_lock(&kick_handlers_lock);
+
+	list_for_each(lh, &kick_handlers_list) {
+		kh = list_entry(lh, struct kick_irq_handler, list);
+
+		ret = kh->func(State, SigNum, Triggers, Inst, pTBI, &handled);
+		if (handled)
+			break;
+	}
+
+	spin_unlock(&kick_handlers_lock);
+
+	WARN_ON(!handled);
+
+	return tail_end(ret);
+}
diff --git a/arch/metag/kernel/machines.c b/arch/metag/kernel/machines.c
new file mode 100644
index 0000000..1edf6ba
--- /dev/null
+++ b/arch/metag/kernel/machines.c
@@ -0,0 +1,20 @@
+/*
+ *  arch/metag/kernel/machines.c
+ *
+ *  Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ *  Generic Meta Boards.
+ */
+
+#include <linux/init.h>
+#include <asm/irq.h>
+#include <asm/mach/arch.h>
+
+static const char *meta_boards_compat[] __initdata = {
+	"img,meta",
+	NULL,
+};
+
+MACHINE_START(META, "Generic Meta")
+	.dt_compat	= meta_boards_compat,
+MACHINE_END
diff --git a/arch/metag/kernel/metag_ksyms.c b/arch/metag/kernel/metag_ksyms.c
new file mode 100644
index 0000000..ec872ef
--- /dev/null
+++ b/arch/metag/kernel/metag_ksyms.c
@@ -0,0 +1,49 @@
+#include <linux/export.h>
+
+#include <asm/div64.h>
+#include <asm/ftrace.h>
+#include <asm/page.h>
+#include <asm/string.h>
+#include <asm/tbx.h>
+
+EXPORT_SYMBOL(clear_page);
+EXPORT_SYMBOL(copy_page);
+
+#ifdef CONFIG_FLATMEM
+/* needed for the pfn_valid macro */
+EXPORT_SYMBOL(max_pfn);
+EXPORT_SYMBOL(min_low_pfn);
+#endif
+
+/* TBI symbols */
+EXPORT_SYMBOL(__TBI);
+EXPORT_SYMBOL(__TBIFindSeg);
+EXPORT_SYMBOL(__TBIPoll);
+EXPORT_SYMBOL(__TBITimeStamp);
+
+#define DECLARE_EXPORT(name) extern void name(void); EXPORT_SYMBOL(name)
+
+/* libgcc functions */
+DECLARE_EXPORT(__ashldi3);
+DECLARE_EXPORT(__ashrdi3);
+DECLARE_EXPORT(__lshrdi3);
+DECLARE_EXPORT(__udivsi3);
+DECLARE_EXPORT(__divsi3);
+DECLARE_EXPORT(__umodsi3);
+DECLARE_EXPORT(__modsi3);
+DECLARE_EXPORT(__muldi3);
+DECLARE_EXPORT(__cmpdi2);
+DECLARE_EXPORT(__ucmpdi2);
+
+/* Maths functions */
+EXPORT_SYMBOL(div_u64);
+EXPORT_SYMBOL(div_s64);
+
+/* String functions */
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memmove);
+
+#ifdef CONFIG_FUNCTION_TRACER
+EXPORT_SYMBOL(mcount_wrapper);
+#endif
diff --git a/arch/metag/kernel/module.c b/arch/metag/kernel/module.c
new file mode 100644
index 0000000..986331c
--- /dev/null
+++ b/arch/metag/kernel/module.c
@@ -0,0 +1,284 @@
+/*  Kernel module help for Meta.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+*/
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/sort.h>
+
+#include <asm/unaligned.h>
+
+/* Count how many different relocations (different symbol, different
+   addend) */
+static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
+{
+	unsigned int i, r_info, r_addend, _count_relocs;
+
+	_count_relocs = 0;
+	r_info = 0;
+	r_addend = 0;
+	for (i = 0; i < num; i++)
+		/* Only count relbranch relocs, others don't need stubs */
+		if (ELF32_R_TYPE(rela[i].r_info) == R_METAG_RELBRANCH &&
+		    (r_info != ELF32_R_SYM(rela[i].r_info) ||
+		     r_addend != rela[i].r_addend)) {
+			_count_relocs++;
+			r_info = ELF32_R_SYM(rela[i].r_info);
+			r_addend = rela[i].r_addend;
+		}
+
+	return _count_relocs;
+}
+
+static int relacmp(const void *_x, const void *_y)
+{
+	const Elf32_Rela *x, *y;
+
+	y = (Elf32_Rela *)_x;
+	x = (Elf32_Rela *)_y;
+
+	/* Compare the entire r_info (as opposed to ELF32_R_SYM(r_info) only) to
+	 * make the comparison cheaper/faster. It won't affect the sorting or
+	 * the counting algorithms' performance
+	 */
+	if (x->r_info < y->r_info)
+		return -1;
+	else if (x->r_info > y->r_info)
+		return 1;
+	else if (x->r_addend < y->r_addend)
+		return -1;
+	else if (x->r_addend > y->r_addend)
+		return 1;
+	else
+		return 0;
+}
+
+static void relaswap(void *_x, void *_y, int size)
+{
+	uint32_t *x, *y, tmp;
+	int i;
+
+	y = (uint32_t *)_x;
+	x = (uint32_t *)_y;
+
+	for (i = 0; i < sizeof(Elf32_Rela) / sizeof(uint32_t); i++) {
+		tmp = x[i];
+		x[i] = y[i];
+		y[i] = tmp;
+	}
+}
+
+/* Get the potential trampolines size required of the init and
+   non-init sections */
+static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
+				  const Elf32_Shdr *sechdrs,
+				  const char *secstrings,
+				  int is_init)
+{
+	unsigned long ret = 0;
+	unsigned i;
+
+	/* Everything marked ALLOC (this includes the exported
+	   symbols) */
+	for (i = 1; i < hdr->e_shnum; i++) {
+		/* If it's called *.init*, and we're not init, we're
+		   not interested */
+		if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != NULL)
+		    != is_init)
+			continue;
+
+		/* We don't want to look at debug sections. */
+		if (strstr(secstrings + sechdrs[i].sh_name, ".debug") != NULL)
+			continue;
+
+		if (sechdrs[i].sh_type == SHT_RELA) {
+			pr_debug("Found relocations in section %u\n", i);
+			pr_debug("Ptr: %p.  Number: %u\n",
+				 (void *)hdr + sechdrs[i].sh_offset,
+				 sechdrs[i].sh_size / sizeof(Elf32_Rela));
+
+			/* Sort the relocation information based on a symbol and
+			 * addend key. This is a stable O(n*log n) complexity
+			 * alogrithm but it will reduce the complexity of
+			 * count_relocs() to linear complexity O(n)
+			 */
+			sort((void *)hdr + sechdrs[i].sh_offset,
+			     sechdrs[i].sh_size / sizeof(Elf32_Rela),
+			     sizeof(Elf32_Rela), relacmp, relaswap);
+
+			ret += count_relocs((void *)hdr
+					     + sechdrs[i].sh_offset,
+					     sechdrs[i].sh_size
+					     / sizeof(Elf32_Rela))
+				* sizeof(struct metag_plt_entry);
+		}
+	}
+
+	return ret;
+}
+
+int module_frob_arch_sections(Elf32_Ehdr *hdr,
+			      Elf32_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *me)
+{
+	unsigned int i;
+
+	/* Find .plt and .init.plt sections */
+	for (i = 0; i < hdr->e_shnum; i++) {
+		if (strcmp(secstrings + sechdrs[i].sh_name, ".init.plt") == 0)
+			me->arch.init_plt_section = i;
+		else if (strcmp(secstrings + sechdrs[i].sh_name, ".plt") == 0)
+			me->arch.core_plt_section = i;
+	}
+	if (!me->arch.core_plt_section || !me->arch.init_plt_section) {
+		pr_err("Module doesn't contain .plt or .init.plt sections.\n");
+		return -ENOEXEC;
+	}
+
+	/* Override their sizes */
+	sechdrs[me->arch.core_plt_section].sh_size
+		= get_plt_size(hdr, sechdrs, secstrings, 0);
+	sechdrs[me->arch.core_plt_section].sh_type = SHT_NOBITS;
+	sechdrs[me->arch.init_plt_section].sh_size
+		= get_plt_size(hdr, sechdrs, secstrings, 1);
+	sechdrs[me->arch.init_plt_section].sh_type = SHT_NOBITS;
+	return 0;
+}
+
+/* Set up a trampoline in the PLT to bounce us to the distant function */
+static uint32_t do_plt_call(void *location, Elf32_Addr val,
+			    Elf32_Shdr *sechdrs, struct module *mod)
+{
+	struct metag_plt_entry *entry;
+	/* Instructions used to do the indirect jump.  */
+	uint32_t tramp[2];
+
+	/* We have to trash a register, so we assume that any control
+	   transfer more than 21-bits away must be a function call
+	   (so we can use a call-clobbered register).  */
+
+	/* MOVT D0Re0,#HI(v) */
+	tramp[0] = 0x02000005 | (((val & 0xffff0000) >> 16) << 3);
+	/* JUMP D0Re0,#LO(v) */
+	tramp[1] = 0xac000001 | ((val & 0x0000ffff) << 3);
+
+	/* Init, or core PLT? */
+	if (location >= mod->module_core
+	    && location < mod->module_core + mod->core_size)
+		entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr;
+	else
+		entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr;
+
+	/* Find this entry, or if that fails, the next avail. entry */
+	while (entry->tramp[0])
+		if (entry->tramp[0] == tramp[0] && entry->tramp[1] == tramp[1])
+			return (uint32_t)entry;
+		else
+			entry++;
+
+	entry->tramp[0] = tramp[0];
+	entry->tramp[1] = tramp[1];
+
+	return (uint32_t)entry;
+}
+
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+		   const char *strtab,
+		   unsigned int symindex,
+		   unsigned int relsec,
+		   struct module *me)
+{
+	unsigned int i;
+	Elf32_Rela *rel = (void *)sechdrs[relsec].sh_addr;
+	Elf32_Sym *sym;
+	Elf32_Addr relocation;
+	uint32_t *location;
+	int32_t value;
+
+	pr_debug("Applying relocate section %u to %u\n", relsec,
+		 sechdrs[relsec].sh_info);
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+		/* This is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rel[i].r_offset;
+		/* This is the symbol it is referring to.  Note that all
+		   undefined symbols have been resolved.  */
+		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+			+ ELF32_R_SYM(rel[i].r_info);
+		relocation = sym->st_value + rel[i].r_addend;
+
+		switch (ELF32_R_TYPE(rel[i].r_info)) {
+		case R_METAG_NONE:
+			break;
+		case R_METAG_HIADDR16:
+			relocation >>= 16;
+		case R_METAG_LOADDR16:
+			*location = (*location & 0xfff80007) |
+				((relocation & 0xffff) << 3);
+			break;
+		case R_METAG_ADDR32:
+			/*
+			 * Packed data structures may cause a misaligned
+			 * R_METAG_ADDR32 to be emitted.
+			 */
+			put_unaligned(relocation, location);
+			break;
+		case R_METAG_GETSETOFF:
+			*location += ((relocation & 0xfff) << 7);
+			break;
+		case R_METAG_RELBRANCH:
+			if (*location & (0x7ffff << 5)) {
+				pr_err("bad relbranch relocation\n");
+				break;
+			}
+
+			/* This jump is too big for the offset slot. Build
+			 * a PLT to jump through to get to where we want to go.
+			 * NB: 21bit check - not scaled to 19bit yet
+			 */
+			if (((int32_t)(relocation -
+				       (uint32_t)location) > 0xfffff) ||
+			    ((int32_t)(relocation -
+				       (uint32_t)location) < -0xfffff)) {
+				relocation = do_plt_call(location, relocation,
+							 sechdrs, me);
+			}
+
+			value = relocation - (uint32_t)location;
+
+			/* branch instruction aligned */
+			value /= 4;
+
+			if ((value > 0x7ffff) || (value < -0x7ffff)) {
+				/*
+				 * this should have been caught by the code
+				 * above!
+				 */
+				pr_err("overflow of relbranch reloc\n");
+			}
+
+			*location = (*location & (~(0x7ffff << 5))) |
+				((value & 0x7ffff) << 5);
+			break;
+
+		default:
+			pr_err("module %s: Unknown relocation: %u\n",
+			       me->name, ELF32_R_TYPE(rel[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+}
diff --git a/arch/metag/kernel/perf/Makefile b/arch/metag/kernel/perf/Makefile
new file mode 100644
index 0000000..b158cb2
--- /dev/null
+++ b/arch/metag/kernel/perf/Makefile
@@ -0,0 +1,3 @@
+# Makefile for performance event core
+
+obj-y += perf_event.o
diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c
new file mode 100644
index 0000000..a876d5f
--- /dev/null
+++ b/arch/metag/kernel/perf/perf_event.c
@@ -0,0 +1,861 @@
+/*
+ * Meta performance counter support.
+ *  Copyright (C) 2012 Imagination Technologies Ltd
+ *
+ * This code is based on the sh pmu code:
+ *  Copyright (C) 2009 Paul Mundt
+ *
+ * and on the arm pmu code:
+ *  Copyright (C) 2009 picoChip Designs, Ltd., James Iles
+ *  Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/atomic.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/irqchip/metag.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+
+#include <asm/core_reg.h>
+#include <asm/hwthread.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+#include "perf_event.h"
+
+static int _hw_perf_event_init(struct perf_event *);
+static void _hw_perf_event_destroy(struct perf_event *);
+
+/* Determines which core type we are */
+static struct metag_pmu *metag_pmu __read_mostly;
+
+/* Processor specific data */
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+/* PMU admin */
+const char *perf_pmu_name(void)
+{
+	if (metag_pmu)
+		return metag_pmu->pmu.name;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(perf_pmu_name);
+
+int perf_num_counters(void)
+{
+	if (metag_pmu)
+		return metag_pmu->max_events;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
+static inline int metag_pmu_initialised(void)
+{
+	return !!metag_pmu;
+}
+
+static void release_pmu_hardware(void)
+{
+	int irq;
+	unsigned int version = (metag_pmu->version &
+			(METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
+			METAC_ID_REV_S;
+
+	/* Early cores don't have overflow interrupts */
+	if (version < 0x0104)
+		return;
+
+	irq = internal_irq_map(17);
+	if (irq >= 0)
+		free_irq(irq, (void *)1);
+
+	irq = internal_irq_map(16);
+	if (irq >= 0)
+		free_irq(irq, (void *)0);
+}
+
+static int reserve_pmu_hardware(void)
+{
+	int err = 0, irq[2];
+	unsigned int version = (metag_pmu->version &
+			(METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
+			METAC_ID_REV_S;
+
+	/* Early cores don't have overflow interrupts */
+	if (version < 0x0104)
+		goto out;
+
+	/*
+	 * Bit 16 on HWSTATMETA is the interrupt for performance counter 0;
+	 * similarly, 17 is the interrupt for performance counter 1.
+	 * We can't (yet) interrupt on the cycle counter, because it's a
+	 * register, however it holds a 32-bit value as opposed to 24-bit.
+	 */
+	irq[0] = internal_irq_map(16);
+	if (irq[0] < 0) {
+		pr_err("unable to map internal IRQ %d\n", 16);
+		goto out;
+	}
+	err = request_irq(irq[0], metag_pmu->handle_irq, IRQF_NOBALANCING,
+			"metagpmu0", (void *)0);
+	if (err) {
+		pr_err("unable to request IRQ%d for metag PMU counters\n",
+				irq[0]);
+		goto out;
+	}
+
+	irq[1] = internal_irq_map(17);
+	if (irq[1] < 0) {
+		pr_err("unable to map internal IRQ %d\n", 17);
+		goto out_irq1;
+	}
+	err = request_irq(irq[1], metag_pmu->handle_irq, IRQF_NOBALANCING,
+			"metagpmu1", (void *)1);
+	if (err) {
+		pr_err("unable to request IRQ%d for metag PMU counters\n",
+				irq[1]);
+		goto out_irq1;
+	}
+
+	return 0;
+
+out_irq1:
+	free_irq(irq[0], (void *)0);
+out:
+	return err;
+}
+
+/* PMU operations */
+static void metag_pmu_enable(struct pmu *pmu)
+{
+}
+
+static void metag_pmu_disable(struct pmu *pmu)
+{
+}
+
+static int metag_pmu_event_init(struct perf_event *event)
+{
+	int err = 0;
+	atomic_t *active_events = &metag_pmu->active_events;
+
+	if (!metag_pmu_initialised()) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	event->destroy = _hw_perf_event_destroy;
+
+	if (!atomic_inc_not_zero(active_events)) {
+		mutex_lock(&metag_pmu->reserve_mutex);
+		if (atomic_read(active_events) == 0)
+			err = reserve_pmu_hardware();
+
+		if (!err)
+			atomic_inc(active_events);
+
+		mutex_unlock(&metag_pmu->reserve_mutex);
+	}
+
+	/* Hardware and caches counters */
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
+		err = _hw_perf_event_init(event);
+		break;
+
+	default:
+		return -ENOENT;
+	}
+
+	if (err)
+		event->destroy(event);
+
+out:
+	return err;
+}
+
+void metag_pmu_event_update(struct perf_event *event,
+		struct hw_perf_event *hwc, int idx)
+{
+	u64 prev_raw_count, new_raw_count;
+	s64 delta;
+
+	/*
+	 * If this counter is chained, it may be that the previous counter
+	 * value has been changed beneath us.
+	 *
+	 * To get around this, we read and exchange the new raw count, then
+	 * add the delta (new - prev) to the generic counter atomically.
+	 *
+	 * Without interrupts, this is the simplest approach.
+	 */
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	new_raw_count = metag_pmu->read(idx);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			new_raw_count) != prev_raw_count)
+		goto again;
+
+	/*
+	 * Calculate the delta and add it to the counter.
+	 */
+	delta = new_raw_count - prev_raw_count;
+
+	local64_add(delta, &event->count);
+}
+
+int metag_pmu_event_set_period(struct perf_event *event,
+		struct hw_perf_event *hwc, int idx)
+{
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int ret = 0;
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (left > (s64)metag_pmu->max_period)
+		left = metag_pmu->max_period;
+
+	if (metag_pmu->write)
+		metag_pmu->write(idx, (u64)(-left) & MAX_PERIOD);
+
+	perf_event_update_userpage(event);
+
+	return ret;
+}
+
+static void metag_pmu_start(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (WARN_ON_ONCE(idx == -1))
+		return;
+
+	/*
+	 * We always have to reprogram the period, so ignore PERF_EF_RELOAD.
+	 */
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+
+	/*
+	 * Reset the period.
+	 * Some counters can't be stopped (i.e. are core global), so when the
+	 * counter was 'stopped' we merely disabled the IRQ. If we don't reset
+	 * the period, then we'll either: a) get an overflow too soon;
+	 * or b) too late if the overflow happened since disabling.
+	 * Obviously, this has little bearing on cores without the overflow
+	 * interrupt, as the performance counter resets to zero on write
+	 * anyway.
+	 */
+	if (metag_pmu->max_period)
+		metag_pmu_event_set_period(event, hwc, hwc->idx);
+	cpuc->events[idx] = event;
+	metag_pmu->enable(hwc, idx);
+}
+
+static void metag_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * We should always update the counter on stop; see comment above
+	 * why.
+	 */
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		metag_pmu_event_update(event, hwc, hwc->idx);
+		metag_pmu->disable(hwc, hwc->idx);
+		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	}
+}
+
+static int metag_pmu_add(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = 0, ret = 0;
+
+	perf_pmu_disable(event->pmu);
+
+	/* check whether we're counting instructions */
+	if (hwc->config == 0x100) {
+		if (__test_and_set_bit(METAG_INST_COUNTER,
+				cpuc->used_mask)) {
+			ret = -EAGAIN;
+			goto out;
+		}
+		idx = METAG_INST_COUNTER;
+	} else {
+		/* Check whether we have a spare counter */
+		idx = find_first_zero_bit(cpuc->used_mask,
+				atomic_read(&metag_pmu->active_events));
+		if (idx >= METAG_INST_COUNTER) {
+			ret = -EAGAIN;
+			goto out;
+		}
+
+		__set_bit(idx, cpuc->used_mask);
+	}
+	hwc->idx = idx;
+
+	/* Make sure the counter is disabled */
+	metag_pmu->disable(hwc, idx);
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		metag_pmu_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+out:
+	perf_pmu_enable(event->pmu);
+	return ret;
+}
+
+static void metag_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	WARN_ON(idx < 0);
+	metag_pmu_stop(event, PERF_EF_UPDATE);
+	cpuc->events[idx] = NULL;
+	__clear_bit(idx, cpuc->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
+static void metag_pmu_read(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	/* Don't read disabled counters! */
+	if (hwc->idx < 0)
+		return;
+
+	metag_pmu_event_update(event, hwc, hwc->idx);
+}
+
+static struct pmu pmu = {
+	.pmu_enable	= metag_pmu_enable,
+	.pmu_disable	= metag_pmu_disable,
+
+	.event_init	= metag_pmu_event_init,
+
+	.add		= metag_pmu_add,
+	.del		= metag_pmu_del,
+	.start		= metag_pmu_start,
+	.stop		= metag_pmu_stop,
+	.read		= metag_pmu_read,
+};
+
+/* Core counter specific functions */
+static const int metag_general_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = 0x03,
+	[PERF_COUNT_HW_INSTRUCTIONS] = 0x100,
+	[PERF_COUNT_HW_CACHE_REFERENCES] = -1,
+	[PERF_COUNT_HW_CACHE_MISSES] = -1,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+	[PERF_COUNT_HW_BRANCH_MISSES] = -1,
+	[PERF_COUNT_HW_BUS_CYCLES] = -1,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = -1,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = -1,
+	[PERF_COUNT_HW_REF_CPU_CYCLES] = -1,
+};
+
+static const int metag_pmu_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0x08,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0x09,
+			[C(RESULT_MISS)] = 0x0a,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0xd0,
+			[C(RESULT_MISS)] = 0xd2,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = 0xd4,
+			[C(RESULT_MISS)] = 0xd5,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0xd1,
+			[C(RESULT_MISS)] = 0xd3,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+
+static void _hw_perf_event_destroy(struct perf_event *event)
+{
+	atomic_t *active_events = &metag_pmu->active_events;
+	struct mutex *pmu_mutex = &metag_pmu->reserve_mutex;
+
+	if (atomic_dec_and_mutex_lock(active_events, pmu_mutex)) {
+		release_pmu_hardware();
+		mutex_unlock(pmu_mutex);
+	}
+}
+
+static int _hw_perf_cache_event(int config, int *evp)
+{
+	unsigned long type, op, result;
+	int ev;
+
+	if (!metag_pmu->cache_events)
+		return -EINVAL;
+
+	/* Unpack config */
+	type = config & 0xff;
+	op = (config >> 8) & 0xff;
+	result = (config >> 16) & 0xff;
+
+	if (type >= PERF_COUNT_HW_CACHE_MAX ||
+			op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+			result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ev = (*metag_pmu->cache_events)[type][op][result];
+	if (ev == 0)
+		return -EOPNOTSUPP;
+	if (ev == -1)
+		return -EINVAL;
+	*evp = ev;
+	return 0;
+}
+
+static int _hw_perf_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	int mapping = 0, err;
+
+	switch (attr->type) {
+	case PERF_TYPE_HARDWARE:
+		if (attr->config >= PERF_COUNT_HW_MAX)
+			return -EINVAL;
+
+		mapping = metag_pmu->event_map(attr->config);
+		break;
+
+	case PERF_TYPE_HW_CACHE:
+		err = _hw_perf_cache_event(attr->config, &mapping);
+		if (err)
+			return err;
+		break;
+	}
+
+	/* Return early if the event is unsupported */
+	if (mapping == -1)
+		return -EINVAL;
+
+	/*
+	 * Early cores have "limited" counters - they have no overflow
+	 * interrupts - and so are unable to do sampling without extra work
+	 * and timer assistance.
+	 */
+	if (metag_pmu->max_period == 0) {
+		if (hwc->sample_period)
+			return -EINVAL;
+	}
+
+	/*
+	 * Don't assign an index until the event is placed into the hardware.
+	 * -1 signifies that we're still deciding where to put it. On SMP
+	 * systems each core has its own set of counters, so we can't do any
+	 * constraint checking yet.
+	 */
+	hwc->idx = -1;
+
+	/* Store the event encoding */
+	hwc->config |= (unsigned long)mapping;
+
+	/*
+	 * For non-sampling runs, limit the sample_period to half of the
+	 * counter width. This way, the new counter value should be less
+	 * likely to overtake the previous one (unless there are IRQ latency
+	 * issues...)
+	 */
+	if (metag_pmu->max_period) {
+		if (!hwc->sample_period) {
+			hwc->sample_period = metag_pmu->max_period >> 1;
+			hwc->last_period = hwc->sample_period;
+			local64_set(&hwc->period_left, hwc->sample_period);
+		}
+	}
+
+	return 0;
+}
+
+static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx)
+{
+	struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
+	unsigned int config = event->config;
+	unsigned int tmp = config & 0xf0;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/*
+	 * Check if we're enabling the instruction counter (index of
+	 * MAX_HWEVENTS - 1)
+	 */
+	if (METAG_INST_COUNTER == idx) {
+		WARN_ONCE((config != 0x100),
+			"invalid configuration (%d) for counter (%d)\n",
+			config, idx);
+
+		/* Reset the cycle count */
+		__core_reg_set(TXTACTCYC, 0);
+		goto unlock;
+	}
+
+	/* Check for a core internal or performance channel event. */
+	if (tmp) {
+		void *perf_addr = (void *)PERF_COUNT(idx);
+
+		/*
+		 * Anything other than a cycle count will write the low-
+		 * nibble to the correct counter register.
+		 */
+		switch (tmp) {
+		case 0xd0:
+			perf_addr = (void *)PERF_ICORE(idx);
+			break;
+
+		case 0xf0:
+			perf_addr = (void *)PERF_CHAN(idx);
+			break;
+		}
+
+		metag_out32((tmp & 0x0f), perf_addr);
+
+		/*
+		 * Now we use the high nibble as the performance event to
+		 * to count.
+		 */
+		config = tmp >> 4;
+	}
+
+	/*
+	 * Enabled counters start from 0. Early cores clear the count on
+	 * write but newer cores don't, so we make sure that the count is
+	 * set to 0.
+	 */
+	tmp = ((config & 0xf) << 28) |
+			((1 << 24) << cpu_2_hwthread_id[get_cpu()]);
+	metag_out32(tmp, PERF_COUNT(idx));
+unlock:
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void metag_pmu_disable_counter(struct hw_perf_event *event, int idx)
+{
+	struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
+	unsigned int tmp = 0;
+	unsigned long flags;
+
+	/*
+	 * The cycle counter can't be disabled per se, as it's a hardware
+	 * thread register which is always counting. We merely return if this
+	 * is the counter we're attempting to disable.
+	 */
+	if (METAG_INST_COUNTER == idx)
+		return;
+
+	/*
+	 * The counter value _should_ have been read prior to disabling,
+	 * as if we're running on an early core then the value gets reset to
+	 * 0, and any read after that would be useless. On the newer cores,
+	 * however, it's better to read-modify-update this for purposes of
+	 * the overflow interrupt.
+	 * Here we remove the thread id AND the event nibble (there are at
+	 * least two events that count events that are core global and ignore
+	 * the thread id mask). This only works because we don't mix thread
+	 * performance counts, and event 0x00 requires a thread id mask!
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	tmp = metag_in32(PERF_COUNT(idx));
+	tmp &= 0x00ffffff;
+	metag_out32(tmp, PERF_COUNT(idx));
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static u64 metag_pmu_read_counter(int idx)
+{
+	u32 tmp = 0;
+
+	/* The act of reading the cycle counter also clears it */
+	if (METAG_INST_COUNTER == idx) {
+		__core_reg_swap(TXTACTCYC, tmp);
+		goto out;
+	}
+
+	tmp = metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
+out:
+	return tmp;
+}
+
+static void metag_pmu_write_counter(int idx, u32 val)
+{
+	struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
+	u32 tmp = 0;
+	unsigned long flags;
+
+	/*
+	 * This _shouldn't_ happen, but if it does, then we can just
+	 * ignore the write, as the register is read-only and clear-on-write.
+	 */
+	if (METAG_INST_COUNTER == idx)
+		return;
+
+	/*
+	 * We'll keep the thread mask and event id, and just update the
+	 * counter itself. Also , we should bound the value to 24-bits.
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	val &= 0x00ffffff;
+	tmp = metag_in32(PERF_COUNT(idx)) & 0xff000000;
+	val |= tmp;
+	metag_out32(val, PERF_COUNT(idx));
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static int metag_pmu_event_map(int idx)
+{
+	return metag_general_events[idx];
+}
+
+static irqreturn_t metag_pmu_counter_overflow(int irq, void *dev)
+{
+	int idx = (int)dev;
+	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+	struct perf_event *event = cpuhw->events[idx];
+	struct hw_perf_event *hwc = &event->hw;
+	struct pt_regs *regs = get_irq_regs();
+	struct perf_sample_data sampledata;
+	unsigned long flags;
+	u32 counter = 0;
+
+	/*
+	 * We need to stop the core temporarily from generating another
+	 * interrupt while we disable this counter. However, we don't want
+	 * to flag the counter as free
+	 */
+	__global_lock2(flags);
+	counter = metag_in32(PERF_COUNT(idx));
+	metag_out32((counter & 0x00ffffff), PERF_COUNT(idx));
+	__global_unlock2(flags);
+
+	/* Update the counts and reset the sample period */
+	metag_pmu_event_update(event, hwc, idx);
+	perf_sample_data_init(&sampledata, 0, hwc->last_period);
+	metag_pmu_event_set_period(event, hwc, idx);
+
+	/*
+	 * Enable the counter again once core overflow processing has
+	 * completed.
+	 */
+	if (!perf_event_overflow(event, &sampledata, regs))
+		metag_out32(counter, PERF_COUNT(idx));
+
+	return IRQ_HANDLED;
+}
+
+static struct metag_pmu _metag_pmu = {
+	.handle_irq	= metag_pmu_counter_overflow,
+	.enable		= metag_pmu_enable_counter,
+	.disable	= metag_pmu_disable_counter,
+	.read		= metag_pmu_read_counter,
+	.write		= metag_pmu_write_counter,
+	.event_map	= metag_pmu_event_map,
+	.cache_events	= &metag_pmu_cache_events,
+	.max_period	= MAX_PERIOD,
+	.max_events	= MAX_HWEVENTS,
+};
+
+/* PMU CPU hotplug notifier */
+static int __cpuinit metag_pmu_cpu_notify(struct notifier_block *b,
+		unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned int)hcpu;
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+	if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
+		return NOTIFY_DONE;
+
+	memset(cpuc, 0, sizeof(struct cpu_hw_events));
+	raw_spin_lock_init(&cpuc->pmu_lock);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata metag_pmu_notifier = {
+	.notifier_call = metag_pmu_cpu_notify,
+};
+
+/* PMU Initialisation */
+static int __init init_hw_perf_events(void)
+{
+	int ret = 0, cpu;
+	u32 version = *(u32 *)METAC_ID;
+	int major = (version & METAC_ID_MAJOR_BITS) >> METAC_ID_MAJOR_S;
+	int min_rev = (version & (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS))
+			>> METAC_ID_REV_S;
+
+	/* Not a Meta 2 core, then not supported */
+	if (0x02 > major) {
+		pr_info("no hardware counter support available\n");
+		goto out;
+	} else if (0x02 == major) {
+		metag_pmu = &_metag_pmu;
+
+		if (min_rev < 0x0104) {
+			/*
+			 * A core without overflow interrupts, and clear-on-
+			 * write counters.
+			 */
+			metag_pmu->handle_irq = NULL;
+			metag_pmu->write = NULL;
+			metag_pmu->max_period = 0;
+		}
+
+		metag_pmu->name = "Meta 2";
+		metag_pmu->version = version;
+		metag_pmu->pmu = pmu;
+	}
+
+	pr_info("enabled with %s PMU driver, %d counters available\n",
+			metag_pmu->name, metag_pmu->max_events);
+
+	/* Initialise the active events and reservation mutex */
+	atomic_set(&metag_pmu->active_events, 0);
+	mutex_init(&metag_pmu->reserve_mutex);
+
+	/* Clear the counters */
+	metag_out32(0, PERF_COUNT(0));
+	metag_out32(0, PERF_COUNT(1));
+
+	for_each_possible_cpu(cpu) {
+		struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+		memset(cpuc, 0, sizeof(struct cpu_hw_events));
+		raw_spin_lock_init(&cpuc->pmu_lock);
+	}
+
+	register_cpu_notifier(&metag_pmu_notifier);
+	ret = perf_pmu_register(&pmu, (char *)metag_pmu->name, PERF_TYPE_RAW);
+out:
+	return ret;
+}
+early_initcall(init_hw_perf_events);
diff --git a/arch/metag/kernel/perf/perf_event.h b/arch/metag/kernel/perf/perf_event.h
new file mode 100644
index 0000000..fd10a13
--- /dev/null
+++ b/arch/metag/kernel/perf/perf_event.h
@@ -0,0 +1,106 @@
+/*
+ * Meta performance counter support.
+ *  Copyright (C) 2012 Imagination Technologies Ltd
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef METAG_PERF_EVENT_H_
+#define METAG_PERF_EVENT_H_
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/perf_event.h>
+
+/* For performance counter definitions */
+#include <asm/metag_mem.h>
+
+/*
+ * The Meta core has two performance counters, with 24-bit resolution. Newer
+ * cores generate an overflow interrupt on transition from 0xffffff to 0.
+ *
+ * Each counter consists of the counter id, hardware thread id, and the count
+ * itself; each counter can be assigned to multiple hardware threads at any
+ * one time, with the returned count being an aggregate of events. A small
+ * number of events are thread global, i.e. they count the aggregate of all
+ * threads' events, regardless of the thread selected.
+ *
+ * Newer cores can store an arbitrary 24-bit number in the counter, whereas
+ * older cores will clear the counter bits on write.
+ *
+ * We also have a pseudo-counter in the form of the thread active cycles
+ * counter (which, incidentally, is also bound to
+ */
+
+#define MAX_HWEVENTS		3
+#define MAX_PERIOD		((1UL << 24) - 1)
+#define METAG_INST_COUNTER	(MAX_HWEVENTS - 1)
+
+/**
+ * struct cpu_hw_events - a processor core's performance events
+ * @events:	an array of perf_events active for a given index.
+ * @used_mask:	a bitmap of in-use counters.
+ * @pmu_lock:	a perf counter lock
+ *
+ * This is a per-cpu/core structure that maintains a record of its
+ * performance counters' state.
+ */
+struct cpu_hw_events {
+	struct perf_event	*events[MAX_HWEVENTS];
+	unsigned long		used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
+	raw_spinlock_t		pmu_lock;
+};
+
+/**
+ * struct metag_pmu - the Meta PMU structure
+ * @pmu:		core pmu structure
+ * @name:		pmu name
+ * @version:		core version
+ * @handle_irq:		overflow interrupt handler
+ * @enable:		enable a counter
+ * @disable:		disable a counter
+ * @read:		read the value of a counter
+ * @write:		write a value to a counter
+ * @event_map:		kernel event to counter event id map
+ * @cache_events:	kernel cache counter to core cache counter map
+ * @max_period:		maximum value of the counter before overflow
+ * @max_events:		maximum number of counters available at any one time
+ * @active_events:	number of active counters
+ * @reserve_mutex:	counter reservation mutex
+ *
+ * This describes the main functionality and data used by the performance
+ * event core.
+ */
+struct metag_pmu {
+	struct pmu	pmu;
+	const char	*name;
+	u32		version;
+	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
+	void		(*enable)(struct hw_perf_event *evt, int idx);
+	void		(*disable)(struct hw_perf_event *evt, int idx);
+	u64		(*read)(int idx);
+	void		(*write)(int idx, u32 val);
+	int		(*event_map)(int idx);
+	const int	(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX];
+	u32		max_period;
+	int		max_events;
+	atomic_t	active_events;
+	struct mutex	reserve_mutex;
+};
+
+/* Convenience macros for accessing the perf counters */
+/* Define some convenience accessors */
+#define PERF_COUNT(x)	(PERF_COUNT0 + (sizeof(u64) * (x)))
+#define PERF_ICORE(x)	(PERF_ICORE0 + (sizeof(u64) * (x)))
+#define PERF_CHAN(x)	(PERF_CHAN0 + (sizeof(u64) * (x)))
+
+/* Cache index macros */
+#define C(x) PERF_COUNT_HW_CACHE_##x
+#define CACHE_OP_UNSUPPORTED	0xfffe
+#define CACHE_OP_NONSENSE	0xffff
+
+#endif
diff --git a/arch/metag/kernel/perf_callchain.c b/arch/metag/kernel/perf_callchain.c
new file mode 100644
index 0000000..3156334
--- /dev/null
+++ b/arch/metag/kernel/perf_callchain.c
@@ -0,0 +1,96 @@
+/*
+ * Perf callchain handling code.
+ *
+ *   Based on the ARM perf implementation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+
+static bool is_valid_call(unsigned long calladdr)
+{
+	unsigned int callinsn;
+
+	/* Check the possible return address is aligned. */
+	if (!(calladdr & 0x3)) {
+		if (!get_user(callinsn, (unsigned int *)calladdr)) {
+			/* Check for CALLR or SWAP PC,D1RtP. */
+			if ((callinsn & 0xff000000) == 0xab000000 ||
+			    callinsn == 0xa3200aa0)
+				return true;
+		}
+	}
+	return false;
+}
+
+static struct metag_frame __user *
+user_backtrace(struct metag_frame __user *user_frame,
+	       struct perf_callchain_entry *entry)
+{
+	struct metag_frame frame;
+	unsigned long calladdr;
+
+	/* We cannot rely on having frame pointers in user code. */
+	while (1) {
+		/* Also check accessibility of one struct frame beyond */
+		if (!access_ok(VERIFY_READ, user_frame, sizeof(frame)))
+			return 0;
+		if (__copy_from_user_inatomic(&frame, user_frame,
+					      sizeof(frame)))
+			return 0;
+
+		--user_frame;
+
+		calladdr = frame.lr - 4;
+		if (is_valid_call(calladdr)) {
+			perf_callchain_store(entry, calladdr);
+			return user_frame;
+		}
+	}
+
+	return 0;
+}
+
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+	unsigned long sp = regs->ctx.AX[0].U0;
+	struct metag_frame __user *frame;
+
+	frame = (struct metag_frame __user *)sp;
+
+	--frame;
+
+	while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame)
+		frame = user_backtrace(frame, entry);
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called
+ * whist unwinding the stackframe and is like a subroutine return so we use
+ * the PC.
+ */
+static int
+callchain_trace(struct stackframe *fr,
+		void *data)
+{
+	struct perf_callchain_entry *entry = data;
+	perf_callchain_store(entry, fr->pc);
+	return 0;
+}
+
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+	struct stackframe fr;
+
+	fr.fp = regs->ctx.AX[1].U0;
+	fr.sp = regs->ctx.AX[0].U0;
+	fr.lr = regs->ctx.DX[4].U1;
+	fr.pc = regs->ctx.CurrPC;
+	walk_stackframe(&fr, callchain_trace, entry);
+}
diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c
new file mode 100644
index 0000000..c6efe62
--- /dev/null
+++ b/arch/metag/kernel/process.c
@@ -0,0 +1,461 @@
+/*
+ * Copyright (C) 2005,2006,2007,2008,2009,2010,2011 Imagination Technologies
+ *
+ * This file contains the architecture-dependent parts of process handling.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/reboot.h>
+#include <linux/elfcore.h>
+#include <linux/fs.h>
+#include <linux/tick.h>
+#include <linux/slab.h>
+#include <linux/mman.h>
+#include <linux/pm.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <asm/core_reg.h>
+#include <asm/user_gateway.h>
+#include <asm/tcm.h>
+#include <asm/traps.h>
+#include <asm/switch_to.h>
+
+/*
+ * Wait for the next interrupt and enable local interrupts
+ */
+static inline void arch_idle(void)
+{
+	int tmp;
+
+	/*
+	 * Quickly jump straight into the interrupt entry point without actually
+	 * triggering an interrupt. When TXSTATI gets read the processor will
+	 * block until an interrupt is triggered.
+	 */
+	asm volatile (/* Switch into ISTAT mode */
+		      "RTH\n\t"
+		      /* Enable local interrupts */
+		      "MOV	TXMASKI, %1\n\t"
+		      /*
+		       * We can't directly "SWAP PC, PCX", so we swap via a
+		       * temporary. Essentially we do:
+		       *  PCX_new = 1f (the place to continue execution)
+		       *  PC = PCX_old
+		       */
+		      "ADD	%0, CPC0, #(1f-.)\n\t"
+		      "SWAP	PCX, %0\n\t"
+		      "MOV	PC, %0\n"
+		      /* Continue execution here with interrupts enabled */
+		      "1:"
+		      : "=a" (tmp)
+		      : "r" (get_trigger_mask()));
+}
+
+void cpu_idle(void)
+{
+	set_thread_flag(TIF_POLLING_NRFLAG);
+
+	while (1) {
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
+
+		while (!need_resched()) {
+			/*
+			 * We need to disable interrupts here to ensure we don't
+			 * miss a wakeup call.
+			 */
+			local_irq_disable();
+			if (!need_resched()) {
+#ifdef CONFIG_HOTPLUG_CPU
+				if (cpu_is_offline(smp_processor_id()))
+					cpu_die();
+#endif
+				arch_idle();
+			} else {
+				local_irq_enable();
+			}
+		}
+
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
+		schedule_preempt_disabled();
+	 }
+}
+
+void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
+
+void (*soc_restart)(char *cmd);
+void (*soc_halt)(void);
+
+void machine_restart(char *cmd)
+{
+	if (soc_restart)
+		soc_restart(cmd);
+	hard_processor_halt(HALT_OK);
+}
+
+void machine_halt(void)
+{
+	if (soc_halt)
+		soc_halt();
+	smp_send_stop();
+	hard_processor_halt(HALT_OK);
+}
+
+void machine_power_off(void)
+{
+	if (pm_power_off)
+		pm_power_off();
+	smp_send_stop();
+	hard_processor_halt(HALT_OK);
+}
+
+#define FLAG_Z 0x8
+#define FLAG_N 0x4
+#define FLAG_O 0x2
+#define FLAG_C 0x1
+
+void show_regs(struct pt_regs *regs)
+{
+	int i;
+	const char *AX0_names[] = {"A0StP", "A0FrP"};
+	const char *AX1_names[] = {"A1GbP", "A1LbP"};
+
+	const char *DX0_names[] = {
+		"D0Re0",
+		"D0Ar6",
+		"D0Ar4",
+		"D0Ar2",
+		"D0FrT",
+		"D0.5 ",
+		"D0.6 ",
+		"D0.7 "
+	};
+
+	const char *DX1_names[] = {
+		"D1Re0",
+		"D1Ar5",
+		"D1Ar3",
+		"D1Ar1",
+		"D1RtP",
+		"D1.5 ",
+		"D1.6 ",
+		"D1.7 "
+	};
+
+	pr_info(" pt_regs @ %p\n", regs);
+	pr_info(" SaveMask = 0x%04hx\n", regs->ctx.SaveMask);
+	pr_info(" Flags = 0x%04hx (%c%c%c%c)\n", regs->ctx.Flags,
+		regs->ctx.Flags & FLAG_Z ? 'Z' : 'z',
+		regs->ctx.Flags & FLAG_N ? 'N' : 'n',
+		regs->ctx.Flags & FLAG_O ? 'O' : 'o',
+		regs->ctx.Flags & FLAG_C ? 'C' : 'c');
+	pr_info(" TXRPT = 0x%08x\n", regs->ctx.CurrRPT);
+	pr_info(" PC = 0x%08x\n", regs->ctx.CurrPC);
+
+	/* AX regs */
+	for (i = 0; i < 2; i++) {
+		pr_info(" %s = 0x%08x    ",
+			AX0_names[i],
+			regs->ctx.AX[i].U0);
+		printk(" %s = 0x%08x\n",
+			AX1_names[i],
+			regs->ctx.AX[i].U1);
+	}
+
+	if (regs->ctx.SaveMask & TBICTX_XEXT_BIT)
+		pr_warn(" Extended state present - AX2.[01] will be WRONG\n");
+
+	/* Special place with AXx.2 */
+	pr_info(" A0.2  = 0x%08x    ",
+		regs->ctx.Ext.AX2.U0);
+	printk(" A1.2  = 0x%08x\n",
+		regs->ctx.Ext.AX2.U1);
+
+	/* 'extended' AX regs (nominally, just AXx.3) */
+	for (i = 0; i < (TBICTX_AX_REGS - 3); i++) {
+		pr_info(" A0.%d  = 0x%08x    ", i + 3, regs->ctx.AX3[i].U0);
+		printk(" A1.%d  = 0x%08x\n", i + 3, regs->ctx.AX3[i].U1);
+	}
+
+	for (i = 0; i < 8; i++) {
+		pr_info(" %s = 0x%08x    ", DX0_names[i], regs->ctx.DX[i].U0);
+		printk(" %s = 0x%08x\n", DX1_names[i], regs->ctx.DX[i].U1);
+	}
+
+	show_trace(NULL, (unsigned long *)regs->ctx.AX[0].U0, regs);
+}
+
+int copy_thread(unsigned long clone_flags, unsigned long usp,
+		unsigned long arg, struct task_struct *tsk)
+{
+	struct pt_regs *childregs = task_pt_regs(tsk);
+	void *kernel_context = ((void *) childregs +
+				sizeof(struct pt_regs));
+	unsigned long global_base;
+
+	BUG_ON(((unsigned long)childregs) & 0x7);
+	BUG_ON(((unsigned long)kernel_context) & 0x7);
+
+	memset(&tsk->thread.kernel_context, 0,
+			sizeof(tsk->thread.kernel_context));
+
+	tsk->thread.kernel_context = __TBISwitchInit(kernel_context,
+						     ret_from_fork,
+						     0, 0);
+
+	if (unlikely(tsk->flags & PF_KTHREAD)) {
+		/*
+		 * Make sure we don't leak any kernel data to child's regs
+		 * if kernel thread becomes a userspace thread in the future
+		 */
+		memset(childregs, 0 , sizeof(struct pt_regs));
+
+		global_base = __core_reg_get(A1GbP);
+		childregs->ctx.AX[0].U1 = (unsigned long) global_base;
+		childregs->ctx.AX[0].U0 = (unsigned long) kernel_context;
+		/* Set D1Ar1=arg and D1RtP=usp (fn) */
+		childregs->ctx.DX[4].U1 = usp;
+		childregs->ctx.DX[3].U1 = arg;
+		tsk->thread.int_depth = 2;
+		return 0;
+	}
+	/*
+	 * Get a pointer to where the new child's register block should have
+	 * been pushed.
+	 * The Meta's stack grows upwards, and the context is the the first
+	 * thing to be pushed by TBX (phew)
+	 */
+	*childregs = *current_pt_regs();
+	/* Set the correct stack for the clone mode */
+	if (usp)
+		childregs->ctx.AX[0].U0 = ALIGN(usp, 8);
+	tsk->thread.int_depth = 1;
+
+	/* set return value for child process */
+	childregs->ctx.DX[0].U0 = 0;
+
+	/* The TLS pointer is passed as an argument to sys_clone. */
+	if (clone_flags & CLONE_SETTLS)
+		tsk->thread.tls_ptr =
+				(__force void __user *)childregs->ctx.DX[1].U1;
+
+#ifdef CONFIG_METAG_FPU
+	if (tsk->thread.fpu_context) {
+		struct meta_fpu_context *ctx;
+
+		ctx = kmemdup(tsk->thread.fpu_context,
+			      sizeof(struct meta_fpu_context), GFP_ATOMIC);
+		tsk->thread.fpu_context = ctx;
+	}
+#endif
+
+#ifdef CONFIG_METAG_DSP
+	if (tsk->thread.dsp_context) {
+		struct meta_ext_context *ctx;
+		int i;
+
+		ctx = kmemdup(tsk->thread.dsp_context,
+			      sizeof(struct meta_ext_context), GFP_ATOMIC);
+		for (i = 0; i < 2; i++)
+			ctx->ram[i] = kmemdup(ctx->ram[i], ctx->ram_sz[i],
+					      GFP_ATOMIC);
+		tsk->thread.dsp_context = ctx;
+	}
+#endif
+
+	return 0;
+}
+
+#ifdef CONFIG_METAG_FPU
+static void alloc_fpu_context(struct thread_struct *thread)
+{
+	thread->fpu_context = kzalloc(sizeof(struct meta_fpu_context),
+				      GFP_ATOMIC);
+}
+
+static void clear_fpu(struct thread_struct *thread)
+{
+	thread->user_flags &= ~TBICTX_FPAC_BIT;
+	kfree(thread->fpu_context);
+	thread->fpu_context = NULL;
+}
+#else
+static void clear_fpu(struct thread_struct *thread)
+{
+}
+#endif
+
+#ifdef CONFIG_METAG_DSP
+static void clear_dsp(struct thread_struct *thread)
+{
+	if (thread->dsp_context) {
+		kfree(thread->dsp_context->ram[0]);
+		kfree(thread->dsp_context->ram[1]);
+
+		kfree(thread->dsp_context);
+
+		thread->dsp_context = NULL;
+	}
+
+	__core_reg_set(D0.8, 0);
+}
+#else
+static void clear_dsp(struct thread_struct *thread)
+{
+}
+#endif
+
+struct task_struct *__sched __switch_to(struct task_struct *prev,
+					struct task_struct *next)
+{
+	TBIRES to, from;
+
+	to.Switch.pCtx = next->thread.kernel_context;
+	to.Switch.pPara = prev;
+
+#ifdef CONFIG_METAG_FPU
+	if (prev->thread.user_flags & TBICTX_FPAC_BIT) {
+		struct pt_regs *regs = task_pt_regs(prev);
+		TBIRES state;
+
+		state.Sig.SaveMask = prev->thread.user_flags;
+		state.Sig.pCtx = &regs->ctx;
+
+		if (!prev->thread.fpu_context)
+			alloc_fpu_context(&prev->thread);
+		if (prev->thread.fpu_context)
+			__TBICtxFPUSave(state, prev->thread.fpu_context);
+	}
+	/*
+	 * Force a restore of the FPU context next time this process is
+	 * scheduled.
+	 */
+	if (prev->thread.fpu_context)
+		prev->thread.fpu_context->needs_restore = true;
+#endif
+
+
+	from = __TBISwitch(to, &prev->thread.kernel_context);
+
+	/* Restore TLS pointer for this process. */
+	set_gateway_tls(current->thread.tls_ptr);
+
+	return (struct task_struct *) from.Switch.pPara;
+}
+
+void flush_thread(void)
+{
+	clear_fpu(&current->thread);
+	clear_dsp(&current->thread);
+}
+
+/*
+ * Free current thread data structures etc.
+ */
+void exit_thread(void)
+{
+	clear_fpu(&current->thread);
+	clear_dsp(&current->thread);
+}
+
+/* TODO: figure out how to unwind the kernel stack here to figure out
+ * where we went to sleep. */
+unsigned long get_wchan(struct task_struct *p)
+{
+	return 0;
+}
+
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
+{
+	/* Returning 0 indicates that the FPU state was not stored (as it was
+	 * not in use) */
+	return 0;
+}
+
+#ifdef CONFIG_METAG_USER_TCM
+
+#define ELF_MIN_ALIGN	PAGE_SIZE
+
+#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
+#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
+#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
+
+#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
+
+unsigned long __metag_elf_map(struct file *filep, unsigned long addr,
+			      struct elf_phdr *eppnt, int prot, int type,
+			      unsigned long total_size)
+{
+	unsigned long map_addr, size;
+	unsigned long page_off = ELF_PAGEOFFSET(eppnt->p_vaddr);
+	unsigned long raw_size = eppnt->p_filesz + page_off;
+	unsigned long off = eppnt->p_offset - page_off;
+	unsigned int tcm_tag;
+	addr = ELF_PAGESTART(addr);
+	size = ELF_PAGEALIGN(raw_size);
+
+	/* mmap() will return -EINVAL if given a zero size, but a
+	 * segment with zero filesize is perfectly valid */
+	if (!size)
+		return addr;
+
+	tcm_tag = tcm_lookup_tag(addr);
+
+	if (tcm_tag != TCM_INVALID_TAG)
+		type &= ~MAP_FIXED;
+
+	/*
+	* total_size is the size of the ELF (interpreter) image.
+	* The _first_ mmap needs to know the full size, otherwise
+	* randomization might put this image into an overlapping
+	* position with the ELF binary image. (since size < total_size)
+	* So we first map the 'big' image - and unmap the remainder at
+	* the end. (which unmap is needed for ELF images with holes.)
+	*/
+	if (total_size) {
+		total_size = ELF_PAGEALIGN(total_size);
+		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
+		if (!BAD_ADDR(map_addr))
+			vm_munmap(map_addr+size, total_size-size);
+	} else
+		map_addr = vm_mmap(filep, addr, size, prot, type, off);
+
+	if (!BAD_ADDR(map_addr) && tcm_tag != TCM_INVALID_TAG) {
+		struct tcm_allocation *tcm;
+		unsigned long tcm_addr;
+
+		tcm = kmalloc(sizeof(*tcm), GFP_KERNEL);
+		if (!tcm)
+			return -ENOMEM;
+
+		tcm_addr = tcm_alloc(tcm_tag, raw_size);
+		if (tcm_addr != addr) {
+			kfree(tcm);
+			return -ENOMEM;
+		}
+
+		tcm->tag = tcm_tag;
+		tcm->addr = tcm_addr;
+		tcm->size = raw_size;
+
+		list_add(&tcm->list, &current->mm->context.tcm);
+
+		eppnt->p_vaddr = map_addr;
+		if (copy_from_user((void *) addr, (void __user *) map_addr,
+				   raw_size))
+			return -EFAULT;
+	}
+
+	return map_addr;
+}
+#endif
diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c
new file mode 100644
index 0000000..47a8828
--- /dev/null
+++ b/arch/metag/kernel/ptrace.c
@@ -0,0 +1,380 @@
+/*
+ *  Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License.  See the file COPYING in the main directory of
+ * this archive for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/regset.h>
+#include <linux/tracehook.h>
+#include <linux/elf.h>
+#include <linux/uaccess.h>
+#include <trace/syscall.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+/*
+ * user_regset definitions.
+ */
+
+int metag_gp_regs_copyout(const struct pt_regs *regs,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf)
+{
+	const void *ptr;
+	unsigned long data;
+	int ret;
+
+	/* D{0-1}.{0-7} */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  regs->ctx.DX, 0, 4*16);
+	if (ret)
+		goto out;
+	/* A{0-1}.{0-1} */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  regs->ctx.AX, 4*16, 4*20);
+	if (ret)
+		goto out;
+	/* A{0-1}.2 */
+	if (regs->ctx.SaveMask & TBICTX_XEXT_BIT)
+		ptr = regs->ctx.Ext.Ctx.pExt;
+	else
+		ptr = &regs->ctx.Ext.AX2;
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  ptr, 4*20, 4*22);
+	if (ret)
+		goto out;
+	/* A{0-1}.3 */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &regs->ctx.AX3, 4*22, 4*24);
+	if (ret)
+		goto out;
+	/* PC */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &regs->ctx.CurrPC, 4*24, 4*25);
+	if (ret)
+		goto out;
+	/* TXSTATUS */
+	data = (unsigned long)regs->ctx.Flags;
+	if (regs->ctx.SaveMask & TBICTX_CBUF_BIT)
+		data |= USER_GP_REGS_STATUS_CATCH_BIT;
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &data, 4*25, 4*26);
+	if (ret)
+		goto out;
+	/* TXRPT, TXBPOBITS, TXMODE */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &regs->ctx.CurrRPT, 4*26, 4*29);
+	if (ret)
+		goto out;
+	/* Padding */
+	ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+				       4*29, 4*30);
+out:
+	return ret;
+}
+
+int metag_gp_regs_copyin(struct pt_regs *regs,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf)
+{
+	void *ptr;
+	unsigned long data;
+	int ret;
+
+	/* D{0-1}.{0-7} */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 regs->ctx.DX, 0, 4*16);
+	if (ret)
+		goto out;
+	/* A{0-1}.{0-1} */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 regs->ctx.AX, 4*16, 4*20);
+	if (ret)
+		goto out;
+	/* A{0-1}.2 */
+	if (regs->ctx.SaveMask & TBICTX_XEXT_BIT)
+		ptr = regs->ctx.Ext.Ctx.pExt;
+	else
+		ptr = &regs->ctx.Ext.AX2;
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 ptr, 4*20, 4*22);
+	if (ret)
+		goto out;
+	/* A{0-1}.3 */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs->ctx.AX3, 4*22, 4*24);
+	if (ret)
+		goto out;
+	/* PC */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs->ctx.CurrPC, 4*24, 4*25);
+	if (ret)
+		goto out;
+	/* TXSTATUS */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &data, 4*25, 4*26);
+	if (ret)
+		goto out;
+	regs->ctx.Flags = data & 0xffff;
+	if (data & USER_GP_REGS_STATUS_CATCH_BIT)
+		regs->ctx.SaveMask |= TBICTX_XCBF_BIT | TBICTX_CBUF_BIT;
+	else
+		regs->ctx.SaveMask &= ~TBICTX_CBUF_BIT;
+	/* TXRPT, TXBPOBITS, TXMODE */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs->ctx.CurrRPT, 4*26, 4*29);
+out:
+	return ret;
+}
+
+static int metag_gp_regs_get(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     void *kbuf, void __user *ubuf)
+{
+	const struct pt_regs *regs = task_pt_regs(target);
+	return metag_gp_regs_copyout(regs, pos, count, kbuf, ubuf);
+}
+
+static int metag_gp_regs_set(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     const void *kbuf, const void __user *ubuf)
+{
+	struct pt_regs *regs = task_pt_regs(target);
+	return metag_gp_regs_copyin(regs, pos, count, kbuf, ubuf);
+}
+
+int metag_cb_regs_copyout(const struct pt_regs *regs,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf)
+{
+	int ret;
+
+	/* TXCATCH{0-3} */
+	if (regs->ctx.SaveMask & TBICTX_XCBF_BIT)
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  regs->extcb0, 0, 4*4);
+	else
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       0, 4*4);
+	return ret;
+}
+
+int metag_cb_regs_copyin(struct pt_regs *regs,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	/* TXCATCH{0-3} */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 regs->extcb0, 0, 4*4);
+	return ret;
+}
+
+static int metag_cb_regs_get(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     void *kbuf, void __user *ubuf)
+{
+	const struct pt_regs *regs = task_pt_regs(target);
+	return metag_cb_regs_copyout(regs, pos, count, kbuf, ubuf);
+}
+
+static int metag_cb_regs_set(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     const void *kbuf, const void __user *ubuf)
+{
+	struct pt_regs *regs = task_pt_regs(target);
+	return metag_cb_regs_copyin(regs, pos, count, kbuf, ubuf);
+}
+
+int metag_rp_state_copyout(const struct pt_regs *regs,
+			   unsigned int pos, unsigned int count,
+			   void *kbuf, void __user *ubuf)
+{
+	unsigned long mask;
+	u64 *ptr;
+	int ret, i;
+
+	/* Empty read pipeline */
+	if (!(regs->ctx.SaveMask & TBICTX_CBRP_BIT)) {
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       0, 4*13);
+		goto out;
+	}
+
+	mask = (regs->ctx.CurrDIVTIME & TXDIVTIME_RPMASK_BITS) >>
+		TXDIVTIME_RPMASK_S;
+
+	/* Read pipeline entries */
+	ptr = (void *)&regs->extcb0[1];
+	for (i = 0; i < 6; ++i, ++ptr) {
+		if (mask & (1 << i))
+			ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+						  ptr, 8*i, 8*(i + 1));
+		else
+			ret = user_regset_copyout_zero(&pos, &count, &kbuf,
+						       &ubuf, 8*i, 8*(i + 1));
+		if (ret)
+			goto out;
+	}
+	/* Mask of entries */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &mask, 4*12, 4*13);
+out:
+	return ret;
+}
+
+int metag_rp_state_copyin(struct pt_regs *regs,
+			  unsigned int pos, unsigned int count,
+			  const void *kbuf, const void __user *ubuf)
+{
+	struct user_rp_state rp;
+	unsigned long long *ptr;
+	int ret, i;
+
+	/* Read the entire pipeline before making any changes */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &rp, 0, 4*13);
+	if (ret)
+		goto out;
+
+	/* Write pipeline entries */
+	ptr = (void *)&regs->extcb0[1];
+	for (i = 0; i < 6; ++i, ++ptr)
+		if (rp.mask & (1 << i))
+			*ptr = rp.entries[i];
+
+	/* Update RPMask in TXDIVTIME */
+	regs->ctx.CurrDIVTIME &= ~TXDIVTIME_RPMASK_BITS;
+	regs->ctx.CurrDIVTIME |= (rp.mask << TXDIVTIME_RPMASK_S)
+				 & TXDIVTIME_RPMASK_BITS;
+
+	/* Set/clear flags to indicate catch/read pipeline state */
+	if (rp.mask)
+		regs->ctx.SaveMask |= TBICTX_XCBF_BIT | TBICTX_CBRP_BIT;
+	else
+		regs->ctx.SaveMask &= ~TBICTX_CBRP_BIT;
+out:
+	return ret;
+}
+
+static int metag_rp_state_get(struct task_struct *target,
+			      const struct user_regset *regset,
+			      unsigned int pos, unsigned int count,
+			      void *kbuf, void __user *ubuf)
+{
+	const struct pt_regs *regs = task_pt_regs(target);
+	return metag_rp_state_copyout(regs, pos, count, kbuf, ubuf);
+}
+
+static int metag_rp_state_set(struct task_struct *target,
+			      const struct user_regset *regset,
+			      unsigned int pos, unsigned int count,
+			      const void *kbuf, const void __user *ubuf)
+{
+	struct pt_regs *regs = task_pt_regs(target);
+	return metag_rp_state_copyin(regs, pos, count, kbuf, ubuf);
+}
+
+enum metag_regset {
+	REGSET_GENERAL,
+	REGSET_CBUF,
+	REGSET_READPIPE,
+};
+
+static const struct user_regset metag_regsets[] = {
+	[REGSET_GENERAL] = {
+		.core_note_type = NT_PRSTATUS,
+		.n = ELF_NGREG,
+		.size = sizeof(long),
+		.align = sizeof(long long),
+		.get = metag_gp_regs_get,
+		.set = metag_gp_regs_set,
+	},
+	[REGSET_CBUF] = {
+		.core_note_type = NT_METAG_CBUF,
+		.n = sizeof(struct user_cb_regs) / sizeof(long),
+		.size = sizeof(long),
+		.align = sizeof(long long),
+		.get = metag_cb_regs_get,
+		.set = metag_cb_regs_set,
+	},
+	[REGSET_READPIPE] = {
+		.core_note_type = NT_METAG_RPIPE,
+		.n = sizeof(struct user_rp_state) / sizeof(long),
+		.size = sizeof(long),
+		.align = sizeof(long long),
+		.get = metag_rp_state_get,
+		.set = metag_rp_state_set,
+	},
+};
+
+static const struct user_regset_view user_metag_view = {
+	.name = "metag",
+	.e_machine = EM_METAG,
+	.regsets = metag_regsets,
+	.n = ARRAY_SIZE(metag_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &user_metag_view;
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+	/* nothing to do.. */
+}
+
+long arch_ptrace(struct task_struct *child, long request, unsigned long addr,
+		 unsigned long data)
+{
+	int ret;
+
+	switch (request) {
+	default:
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	return ret;
+}
+
+int syscall_trace_enter(struct pt_regs *regs)
+{
+	int ret = 0;
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		ret = tracehook_report_syscall_entry(regs);
+
+	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+		trace_sys_enter(regs, regs->ctx.DX[0].U1);
+
+	return ret ? -1 : regs->ctx.DX[0].U1;
+}
+
+void syscall_trace_leave(struct pt_regs *regs)
+{
+	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+		trace_sys_exit(regs, regs->ctx.DX[0].U1);
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		tracehook_report_syscall_exit(regs, 0);
+}
diff --git a/arch/metag/kernel/setup.c b/arch/metag/kernel/setup.c
new file mode 100644
index 0000000..8792461
--- /dev/null
+++ b/arch/metag/kernel/setup.c
@@ -0,0 +1,631 @@
+/*
+ * Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ *
+ * This file contains the architecture-dependant parts of system setup.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/bootmem.h>
+#include <linux/console.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/of_fdt.h>
+#include <linux/pfn.h>
+#include <linux/root_dev.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/start_kernel.h>
+#include <linux/string.h>
+
+#include <asm/cachepart.h>
+#include <asm/clock.h>
+#include <asm/core_reg.h>
+#include <asm/cpu.h>
+#include <asm/da.h>
+#include <asm/highmem.h>
+#include <asm/hwthread.h>
+#include <asm/l2cache.h>
+#include <asm/mach/arch.h>
+#include <asm/metag_mem.h>
+#include <asm/metag_regs.h>
+#include <asm/mmu.h>
+#include <asm/mmzone.h>
+#include <asm/processor.h>
+#include <asm/prom.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/traps.h>
+
+/* Priv protect as many registers as possible. */
+#define DEFAULT_PRIV	(TXPRIVEXT_COPRO_BITS		| \
+			 TXPRIVEXT_TXTRIGGER_BIT	| \
+			 TXPRIVEXT_TXGBLCREG_BIT	| \
+			 TXPRIVEXT_ILOCK_BIT		| \
+			 TXPRIVEXT_TXITACCYC_BIT	| \
+			 TXPRIVEXT_TXDIVTIME_BIT	| \
+			 TXPRIVEXT_TXAMAREGX_BIT	| \
+			 TXPRIVEXT_TXTIMERI_BIT		| \
+			 TXPRIVEXT_TXSTATUS_BIT		| \
+			 TXPRIVEXT_TXDISABLE_BIT)
+
+/* Meta2 specific bits. */
+#ifdef CONFIG_METAG_META12
+#define META2_PRIV	0
+#else
+#define META2_PRIV	(TXPRIVEXT_TXTIMER_BIT		| \
+			 TXPRIVEXT_TRACE_BIT)
+#endif
+
+/* Unaligned access checking bits. */
+#ifdef CONFIG_METAG_UNALIGNED
+#define UNALIGNED_PRIV	TXPRIVEXT_ALIGNREW_BIT
+#else
+#define UNALIGNED_PRIV	0
+#endif
+
+#define PRIV_BITS 	(DEFAULT_PRIV			| \
+			 META2_PRIV			| \
+			 UNALIGNED_PRIV)
+
+/*
+ * Protect access to:
+ * 0x06000000-0x07ffffff Direct mapped region
+ * 0x05000000-0x05ffffff MMU table region (Meta1)
+ * 0x04400000-0x047fffff Cache flush region
+ * 0x84000000-0x87ffffff Core cache memory region (Meta2)
+ *
+ * Allow access to:
+ * 0x80000000-0x81ffffff Core code memory region (Meta2)
+ */
+#ifdef CONFIG_METAG_META12
+#define PRIVSYSR_BITS	TXPRIVSYSR_ALL_BITS
+#else
+#define PRIVSYSR_BITS	(TXPRIVSYSR_ALL_BITS & ~TXPRIVSYSR_CORECODE_BIT)
+#endif
+
+/* Protect all 0x02xxxxxx and 0x048xxxxx. */
+#define PIOREG_BITS	0xffffffff
+
+/*
+ * Protect all 0x04000xx0 (system events)
+ * except write combiner flush and write fence (system events 4 and 5).
+ */
+#define PSYREG_BITS	0xfffffffb
+
+
+extern char _heap_start[];
+
+#ifdef CONFIG_METAG_BUILTIN_DTB
+extern u32 __dtb_start[];
+#endif
+
+#ifdef CONFIG_DA_CONSOLE
+/* Our early channel based console driver */
+extern struct console dash_console;
+#endif
+
+struct machine_desc *machine_desc __initdata;
+
+/*
+ * Map a Linux CPU number to a hardware thread ID
+ * In SMP this will be setup with the correct mapping at startup; in UP this
+ * will map to the HW thread on which we are running.
+ */
+u8 cpu_2_hwthread_id[NR_CPUS] __read_mostly = {
+	[0 ... NR_CPUS-1] = BAD_HWTHREAD_ID
+};
+
+/*
+ * Map a hardware thread ID to a Linux CPU number
+ * In SMP this will be fleshed out with the correct CPU ID for a particular
+ * hardware thread. In UP this will be initialised with the boot CPU ID.
+ */
+u8 hwthread_id_2_cpu[4] __read_mostly = {
+	[0 ... 3] = BAD_CPU_ID
+};
+
+/* The relative offset of the MMU mapped memory (from ldlk or bootloader)
+ * to the real physical memory.  This is needed as we have to use the
+ * physical addresses in the MMU tables (pte entries), and not the virtual
+ * addresses.
+ * This variable is used in the __pa() and __va() macros, and should
+ * probably only be used via them.
+ */
+unsigned int meta_memoffset;
+EXPORT_SYMBOL(meta_memoffset);
+
+static char __initdata *original_cmd_line;
+
+DEFINE_PER_CPU(PTBI, pTBI);
+
+/*
+ * Mapping are specified as "CPU_ID:HWTHREAD_ID", e.g.
+ *
+ *	"hwthread_map=0:1,1:2,2:3,3:0"
+ *
+ *	Linux CPU ID	HWTHREAD_ID
+ *	---------------------------
+ *	    0		      1
+ *	    1		      2
+ *	    2		      3
+ *	    3		      0
+ */
+static int __init parse_hwthread_map(char *p)
+{
+	int cpu;
+
+	while (*p) {
+		cpu = (*p++) - '0';
+		if (cpu < 0 || cpu > 9)
+			goto err_cpu;
+
+		p++;		/* skip semi-colon */
+		cpu_2_hwthread_id[cpu] = (*p++) - '0';
+		if (cpu_2_hwthread_id[cpu] >= 4)
+			goto err_thread;
+		hwthread_id_2_cpu[cpu_2_hwthread_id[cpu]] = cpu;
+
+		if (*p == ',')
+			p++;		/* skip comma */
+	}
+
+	return 0;
+err_cpu:
+	pr_err("%s: hwthread_map cpu argument out of range\n", __func__);
+	return -EINVAL;
+err_thread:
+	pr_err("%s: hwthread_map thread argument out of range\n", __func__);
+	return -EINVAL;
+}
+early_param("hwthread_map", parse_hwthread_map);
+
+void __init dump_machine_table(void)
+{
+	struct machine_desc *p;
+	const char **compat;
+
+	pr_info("Available machine support:\n\tNAME\t\tCOMPATIBLE LIST\n");
+	for_each_machine_desc(p) {
+		pr_info("\t%s\t[", p->name);
+		for (compat = p->dt_compat; compat && *compat; ++compat)
+			printk(" '%s'", *compat);
+		printk(" ]\n");
+	}
+
+	pr_info("\nPlease check your kernel config and/or bootloader.\n");
+
+	hard_processor_halt(HALT_PANIC);
+}
+
+#ifdef CONFIG_METAG_HALT_ON_PANIC
+static int metag_panic_event(struct notifier_block *this, unsigned long event,
+			     void *ptr)
+{
+	hard_processor_halt(HALT_PANIC);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block metag_panic_block = {
+	metag_panic_event,
+	NULL,
+	0
+};
+#endif
+
+void __init setup_arch(char **cmdline_p)
+{
+	unsigned long start_pfn;
+	unsigned long text_start = (unsigned long)(&_stext);
+	unsigned long cpu = smp_processor_id();
+	unsigned long heap_start, heap_end;
+	unsigned long start_pte;
+	PTBI _pTBI;
+	PTBISEG p_heap;
+	int heap_id, i;
+
+	metag_cache_probe();
+
+	metag_da_probe();
+#ifdef CONFIG_DA_CONSOLE
+	if (metag_da_enabled()) {
+		/* An early channel based console driver */
+		register_console(&dash_console);
+		add_preferred_console("ttyDA", 1, NULL);
+	}
+#endif
+
+	/* try interpreting the argument as a device tree */
+	machine_desc = setup_machine_fdt(original_cmd_line);
+	/* if it doesn't look like a device tree it must be a command line */
+	if (!machine_desc) {
+#ifdef CONFIG_METAG_BUILTIN_DTB
+		/* try the embedded device tree */
+		machine_desc = setup_machine_fdt(__dtb_start);
+		if (!machine_desc)
+			panic("Invalid embedded device tree.");
+#else
+		/* use the default machine description */
+		machine_desc = default_machine_desc();
+#endif
+#ifndef CONFIG_CMDLINE_FORCE
+		/* append the bootloader cmdline to any builtin fdt cmdline */
+		if (boot_command_line[0] && original_cmd_line[0])
+			strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
+		strlcat(boot_command_line, original_cmd_line,
+			COMMAND_LINE_SIZE);
+#endif
+	}
+	setup_meta_clocks(machine_desc->clocks);
+
+	*cmdline_p = boot_command_line;
+	parse_early_param();
+
+	/*
+	 * Make sure we don't alias in dcache or icache
+	 */
+	check_for_cache_aliasing(cpu);
+
+
+#ifdef CONFIG_METAG_HALT_ON_PANIC
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &metag_panic_block);
+#endif
+
+#ifdef CONFIG_DUMMY_CONSOLE
+	conswitchp = &dummy_con;
+#endif
+
+	if (!(__core_reg_get(TXSTATUS) & TXSTATUS_PSTAT_BIT))
+		panic("Privilege must be enabled for this thread.");
+
+	_pTBI = __TBI(TBID_ISTAT_BIT);
+
+	per_cpu(pTBI, cpu) = _pTBI;
+
+	if (!per_cpu(pTBI, cpu))
+		panic("No TBI found!");
+
+	/*
+	 * Initialize all interrupt vectors to our copy of __TBIUnExpXXX,
+	 * rather than the version from the bootloader. This makes call
+	 * stacks easier to understand and may allow us to unmap the
+	 * bootloader at some point.
+	 *
+	 * We need to keep the LWK handler that TBI installed in order to
+	 * be able to do inter-thread comms.
+	 */
+	for (i = 0; i <= TBID_SIGNUM_MAX; i++)
+		if (i != TBID_SIGNUM_LWK)
+			_pTBI->fnSigs[i] = __TBIUnExpXXX;
+
+	/* A Meta requirement is that the kernel is loaded (virtually)
+	 * at the PAGE_OFFSET.
+	 */
+	if (PAGE_OFFSET != text_start)
+		panic("Kernel not loaded at PAGE_OFFSET (%#x) but at %#lx.",
+		      PAGE_OFFSET, text_start);
+
+	start_pte = mmu_read_second_level_page(text_start);
+
+	/*
+	 * Kernel pages should have the PRIV bit set by the bootloader.
+	 */
+	if (!(start_pte & _PAGE_KERNEL))
+		panic("kernel pte does not have PRIV set");
+
+	/*
+	 * See __pa and __va in include/asm/page.h.
+	 * This value is negative when running in local space but the
+	 * calculations work anyway.
+	 */
+	meta_memoffset = text_start - (start_pte & PAGE_MASK);
+
+	/* Now lets look at the heap space */
+	heap_id = (__TBIThreadId() & TBID_THREAD_BITS)
+		+ TBID_SEG(0, TBID_SEGSCOPE_LOCAL, TBID_SEGTYPE_HEAP);
+
+	p_heap = __TBIFindSeg(NULL, heap_id);
+
+	if (!p_heap)
+		panic("Could not find heap from TBI!");
+
+	/* The heap begins at the first full page after the kernel data. */
+	heap_start = (unsigned long) &_heap_start;
+
+	/* The heap ends at the end of the heap segment specified with
+	 * ldlk.
+	 */
+	if (is_global_space(text_start)) {
+		pr_debug("WARNING: running in global space!\n");
+		heap_end = (unsigned long)p_heap->pGAddr + p_heap->Bytes;
+	} else {
+		heap_end = (unsigned long)p_heap->pLAddr + p_heap->Bytes;
+	}
+
+	ROOT_DEV = Root_RAM0;
+
+	/* init_mm is the mm struct used for the first task.  It is then
+	 * cloned for all other tasks spawned from that task.
+	 *
+	 * Note - we are using the virtual addresses here.
+	 */
+	init_mm.start_code = (unsigned long)(&_stext);
+	init_mm.end_code = (unsigned long)(&_etext);
+	init_mm.end_data = (unsigned long)(&_edata);
+	init_mm.brk = (unsigned long)heap_start;
+
+	min_low_pfn = PFN_UP(__pa(text_start));
+	max_low_pfn = PFN_DOWN(__pa(heap_end));
+
+	pfn_base = min_low_pfn;
+
+	/* Round max_pfn up to a 4Mb boundary. The free_bootmem_node()
+	 * call later makes sure to keep the rounded up pages marked reserved.
+	 */
+	max_pfn = max_low_pfn + ((1 << MAX_ORDER) - 1);
+	max_pfn &= ~((1 << MAX_ORDER) - 1);
+
+	start_pfn = PFN_UP(__pa(heap_start));
+
+	if (min_low_pfn & ((1 << MAX_ORDER) - 1)) {
+		/* Theoretically, we could expand the space that the
+		 * bootmem allocator covers - much as we do for the
+		 * 'high' address, and then tell the bootmem system
+		 * that the lowest chunk is 'not available'.  Right
+		 * now it is just much easier to constrain the
+		 * user to always MAX_ORDER align their kernel space.
+		 */
+
+		panic("Kernel must be %d byte aligned, currently at %#lx.",
+		      1 << (MAX_ORDER + PAGE_SHIFT),
+		      min_low_pfn << PAGE_SHIFT);
+	}
+
+#ifdef CONFIG_HIGHMEM
+	highstart_pfn = highend_pfn = max_pfn;
+	high_memory = (void *) __va(PFN_PHYS(highstart_pfn));
+#else
+	high_memory = (void *)__va(PFN_PHYS(max_pfn));
+#endif
+
+	paging_init(heap_end);
+
+	setup_priv();
+
+	/* Setup the boot cpu's mapping. The rest will be setup below. */
+	cpu_2_hwthread_id[smp_processor_id()] = hard_processor_id();
+	hwthread_id_2_cpu[hard_processor_id()] = smp_processor_id();
+
+	/* Copy device tree blob into non-init memory before unflattening */
+	copy_fdt();
+	unflatten_device_tree();
+
+#ifdef CONFIG_SMP
+	smp_init_cpus();
+#endif
+
+	if (machine_desc->init_early)
+		machine_desc->init_early();
+}
+
+static int __init customize_machine(void)
+{
+	/* customizes platform devices, or adds new ones */
+	if (machine_desc->init_machine)
+		machine_desc->init_machine();
+	return 0;
+}
+arch_initcall(customize_machine);
+
+static int __init init_machine_late(void)
+{
+	if (machine_desc->init_late)
+		machine_desc->init_late();
+	return 0;
+}
+late_initcall(init_machine_late);
+
+#ifdef CONFIG_PROC_FS
+/*
+ *	Get CPU information for use by the procfs.
+ */
+static const char *get_cpu_capabilities(unsigned int txenable)
+{
+#ifdef CONFIG_METAG_META21
+	/* See CORE_ID in META HTP.GP TRM - Architecture Overview 2.1.238 */
+	int coreid = metag_in32(METAC_CORE_ID);
+	unsigned int dsp_type = (coreid >> 3) & 7;
+	unsigned int fpu_type = (coreid >> 7) & 3;
+
+	switch (dsp_type | fpu_type << 3) {
+	case (0x00): return "EDSP";
+	case (0x01): return "DSP";
+	case (0x08): return "EDSP+LFPU";
+	case (0x09): return "DSP+LFPU";
+	case (0x10): return "EDSP+FPU";
+	case (0x11): return "DSP+FPU";
+	}
+	return "UNKNOWN";
+
+#else
+	if (!(txenable & TXENABLE_CLASS_BITS))
+		return "DSP";
+	else
+		return "";
+#endif
+}
+
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+	const char *cpu;
+	unsigned int txenable, thread_id, major, minor;
+	unsigned long clockfreq = get_coreclock();
+#ifdef CONFIG_SMP
+	int i;
+	unsigned long lpj;
+#endif
+
+	cpu = "META";
+
+	txenable = __core_reg_get(TXENABLE);
+	major = (txenable & TXENABLE_MAJOR_REV_BITS) >> TXENABLE_MAJOR_REV_S;
+	minor = (txenable & TXENABLE_MINOR_REV_BITS) >> TXENABLE_MINOR_REV_S;
+	thread_id = (txenable >> 8) & 0x3;
+
+#ifdef CONFIG_SMP
+	for_each_online_cpu(i) {
+		lpj = per_cpu(cpu_data, i).loops_per_jiffy;
+		txenable = core_reg_read(TXUCT_ID, TXENABLE_REGNUM,
+							cpu_2_hwthread_id[i]);
+
+		seq_printf(m, "CPU:\t\t%s %d.%d (thread %d)\n"
+			      "Clocking:\t%lu.%1luMHz\n"
+			      "BogoMips:\t%lu.%02lu\n"
+			      "Calibration:\t%lu loops\n"
+			      "Capabilities:\t%s\n\n",
+			      cpu, major, minor, i,
+			      clockfreq / 1000000, (clockfreq / 100000) % 10,
+			      lpj / (500000 / HZ), (lpj / (5000 / HZ)) % 100,
+			      lpj,
+			      get_cpu_capabilities(txenable));
+	}
+#else
+	seq_printf(m, "CPU:\t\t%s %d.%d (thread %d)\n"
+		   "Clocking:\t%lu.%1luMHz\n"
+		   "BogoMips:\t%lu.%02lu\n"
+		   "Calibration:\t%lu loops\n"
+		   "Capabilities:\t%s\n",
+		   cpu, major, minor, thread_id,
+		   clockfreq / 1000000, (clockfreq / 100000) % 10,