diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 07a56ff614940d2c4a38002a28b76c69570bb0ef..4a2af55e134bcdc2cb07f01fc45d85458c673d94 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -31,8 +31,3 @@ head-y			:= head.o
 obj-$(CONFIG_DEBUG_LL)	+= debug.o
 
 extra-y := $(head-y) init_task.o vmlinux.lds
-
-# Spell out some dependencies that aren't automatically figured out
-$(obj)/entry-armv.o: 	$(obj)/entry-header.S include/asm-arm/constants.h
-$(obj)/entry-common.o: 	$(obj)/entry-header.S include/asm-arm/constants.h \
-			$(obj)/calls.S
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 99d43259ff8917a4e6cd8c6f7b1a7e7761c5e0e3..c1ff4d1f1bfde619616141830a46d3a3cfe4bdc0 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -64,6 +64,26 @@ int main(void)
   DEFINE(TI_VFPSTATE,		offsetof(struct thread_info, vfpstate));
   DEFINE(TI_IWMMXT_STATE,	(offsetof(struct thread_info, fpstate)+4)&~7);
   BLANK();
+  DEFINE(S_R0,			offsetof(struct pt_regs, ARM_r0));
+  DEFINE(S_R1,			offsetof(struct pt_regs, ARM_r1));
+  DEFINE(S_R2,			offsetof(struct pt_regs, ARM_r2));
+  DEFINE(S_R3,			offsetof(struct pt_regs, ARM_r3));
+  DEFINE(S_R4,			offsetof(struct pt_regs, ARM_r4));
+  DEFINE(S_R5,			offsetof(struct pt_regs, ARM_r5));
+  DEFINE(S_R6,			offsetof(struct pt_regs, ARM_r6));
+  DEFINE(S_R7,			offsetof(struct pt_regs, ARM_r7));
+  DEFINE(S_R8,			offsetof(struct pt_regs, ARM_r8));
+  DEFINE(S_R9,			offsetof(struct pt_regs, ARM_r9));
+  DEFINE(S_R10,			offsetof(struct pt_regs, ARM_r10));
+  DEFINE(S_FP,			offsetof(struct pt_regs, ARM_fp));
+  DEFINE(S_IP,			offsetof(struct pt_regs, ARM_ip));
+  DEFINE(S_SP,			offsetof(struct pt_regs, ARM_sp));
+  DEFINE(S_LR,			offsetof(struct pt_regs, ARM_lr));
+  DEFINE(S_PC,			offsetof(struct pt_regs, ARM_pc));
+  DEFINE(S_PSR,			offsetof(struct pt_regs, ARM_cpsr));
+  DEFINE(S_OLD_R0,		offsetof(struct pt_regs, ARM_ORIG_r0));
+  DEFINE(S_FRAME_SIZE,		sizeof(struct pt_regs));
+  BLANK();
 #if __LINUX_ARM_ARCH__ >= 6
   DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id));
   BLANK();
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index bb27c317d94bab88f4578a0c6be7981d15abae65..2a5c3fe09a95484a0d0844454e5f332686d96750 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -14,12 +14,12 @@
  *  it to save wrong values...  Be aware!
  */
 #include <linux/config.h>
-#include <linux/init.h>
 
-#include <asm/thread_info.h>
 #include <asm/glue.h>
-#include <asm/ptrace.h>
 #include <asm/vfpmacros.h>
+#include <asm/hardware.h>		@ should be moved into entry-macro.S
+#include <asm/arch/irqs.h>		@ should be moved into entry-macro.S
+#include <asm/arch/entry-macro.S>
 
 #include "entry-header.S"
 
@@ -118,7 +118,7 @@ __dabt_svc:
 	@
 	@ IRQs off again before pulling preserved data off the stack
 	@
-	disable_irq r0
+	disable_irq
 
 	@
 	@ restore SPSR and restart the instruction
@@ -198,7 +198,7 @@ __und_svc:
 	@
 	@ IRQs off again before pulling preserved data off the stack
 	@
-1:	disable_irq r0
+1:	disable_irq
 
 	@
 	@ restore SPSR and restart the instruction
@@ -232,7 +232,7 @@ __pabt_svc:
 	@
 	@ IRQs off again before pulling preserved data off the stack
 	@
-	disable_irq r0
+	disable_irq
 
 	@
 	@ restore SPSR and restart the instruction
@@ -316,7 +316,7 @@ __dabt_usr:
 	@
 	@ IRQs on, then call the main handler
 	@
-	enable_irq r2
+	enable_irq
 	mov	r2, sp
 	adr	lr, ret_from_exception
 	b	do_DataAbort
@@ -418,7 +418,7 @@ call_fpe:
 	movcss	r7, r5, lsr #(TIF_USING_IWMMXT + 1)
 	bcs	iwmmxt_task_enable
 #endif
-	enable_irq r7
+	enable_irq
 	add	pc, pc, r8, lsr #6
 	mov	r0, r0
 
@@ -472,7 +472,7 @@ fpundefinstr:
 __pabt_usr:
 	usr_entry abt
 
-	enable_irq r0				@ Enable interrupts
+	enable_irq				@ Enable interrupts
 	mov	r0, r2				@ address (pc)
 	mov	r1, sp				@ regs
 	bl	do_PrefetchAbort		@ call abort handler
@@ -522,8 +522,9 @@ ENTRY(__switch_to)
 /*
  * Vector stubs.
  *
- * This code is copied to 0x200 or 0xffff0200 so we can use branches in the
- * vectors, rather than ldr's.
+ * This code is copied to 0xffff0200 so we can use branches in the
+ * vectors, rather than ldr's.  Note that this code must not
+ * exceed 0x300 bytes.
  *
  * Common stub entry macro:
  *   Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
@@ -544,7 +545,7 @@ vector_\name:
 	@
 	mrs	r13, cpsr
 	bic	r13, r13, #MODE_MASK
-	orr	r13, r13, #MODE_SVC
+	orr	r13, r13, #SVC_MODE
 	msr	spsr_cxsf, r13			@ switch to SVC_32 mode
 
 	and	lr, lr, #15
@@ -552,6 +553,7 @@ vector_\name:
 	movs	pc, lr				@ Changes mode and branches
 	.endm
 
+	.globl	__stubs_start
 __stubs_start:
 /*
  * Interrupt dispatcher
@@ -686,37 +688,24 @@ vector_addrexcptn:
 .LCsabt:
 	.word	__temp_abt
 
+	.globl	__stubs_end
 __stubs_end:
 
-	.equ	__real_stubs_start, .LCvectors + 0x200
+	.equ	stubs_offset, __vectors_start + 0x200 - __stubs_start
 
-.LCvectors:
+	.globl	__vectors_start
+__vectors_start:
 	swi	SYS_ERROR0
-	b	__real_stubs_start + (vector_und - __stubs_start)
-	ldr	pc, __real_stubs_start + (.LCvswi - __stubs_start)
-	b	__real_stubs_start + (vector_pabt - __stubs_start)
-	b	__real_stubs_start + (vector_dabt - __stubs_start)
-	b	__real_stubs_start + (vector_addrexcptn - __stubs_start)
-	b	__real_stubs_start + (vector_irq - __stubs_start)
-	b	__real_stubs_start + (vector_fiq - __stubs_start)
-
-ENTRY(__trap_init)
-	stmfd	sp!, {r4 - r6, lr}
-
-	mov	r0, #0xff000000
-	orr	r0, r0, #0x00ff0000		@ high vectors position
-	adr	r1, .LCvectors			@ set up the vectors
-	ldmia	r1, {r1, r2, r3, r4, r5, r6, ip, lr}
-	stmia	r0, {r1, r2, r3, r4, r5, r6, ip, lr}
-
-	add	r2, r0, #0x200
-	adr	r0, __stubs_start		@ copy stubs to 0x200
-	adr	r1, __stubs_end
-1:	ldr	r3, [r0], #4
-	str	r3, [r2], #4
-	cmp	r0, r1
-	blt	1b
-	LOADREGS(fd, sp!, {r4 - r6, pc})
+	b	vector_und + stubs_offset
+	ldr	pc, .LCvswi + stubs_offset
+	b	vector_pabt + stubs_offset
+	b	vector_dabt + stubs_offset
+	b	vector_addrexcptn + stubs_offset
+	b	vector_irq + stubs_offset
+	b	vector_fiq + stubs_offset
+
+	.globl	__vectors_end
+__vectors_end:
 
 	.data
 
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 53a7e0dea44d30281c716f2a6a0cafbe85ac79cb..3f8d0e3aefabf71c6bf82783024a72c39988974f 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -9,19 +9,10 @@
  */
 #include <linux/config.h>
 
-#include <asm/thread_info.h>
-#include <asm/ptrace.h>
 #include <asm/unistd.h>
 
 #include "entry-header.S"
 
-/* 
- * We rely on the fact that R0 is at the bottom of the stack (due to
- * slow/fast restore user regs).
- */
-#if S_R0 != 0
-#error "Please fix"
-#endif
 
 	.align	5
 /*
@@ -30,11 +21,19 @@
  * stack.
  */
 ret_fast_syscall:
-	disable_irq r1				@ disable interrupts
+	disable_irq				@ disable interrupts
 	ldr	r1, [tsk, #TI_FLAGS]
 	tst	r1, #_TIF_WORK_MASK
 	bne	fast_work_pending
-	fast_restore_user_regs
+
+	@ fast_restore_user_regs (IRQs must already be disabled; r0 is not reloaded)
+	ldr	r1, [sp, #S_OFF + S_PSR]	@ get calling cpsr
+	ldr	lr, [sp, #S_OFF + S_PC]!	@ get pc
+	msr	spsr_cxsf, r1			@ save in spsr_svc
+	ldmdb	sp, {r1 - lr}^			@ get calling r1 - lr
+	mov	r0, r0				@ nop before touching banked sp after ldm ^
+	add	sp, sp, #S_FRAME_SIZE - S_PC
+	movs	pc, lr				@ return & move spsr_svc into cpsr
 
 /*
  * Ok, we need to do extra processing, enter the slow path.
@@ -49,7 +48,7 @@ work_pending:
 	mov	r0, sp				@ 'regs'
 	mov	r2, why				@ 'syscall'
 	bl	do_notify_resume
-	disable_irq r1				@ disable interrupts
+	disable_irq				@ disable interrupts
 	b	no_work_pending
 
 work_resched:
@@ -59,12 +58,19 @@ work_resched:
  */
 ENTRY(ret_to_user)
 ret_slow_syscall:
-	disable_irq r1				@ disable interrupts
+	disable_irq				@ disable interrupts
 	ldr	r1, [tsk, #TI_FLAGS]
 	tst	r1, #_TIF_WORK_MASK
 	bne	work_pending
 no_work_pending:
-	slow_restore_user_regs
+	@ slow_restore_user_regs (IRQs must already be disabled)
+	ldr	r1, [sp, #S_PSR]		@ get calling cpsr
+	ldr	lr, [sp, #S_PC]!		@ get pc
+	msr	spsr_cxsf, r1			@ save in spsr_svc
+	ldmdb	sp, {r0 - lr}^			@ get calling r0 - lr
+	mov	r0, r0				@ nop before touching banked sp after ldm ^
+	add	sp, sp, #S_FRAME_SIZE - S_PC
+	movs	pc, lr				@ return & move spsr_svc into cpsr
 
 /*
  * This is how we return from a fork.
@@ -116,9 +122,26 @@ ENTRY(ret_from_fork)
 
 	.align	5
 ENTRY(vector_swi)
-	save_user_regs
+	sub	sp, sp, #S_FRAME_SIZE
+	stmia	sp, {r0 - r12}			@ Calling r0 - r12
+	add	r8, sp, #S_PC
+	stmdb	r8, {sp, lr}^			@ Calling sp, lr
+	mrs	r8, spsr			@ called from non-FIQ mode, so ok.
+	str	lr, [sp, #S_PC]			@ Save calling PC
+	str	r8, [sp, #S_PSR]		@ Save CPSR
+	str	r0, [sp, #S_OLD_R0]		@ Save OLD_R0
 	zero_fp
-	get_scno
+
+	/*
+	 * Get the system call number (Thumb: from r7; ARM: from the SWI insn).
+	 */
+#ifdef CONFIG_ARM_THUMB
+	tst	r8, #PSR_T_BIT			@ r8 = SPSR read by the mrs above
+	addne	scno, r7, #__NR_SYSCALL_BASE	@ put OS number in
+	ldreq	scno, [lr, #-4]			@ get SWI instruction
+#else
+	ldr	scno, [lr, #-4]			@ get SWI instruction
+#endif
 	arm710_bug_check scno, ip
 
 #ifdef CONFIG_ALIGNMENT_TRAP
@@ -126,14 +149,14 @@ ENTRY(vector_swi)
 	ldr	ip, [ip]
 	mcr	p15, 0, ip, c1, c0		@ update control register
 #endif
-	enable_irq ip
+	enable_irq
 
 	str	r4, [sp, #-S_OFF]!		@ push fifth arg
 
 	get_thread_info tsk
 	ldr	ip, [tsk, #TI_FLAGS]		@ check for syscall tracing
 	bic	scno, scno, #0xff000000		@ mask off SWI op-code
-	eor	scno, scno, #OS_NUMBER << 20	@ check OS number
+	eor	scno, scno, #__NR_SYSCALL_BASE	@ check OS number
 	adr	tbl, sys_call_table		@ load syscall table pointer
 	tst	ip, #_TIF_SYSCALL_TRACE		@ are we tracing syscalls?
 	bne	__sys_trace
@@ -144,8 +167,8 @@ ENTRY(vector_swi)
 
 	add	r1, sp, #S_OFF
 2:	mov	why, #0				@ no longer a real syscall
-	cmp	scno, #ARMSWI_OFFSET
-	eor	r0, scno, #OS_NUMBER << 20	@ put OS number back
+	cmp	scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
+	eor	r0, scno, #__NR_SYSCALL_BASE	@ put OS number back
 	bcs	arm_syscall	
 	b	sys_ni_syscall			@ not private func
 
@@ -190,7 +213,7 @@ ENTRY(sys_call_table)
 @ r5 = syscall table
 		.type	sys_syscall, #function
 sys_syscall:
-		eor	scno, r0, #OS_NUMBER << 20
+		eor	scno, r0, #__NR_SYSCALL_BASE
 		cmp	scno, #__NR_syscall - __NR_SYSCALL_BASE
 		cmpne	scno, #NR_syscalls	@ check range
 		stmloia	sp, {r5, r6}		@ shuffle args
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 4039d8c120b57446a02687993f8aed6721a5bb81..a3d40a0e2b0479032a6d2f7126d64fb2ea626215 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -1,24 +1,11 @@
-#include <linux/config.h> /* for CONFIG_ARCH_xxxx */
+#include <linux/config.h>
+#include <linux/init.h>
 #include <linux/linkage.h>
 
 #include <asm/assembler.h>
 #include <asm/constants.h>
 #include <asm/errno.h>
-#include <asm/hardware.h>
-#include <asm/arch/irqs.h>
-#include <asm/arch/entry-macro.S>
-
-#ifndef MODE_SVC
-#define MODE_SVC 0x13
-#endif
-
-	.macro	zero_fp
-#ifdef CONFIG_FRAME_POINTER
-	mov	fp, #0
-#endif
-	.endm
-
-	.text
+#include <asm/thread_info.h>
 
 @ Bad Abort numbers
 @ -----------------
@@ -29,113 +16,44 @@
 #define BAD_IRQ		3
 #define BAD_UNDEFINSTR	4
 
-#define PT_TRACESYS	0x00000002
-
-@ OS version number used in SWIs
-@  RISC OS is 0
-@  RISC iX is 8
 @
-#define OS_NUMBER	9
-#define ARMSWI_OFFSET	0x000f0000
-
+@ Most of the stack format comes from struct pt_regs, but with
+@ the addition of 8 bytes for storing syscall args 5 and 6.
 @
-@ Stack format (ensured by USER_* and SVC_*)
-@
-#define S_FRAME_SIZE	72
-#define S_OLD_R0	68
-#define S_PSR		64
-
-#define S_PC		60
-#define S_LR		56
-#define S_SP		52
-#define S_IP		48
-#define S_FP		44
-#define S_R10		40
-#define S_R9		36
-#define S_R8		32
-#define S_R7		28
-#define S_R6		24
-#define S_R5		20
-#define S_R4		16
-#define S_R3		12
-#define S_R2		8
-#define S_R1		4
-#define S_R0		0
 #define S_OFF		8
 
-	.macro	set_cpsr_c, reg, mode
-	msr	cpsr_c, \mode
+/* 
+ * The SWI code relies on the fact that R0 is at the bottom of the stack
+ * (due to slow/fast restore user regs).
+ */
+#if S_R0 != 0
+#error "Please fix"
+#endif
+
+	.macro	zero_fp
+#ifdef CONFIG_FRAME_POINTER
+	mov	fp, #0
+#endif
 	.endm
 
 #if __LINUX_ARM_ARCH__ >= 6
-	.macro	disable_irq, temp
+	.macro	disable_irq
 	cpsid	i
 	.endm
 
-	.macro	enable_irq, temp
+	.macro	enable_irq
 	cpsie	i
 	.endm
 #else
-	.macro	disable_irq, temp
-	set_cpsr_c \temp, #PSR_I_BIT | MODE_SVC
+	.macro	disable_irq
+	msr	cpsr_c, #PSR_I_BIT | SVC_MODE
 	.endm
 
-	.macro	enable_irq, temp
-	set_cpsr_c \temp, #MODE_SVC
+	.macro	enable_irq
+	msr	cpsr_c, #SVC_MODE
 	.endm
 #endif
 
-	.macro	save_user_regs
-	sub	sp, sp, #S_FRAME_SIZE
-	stmia	sp, {r0 - r12}			@ Calling r0 - r12
-	add	r8, sp, #S_PC
-	stmdb	r8, {sp, lr}^			@ Calling sp, lr
-	mrs	r8, spsr			@ called from non-FIQ mode, so ok.
-	str	lr, [sp, #S_PC]			@ Save calling PC
-	str	r8, [sp, #S_PSR]		@ Save CPSR
-	str	r0, [sp, #S_OLD_R0]		@ Save OLD_R0
-	.endm
-
-	.macro	restore_user_regs
-	ldr	r1, [sp, #S_PSR]		@ Get calling cpsr
-	disable_irq ip				@ disable IRQs
-	ldr	lr, [sp, #S_PC]!		@ Get PC
-	msr	spsr_cxsf, r1			@ save in spsr_svc
-	ldmdb	sp, {r0 - lr}^			@ Get calling r0 - lr
-	mov	r0, r0
-	add	sp, sp, #S_FRAME_SIZE - S_PC
-	movs	pc, lr				@ return & move spsr_svc into cpsr
-	.endm
-
-/*
- * Must be called with IRQs already disabled.
- */
-	.macro	fast_restore_user_regs
-	ldr	r1, [sp, #S_OFF + S_PSR]	@ get calling cpsr
-	ldr	lr, [sp, #S_OFF + S_PC]!	@ get pc
-	msr	spsr_cxsf, r1			@ save in spsr_svc
-	ldmdb	sp, {r1 - lr}^			@ get calling r1 - lr
-	mov	r0, r0
-	add	sp, sp, #S_FRAME_SIZE - S_PC
-	movs	pc, lr				@ return & move spsr_svc into cpsr
-	.endm
-
-/*
- * Must be called with IRQs already disabled.
- */
-	.macro	slow_restore_user_regs
-	ldr	r1, [sp, #S_PSR]		@ get calling cpsr
-	ldr	lr, [sp, #S_PC]!		@ get pc
-	msr	spsr_cxsf, r1			@ save in spsr_svc
-	ldmdb	sp, {r0 - lr}^			@ get calling r1 - lr
-	mov	r0, r0
-	add	sp, sp, #S_FRAME_SIZE - S_PC
-	movs	pc, lr				@ return & move spsr_svc into cpsr
-	.endm
-
-	.macro	mask_pc, rd, rm
-	.endm
-
 	.macro	get_thread_info, rd
 	mov	\rd, sp, lsr #13
 	mov	\rd, \rd, lsl #13
@@ -165,18 +83,3 @@ scno	.req	r7		@ syscall number
 tbl	.req	r8		@ syscall table pointer
 why	.req	r8		@ Linux syscall (!= 0)
 tsk	.req	r9		@ current thread_info
-
-/*
- * Get the system call number.
- */
-	.macro	get_scno
-#ifdef CONFIG_ARM_THUMB
-	tst	r8, #PSR_T_BIT		@ this is SPSR from save_user_regs
-	addne	scno, r7, #OS_NUMBER << 20 @ put OS number in
-	ldreq	scno, [lr, #-4]
-
-#else
-	mask_pc	lr, lr
-	ldr	scno, [lr, #-4]		@ get SWI instruction
-#endif
-	.endm
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 6e31718f60085f097bbcd14759b9bf9d1ef5df11..0078aeb85737197a84af1eeb0353dbef74427901 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -578,9 +578,16 @@ EXPORT_SYMBOL(abort);
 
 void __init trap_init(void)
 {
-	extern void __trap_init(void);
+	extern char __stubs_start[], __stubs_end[];
+	extern char __vectors_start[], __vectors_end[];
 
-	__trap_init();
+	/*
+	 * Copy the vectors to the start of the vector page, mapped at
+	 * 0xffff0000, and the stubs to offset 0x200 within it (both are in
+	 * entry-armv.S), and ensure these are visible to the instruction stream.
+	 */
+	memcpy((void *)0xffff0000, __vectors_start, __vectors_end - __vectors_start);
+	memcpy((void *)0xffff0200, __stubs_start, __stubs_end - __stubs_start);
 	flush_icache_range(0xffff0000, 0xffff0000 + PAGE_SIZE);
 	modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
 }