[klibc] mips/mips64: simplify crt0 code

Various simplifications and adjustments to the MIPS crt0 files:
- Use NESTED(__start, 0, ra) - this has no effect on the code, but is
  arguably more correct since we have no real stack frame.
- Don't allocate extra stack space. We need none on 64-bit, and only 16
  bytes on 32-bit.
- Align the stack pointer in the (unlikely) event it is misaligned.
- Don't load the gp register - it is useless in non-PIC code.
- Use jal in 64-bit code.
- Crash if __libc_init returns (teq should cause a trap exception).

Signed-off-by: James Cowgill <james.cowgill@mips.com>
Link: https://www.zytor.com/pipermail/klibc/2018-March/003982.html
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
diff --git a/usr/klibc/arch/mips/crt0.S b/usr/klibc/arch/mips/crt0.S
index 142d9f2..47d7d8f 100644
--- a/usr/klibc/arch/mips/crt0.S
+++ b/usr/klibc/arch/mips/crt0.S
@@ -10,16 +10,13 @@
 
 #include <machine/asm.h>
 
-NESTED(__start, 32, sp)
-	subu	sp, 32
-	sw	zero, 16(sp)
-
-	lui	gp, %hi(_gp)		# Initialize gp
-	addiu	gp, gp, _gp
-
-	addiu	a0, sp, 32		# Pointer to ELF entry structure
+NESTED(__start, 0, ra)
+	move	a0, sp			# Pointer to ELF entry structure
 	move	a1, v0			# Kernel-provided atexit() pointer
+	and	sp, -8			# Align stack to 8 bytes
+	subu	sp, 16			# Allocate 16 bytes for function call
 
 	jal	__libc_init
+	teq	zero, zero		# Crash if we return
 
 	END(__start)
diff --git a/usr/klibc/arch/mips64/crt0.S b/usr/klibc/arch/mips64/crt0.S
index 775a919..3f1c2a9 100644
--- a/usr/klibc/arch/mips64/crt0.S
+++ b/usr/klibc/arch/mips64/crt0.S
@@ -10,22 +10,12 @@
 
 #include <machine/asm.h>
 
-NESTED(__start, 64, sp)
-	daddiu  sp,sp,-64
-	sd	zero, 32(sp)
-
-					# Initialize gp
-	lui gp,%highest(_gp) 		# load highest "halfword"
-	daddiu gp,gp,%higher(_gp) 	# merge next "halfword"
-	dsll gp,gp,16 			# shift by one halfword
-	daddiu gp,gp,%hi(_gp) 		# merge next "halfword"
-	dsll gp,gp,16 			# shift into final position
-	daddiu gp,gp,%lo(_gp) 		# merge lowest "halfword"
-
-	daddiu	a0, sp, 64		# Pointer to ELF entry structure
+NESTED(__start, 0, ra)
+	move	a0, sp			# Pointer to ELF entry structure
 	move	a1, v0			# Kernel-provided atexit() pointer
+	and	sp, -16			# Align stack to 16 bytes
 
-	ld  t9, %call16(__libc_init)(gp)
-	jalr t9
+	jal	__libc_init
+	teq	zero, zero		# Crash if we return
 
 	END(__start)