/* kernel/FPU-emu/reg_round.S - pub/scm/linux/kernel/git/nico/archive - Git at Google */

 	.file "reg_round.S"
 /*---------------------------------------------------------------------------+
  |  reg_round.S                                                              |
  |                                                                           |
  | Rounding/truncation/etc for FPU basic arithmetic functions.               |
  |                                                                           |
  | Copyright (C) 1993                                                        |
  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
  |                       Australia.  E-mail apm233m@vaxc.cc.monash.edu.au    |
  |                                                                           |
  | This code has four possible entry points.                                 |
  | The following must be entered by a jmp instruction:                       |
  |   FPU_round, FPU_round_sqrt, and FPU_Arith_exit.                          |
  |                                                                           |
  | The _round_reg entry point is intended to be used by C code.              |
  | From C, call as:                                                          |
  | void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
  |                                                                           |
  +---------------------------------------------------------------------------*/

 /*---------------------------------------------------------------------------+
  | Four entry points.                                                        |
  |                                                                           |
  | Needed by both the FPU_round and FPU_round_sqrt entry points:             |
  |  %eax:%ebx  64 bit significand                                            |
  |  %edx       32 bit extension of the significand                           |
  |  %edi       pointer to an FPU_REG for the result to be stored             |
  |  stack      calling function must have set up a C stack frame and         |
  |             pushed %esi, %edi, and %ebx                                   |
  |                                                                           |
  | Needed just for the FPU_round_sqrt entry point:                           |
  |  %cx  A control word in the same format as the FPU control word.          |
  | Otherwise, PARAM4 must give such a value.                                 |
  |                                                                           |
  |                                                                           |
  | The significand and its extension are assumed to be exact in the          |
  | following sense:                                                          |
  |   If the significand by itself is the exact result then the significand   |
  |   extension (%edx) must contain 0, otherwise the significand extension    |
  |   must be non-zero.                                                       |
  |   If the significand extension is non-zero then the significand is        |
  |   smaller than the magnitude of the correct exact result by an amount     |
  |   greater than zero and less than one ls bit of the significand.          |
  |   The significand extension is only required to have three possible       |
  |   non-zero values:                                                        |
  |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
  |                                 bit smaller than the magnitude of the     |
  |                                 true exact result.                        |
  |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
  |                                 smaller than the magnitude of the true    |
  |                                 exact result.                             |
  |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
  |                                 bit smaller than the magnitude of the     |
  |                                 true exact result.                        |
  |                                                                           |
  +---------------------------------------------------------------------------*/

 /*---------------------------------------------------------------------------+
  |  The code in this module has become quite complex, but it should handle   |
  |  all of the FPU flags which are set at this stage of the basic arithmetic |
  |  computations.                                                            |
  |  There are a few rare cases where the results are not set identically to  |
  |  a real FPU. These require a bit more thought because at this stage the   |
  |  results of the code here appear to be more consistent...                 |
  |  This may be changed in a future version.                                 |
  +---------------------------------------------------------------------------*/


 #include "fpu_asm.h"
 #include "exception.h"
 #include "control_w.h"

 // Values stored in FPU_bits_lost to record in which direction the
 // rounding step discarded bits (0 means that nothing was lost).
 #define	LOST_DOWN	$1
 #define	LOST_UP		$2
 // Values stored in FPU_denormal (0 means that the operand was not
 // treated as a de-normal).
 #define	DENORMAL	$1
 #define	UNMASKED_UNDERFLOW $2

 .data
 	.align 2,0
 // Scratch state shared by every entry point in this file.
 // NOTE(review): being static, these two bytes would be overwritten by
 // a nested or concurrent invocation -- confirm callers never re-enter.
 FPU_bits_lost:
 	.byte	0
 FPU_denormal:
 	.byte	0

 .text
 	.align 2,144		// padding byte 144 = 0x90, the x86 NOP opcode
 // The four entry points described in the header comment.
 .globl FPU_round
 .globl FPU_round_sqrt
 .globl FPU_Arith_exit
 .globl _round_reg

 /*
  * _round_reg -- the C-callable entry point.
  *
  * Builds the stack frame which the shared exit path unwinds, fetches
  * the three C arguments into the registers expected at FPU_round_sqrt
  * and then joins the common rounding code there.
  */
 _round_reg:
 	pushl	%ebp
 	movl	%esp,%ebp
 	pushl	%esi
 	pushl	%edi
 	pushl	%ebx

 	movl	PARAM3,%ecx		/* third argument: control word */
 	movl	PARAM2,%edx		/* second argument: significand extension */
 	movl	PARAM1,%edi		/* first argument: the FPU_REG */
 	movl	SIGL(%edi),%ebx		/* ls 32 bits of the significand */
 	movl	SIGH(%edi),%eax		/* ms 32 bits of the significand */
 	jmp	FPU_round_sqrt

 /*
  * FPU_round -- normal entry point (entered by jmp, not by call).
  * Fetches the control word from PARAM4 and falls into FPU_round_sqrt.
  */
 FPU_round:
 	movl	PARAM4,%ecx

 /*
  * FPU_round_sqrt -- entry point from wm_sqrt.S (entered by jmp).
  *   In:  %eax:%ebx  64 bit significand
  *        %edx       32 bit extension of the significand
  *        %ecx       control word
  *        %edi       FPU_REG which receives the result
  * The complete control word is copied to %esi for the later rounding
  * mode dispatch; %ecx is then reduced to the precision control field.
  */
 FPU_round_sqrt:

 #ifdef PARANOID
 // Cannot use this here yet
 //	orl	%eax,%eax
 //	jns	L_entry_bugged
 #endif /* PARANOID */

 	cmpl	EXP_UNDER,EXP(%edi)
 	jle	xMake_denorm			// The number is a de-normal

 	movb	$0,FPU_denormal			// 0 -> not a de-normal

 // The de-normal paths rejoin here with %ecx holding the control word.
 xDenorm_done:
 	movb	$0,FPU_bits_lost		// No bits yet lost in rounding

 	movl	%ecx,%esi			// Keep the whole control word
 	andl	CW_PC,%ecx			// Isolate precision control
 	cmpl	PR_64_BITS,%ecx
 	je	LRound_To_64

 	cmpl	PR_53_BITS,%ecx
 	je	LRound_To_53

 	cmpl	PR_24_BITS,%ecx
 	je	LRound_To_24

 // Without PARANOID an unrecognised precision field falls through to
 // the code which follows this block.
 #ifdef PARANOID
 	jmp	L_bugged	// There is no bug, just a bad control word
 #endif /* PARANOID */


 // Round etc to 24 bit precision
 //
 // The result keeps the upper 24 bits of %eax.  The low 8 bits of %eax
 // together with all of %ebx and %edx are the bits to be discarded.
 // %esi holds the control word saved at xDenorm_done.
 LRound_To_24:
 	movl	%esi,%ecx
 	andl	CW_RC,%ecx		// Isolate the rounding control field
 	cmpl	RC_RND,%ecx
 	je	LRound_nearest_24

 	cmpl	RC_CHOP,%ecx
 	je	LCheck_truncate_24

 	cmpl	RC_UP,%ecx		// Towards +infinity
 	je	LUp_24

 	cmpl	RC_DOWN,%ecx		// Towards -infinity
 	je	LDown_24

 #ifdef PARANOID
 	jmp	L_bugged
 #endif /* PARANOID */

 LUp_24:
 	cmpb	SIGN_POS,SIGN(%edi)
 	jne	LCheck_truncate_24	// If negative then  up==truncate

 	jmp	LCheck_24_round_up

 LDown_24:
 	cmpb	SIGN_POS,SIGN(%edi)
 	je	LCheck_truncate_24	// If positive then  down==truncate

 // Directed rounding away from zero: increment if any discarded bit is set.
 LCheck_24_round_up:
 	movl	%eax,%ecx
 	andl	$0x000000ff,%ecx
 	orl	%ebx,%ecx
 	orl	%edx,%ecx
 	jnz	LDo_24_round_up
 	jmp	LRe_normalise		// Already exact

 LRound_nearest_24:
 	// Do rounding of the 24th bit if needed (nearest or even)
 	movl	%eax,%ecx
 	andl	$0x000000ff,%ecx
 	cmpl	$0x00000080,%ecx
 	jc	LCheck_truncate_24	// less than half, no increment needed

 	jne	LGreater_Half_24	// greater than half, increment needed

 	// Possibly half, we need to check the ls bits
 	orl	%ebx,%ebx
 	jnz	LGreater_Half_24	// greater than half, increment needed

 	orl	%edx,%edx
 	jnz	LGreater_Half_24	// greater than half, increment needed

 	// Exactly half, increment only if 24th bit is 1 (round to even)
 	testl	$0x00000100,%eax
 	jz	LDo_truncate_24

 LGreater_Half_24:			// Rounding: increment at the 24th bit
 LDo_24_round_up:
 	andl	$0xffffff00,%eax	// Truncate to 24 bits
 	xorl	%ebx,%ebx
 	movb	LOST_UP,FPU_bits_lost
 	addl	$0x00000100,%eax	// The carry is examined at the jump target
 	jmp	LCheck_Round_Overflow

 LCheck_truncate_24:
 	movl	%eax,%ecx
 	andl	$0x000000ff,%ecx
 	orl	%ebx,%ecx
 	orl	%edx,%ecx
 	jz	LRe_normalise			// No truncation needed

 LDo_truncate_24:
 	andl	$0xffffff00,%eax	// Truncate to 24 bits
 	xorl	%ebx,%ebx
 	movb	LOST_DOWN,FPU_bits_lost
 	jmp	LRe_normalise


 // Round etc to 53 bit precision
 //
 // The result keeps %eax and the upper 21 bits of %ebx.  The low 11
 // bits of %ebx together with %edx are the bits to be discarded.
 // %esi holds the control word saved at xDenorm_done.
 LRound_To_53:
 	movl	%esi,%ecx
 	andl	CW_RC,%ecx		// Isolate the rounding control field
 	cmpl	RC_RND,%ecx
 	je	LRound_nearest_53

 	cmpl	RC_CHOP,%ecx
 	je	LCheck_truncate_53

 	cmpl	RC_UP,%ecx		// Towards +infinity
 	je	LUp_53

 	cmpl	RC_DOWN,%ecx		// Towards -infinity
 	je	LDown_53

 #ifdef PARANOID
 	jmp	L_bugged
 #endif /* PARANOID */

 LUp_53:
 	cmpb	SIGN_POS,SIGN(%edi)
 	jne	LCheck_truncate_53	// If negative then  up==truncate

 	jmp	LCheck_53_round_up

 LDown_53:
 	cmpb	SIGN_POS,SIGN(%edi)
 	je	LCheck_truncate_53	// If positive then  down==truncate

 // Directed rounding away from zero: increment if any discarded bit is set.
 LCheck_53_round_up:
 	movl	%ebx,%ecx
 	andl	$0x000007ff,%ecx
 	orl	%edx,%ecx
 	jnz	LDo_53_round_up
 	jmp	LRe_normalise		// Already exact

 LRound_nearest_53:
 	// Do rounding of the 53rd bit if needed (nearest or even)
 	movl	%ebx,%ecx
 	andl	$0x000007ff,%ecx
 	cmpl	$0x00000400,%ecx
 	jc	LCheck_truncate_53	// less than half, no increment needed

 	jnz	LGreater_Half_53	// greater than half, increment needed

 	// Possibly half, we need to check the ls bits
 	orl	%edx,%edx
 	jnz	LGreater_Half_53	// greater than half, increment needed

 	// Exactly half, increment only if 53rd bit is 1 (round to even)
 	testl	$0x00000800,%ebx
 	jz	LTruncate_53

 LGreater_Half_53:			// Rounding: increment at the 53rd bit
 LDo_53_round_up:
 	movb	LOST_UP,FPU_bits_lost
 	andl	$0xfffff800,%ebx	// Truncate to 53 bits
 	addl	$0x00000800,%ebx
 	adcl	$0,%eax			// The carry is examined at the jump target
 	jmp	LCheck_Round_Overflow

 LCheck_truncate_53:
 	movl	%ebx,%ecx
 	andl	$0x000007ff,%ecx
 	orl	%edx,%ecx
 	jz	LRe_normalise		// No truncation needed

 LTruncate_53:
 	movb	LOST_DOWN,FPU_bits_lost
 	andl	$0xfffff800,%ebx	// Truncate to 53 bits
 	jmp	LRe_normalise


 // Round etc to 64 bit precision
 //
 // All of %eax:%ebx is kept; only the extension in %edx decides whether
 // the significand is incremented.  %esi holds the control word saved
 // at xDenorm_done.
 LRound_To_64:
 	movl	%esi,%ecx
 	andl	CW_RC,%ecx		// Isolate the rounding control field
 	cmpl	RC_RND,%ecx
 	je	LRound_nearest_64

 	cmpl	RC_CHOP,%ecx
 	je	LCheck_truncate_64

 	cmpl	RC_UP,%ecx		// Towards +infinity
 	je	LUp_64

 	cmpl	RC_DOWN,%ecx		// Towards -infinity
 	je	LDown_64

 #ifdef PARANOID
 	jmp	L_bugged
 #endif /* PARANOID */

 LUp_64:
 	cmpb	SIGN_POS,SIGN(%edi)
 	jne	LCheck_truncate_64	// If negative then  up==truncate

 	orl	%edx,%edx
 	jnz	LDo_64_round_up
 	jmp	LRe_normalise		// Already exact

 LDown_64:
 	cmpb	SIGN_POS,SIGN(%edi)
 	je	LCheck_truncate_64	// If positive then  down==truncate

 	orl	%edx,%edx
 	jnz	LDo_64_round_up
 	jmp	LRe_normalise		// Already exact

 LRound_nearest_64:
 	cmpl	$0x80000000,%edx
 	jc	LCheck_truncate_64	// less than half, no increment needed

 	jne	LDo_64_round_up		// greater than half, increment needed

 	/* Now test for round-to-even */
 	/* Byte-sized test of the ls bit, so the byte register is named. */
 	testb	$1,%bl
 	jz	LCheck_truncate_64

 LDo_64_round_up:
 	movb	LOST_UP,FPU_bits_lost
 	addl	$1,%ebx
 	adcl	$0,%eax

 // Also the target of the 24 and 53 bit round-up code, which arrives
 // with the carry flag left by its own increment.
 LCheck_Round_Overflow:
 	jnc	LRe_normalise		/* Rounding done, no overflow */

 	/* Overflow, adjust the result (to 1.0) */
 	rcrl	$1,%eax			/* rotate the carry back in as the ms bit */
 	rcrl	$1,%ebx
 	incl	EXP(%edi)
 	jmp	LRe_normalise

 LCheck_truncate_64:
 	orl	%edx,%edx
 	jz	LRe_normalise		// Exact, nothing was lost

 // Falls through into the code which follows this block.
 LTruncate_64:
 	movb	LOST_DOWN,FPU_bits_lost

 // Rounding is finished: %eax:%ebx hold the rounded significand.
 // A non-zero FPU_denormal means that the de-normal / underflow
 // handling still has to be completed before the result is stored.
 LRe_normalise:
 	testb	$0xff,FPU_denormal
 	jnz	xNormalise_result

 // Report any precision loss recorded during rounding.
 xL_Normalised:
 	cmpb	LOST_UP,FPU_bits_lost
 	je	xL_precision_lost_up

 	cmpb	LOST_DOWN,FPU_bits_lost
 	je	xL_precision_lost_down

 // An exponent at or above EXP_OVER is handed to the overflow code
 // instead of being stored.
 xL_no_precision_loss:
 	cmpl	EXP_OVER,EXP(%edi)
 	jge	L_overflow

 	/* store the result */
 	movb	TW_Valid,TAG(%edi)

 // Also entered from L_underflow_to_zero, which sets its own tag.
 xL_Store_significand:
 	movl	%eax,SIGH(%edi)
 	movl	%ebx,SIGL(%edi)

 // Common exit.  Pops the registers in the reverse of the order in
 // which _round_reg pushes them, then drops the %ebp frame.
 FPU_Arith_exit:
 	popl	%ebx
 	popl	%edi
 	popl	%esi
 	leave
 	ret


 // Set the FPU status flags to represent precision loss due to
 // round-up.
 // %eax (ms half of the significand) is saved around the call,
 // presumably because the callee is free to overwrite it.
 xL_precision_lost_up:
 	push	%eax
 	call	_set_precision_flag_up
 	popl	%eax
 	jmp	xL_no_precision_loss

 // Set the FPU status flags to represent precision loss due to
 // truncation.
 // %eax is saved around the call in the same way as above.
 xL_precision_lost_down:
 	push	%eax
 	call	_set_precision_flag_down
 	popl	%eax
 	jmp	xL_no_precision_loss


 // The number is a denormal (which might get rounded up to a normal)
 // Shift the number right the required number of bits, which will
 // have to be undone later...
 //
 // In:  %eax:%ebx significand, %edx extension, %ecx control word,
 //      %edi FPU_REG.
 // Every path rejoins at xDenorm_done with %ecx holding the control
 // word again.  Bits shifted out are folded into the extension so that
 // a non-zero remainder is never lost.
 xMake_denorm:
 	// The action to be taken depends upon whether the underflow
 	// exception is masked
 	testb	CW_Underflow,%cl		// Underflow mask.
 	jz	xUnmasked_underflow		// Do not make a denormal.

 	movb	DENORMAL,FPU_denormal

 	pushl	%ecx		// Save the control word, %ecx becomes the shift count
 	movl	EXP(%edi),%ecx
 	subl	EXP_UNDER+1,%ecx
 	negl	%ecx		// %ecx = (EXP_UNDER+1) - exponent

 	cmpl	$64,%ecx	/* 64 or more bits are handled separately */
 	jnc	xDenorm_shift_more_than_63

 	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
 	jnc	xDenorm_shift_more_than_32

 // We got here without jumps by assuming that the most common requirement
 //   is for a small de-normalising shift.
 // Shift by [1..31] bits
 	addl	%ecx,EXP(%edi)	// exponent becomes EXP_UNDER+1
 	orl	%edx,%edx	// extension
 	setne	%ch		// %ch = 1 if the old extension was non-zero
 	xorl	%edx,%edx
 	shrd	%cl,%ebx,%edx	// bits leaving %ebx form the new extension
 	shrd	%cl,%eax,%ebx
 	shr	%cl,%eax
 	orb	%ch,%dl		// keep the old extension as a sticky bit
 	popl	%ecx		// Restore the control word
 	jmp	xDenorm_done

 // Shift by [32..63] bits
 xDenorm_shift_more_than_32:
 	addl	%ecx,EXP(%edi)	// exponent becomes EXP_UNDER+1
 	subb	$32,%cl		// the register moves below supply 32 of the bits
 	orl	%edx,%edx
 	setne	%ch		// %ch = 1 if the old extension was non-zero
 	orb	%ch,%bl
 	xorl	%edx,%edx
 	shrd	%cl,%ebx,%edx
 	shrd	%cl,%eax,%ebx
 	shr	%cl,%eax
 	orl	%edx,%edx		// test these 32 bits
 	setne	%cl		// %cl = 1 if any bit left %ebx
 	orb	%ch,%bl
 	orb	%cl,%bl
 	movl	%ebx,%edx	// shift everything down by one whole register
 	movl	%eax,%ebx
 	xorl	%eax,%eax
 	popl	%ecx		// Restore the control word
 	jmp	xDenorm_done

 // Shift by [64..) bits
 xDenorm_shift_more_than_63:
 	cmpl	$64,%ecx
 	jne	xDenorm_shift_more_than_64

 // Exactly 64 bit shift
 // The old ms word becomes the extension, with a sticky bit from the
 // old ls word and the old extension.
 	addl	%ecx,EXP(%edi)	// exponent becomes EXP_UNDER+1
 	xorl	%ecx,%ecx
 	orl	%edx,%edx
 	setne	%cl
 	orl	%ebx,%ebx
 	setne	%ch
 	orb	%ch,%cl
 	orb	%cl,%al
 	movl	%eax,%edx
 	xorl	%eax,%eax
 	xorl	%ebx,%ebx
 	popl	%ecx		// Restore the control word
 	jmp	xDenorm_done

 // More than 64 bits: the significand becomes zero and the extension
 // is set to a small non-zero value.
 xDenorm_shift_more_than_64:
 	movl	EXP_UNDER+1,EXP(%edi)
 // This is easy, %eax must be non-zero, so..
 	movl	$1,%edx
 	xorl	%eax,%eax
 	xorl	%ebx,%ebx
 	popl	%ecx		// Restore the control word
 	jmp	xDenorm_done


 // Underflow exception not masked: the significand is left alone and
 // only the exponent is adjusted before rounding.
 xUnmasked_underflow:
 	// Increase the exponent by the magic number
 	// (3*(1<<13) = 24576; presumably the x87 exponent adjustment
 	// used for an unmasked underflow -- TODO confirm)
 	addl	$(3*(1<<13)),EXP(%edi)
 	movb	UNMASKED_UNDERFLOW,FPU_denormal
 	jmp	xDenorm_done


 // Undo the de-normalisation.
 //
 // Entered from LRe_normalise when FPU_denormal is non-zero, with the
 // rounded significand in %eax:%ebx.  Shifts the significand left until
 // its ms bit is set and lowers the exponent by the same amount.
 xNormalise_result:
 	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
 	je	xSignal_underflow

 // The number must be a denormal if we got here.
 #ifdef PARANOID
 	// But check it... just in case.
 	cmpl	EXP_UNDER+1,EXP(%edi)
 	jne	L_norm_bugged
 #endif /* PARANOID */

 	orl	%eax,%eax	// ms bits
 	jnz	LNormalise_shift_up_to_31	// Shift left 0 - 31 bits

 	orl	%ebx,%ebx
 	jz	L_underflow_to_zero	// The contents are zero

 // Shift left 32 - 63 bits
 // Move the ls word up first, then finish with the 0 - 31 bit code.
 	movl	%ebx,%eax
 	xorl	%ebx,%ebx
 	subl	$32,EXP(%edi)

 LNormalise_shift_up_to_31:
 	bsrl	%eax,%ecx	/* get the required shift in %ecx */
 	subl	$31,%ecx
 	negl	%ecx		/* %ecx = 31 - index of the ms set bit */
 	shld	%cl,%ebx,%eax
 	shl	%cl,%ebx
 	subl	%ecx,EXP(%edi)

 LNormalise_shift_done:
 	testb	$0xff,FPU_bits_lost	// bits lost == underflow
 	jz	xL_Normalised

 	// There must be a masked underflow
 	push	%eax			// Preserve the ms significand word
 	pushl	EX_Underflow		// Argument for the call
 	call	_exception
 	popl	%eax			// Discard the argument
 	popl	%eax			// Recover the ms significand word
 	jmp	xL_Normalised


 // The operations resulted in a number too small to represent.
 // Masked response.
 // Reached with %eax and %ebx both zero; flags precision loss and
 // underflow, tags the register as zero and stores the significand.
 L_underflow_to_zero:
 	push	%eax
 	call	_set_precision_flag_down
 	popl	%eax

 	push	%eax			// Preserve %eax across the call
 	pushl	EX_Underflow		// Argument for the call
 	call	_exception
 	popl	%eax			// Discard the argument
 	popl	%eax			// Recover %eax

 	movb	TW_Zero,TAG(%edi)
 	jmp	xL_Store_significand


 // The operations resulted in a number too large to represent.
 // The FPU_REG pointer is passed to _arith_overflow and nothing is
 // stored by this file afterwards.
 L_overflow:
 	push	%edi
 	call	_arith_overflow
 	pop	%edi
 	jmp	FPU_Arith_exit


 // Unmasked underflow: raise the exception, then continue with the
 // normal precision / overflow checks.
 // NOTE(review): this path calls EXCEPTION while the masked paths call
 // _exception -- presumably the same routine via a macro; confirm.
 xSignal_underflow:
 	push	%eax			// Preserve the ms significand word
 	pushl	EX_Underflow		// Argument for the call
 	call	EXCEPTION
 	popl	%eax			// Discard the argument
 	popl	%eax			// Recover the ms significand word
 	jmp	xL_Normalised


 #ifdef PARANOID
 /* If we ever get here then we have problems! */
 /*
  * Each handler reports an internal error code through EXCEPTION,
  * removes the argument from the stack and leaves by the common exit
  * without storing a result.
  */
 L_bugged:
 	pushl	EX_INTERNAL|0x201
 	call	EXCEPTION
 	popl	%ebx			/* discard the argument */
 	jmp	FPU_Arith_exit

 L_norm_bugged:
 	pushl	EX_INTERNAL|0x216
 	call	EXCEPTION
 	popl	%ebx			/* discard the argument */
 	jmp	FPU_Arith_exit

 L_entry_bugged:
 	pushl	EX_INTERNAL|0x217
 	call	EXCEPTION
 	popl	%ebx			/* discard the argument */
 	jmp	FPU_Arith_exit
 #endif /* PARANOID */
	.file "reg_round.S"
	/*---------------------------------------------------------------------------+
	\| reg_round.S \|
	\| \|
	\| Rounding/truncation/etc for FPU basic arithmetic functions. \|
	\| \|
	\| Copyright (C) 1993 \|
	\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, \|
	\| Australia. E-mail apm233m@vaxc.cc.monash.edu.au \|
	\| \|
	\| This code has four possible entry points. \|
	\| The following must be entered by a jmp instruction: \|
	\| FPU_round, FPU_round_sqrt, and FPU_Arith_exit. \|
	\| \|
	\| The _round_reg entry point is intended to be used by C code. \|
	\| From C, call as: \|
	\| void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) \|
	\| \|
	+---------------------------------------------------------------------------*/

	/*---------------------------------------------------------------------------+
	\| Four entry points. \|
	\| \|
	\| Needed by both the FPU_round and FPU_round_sqrt entry points: \|
	\| %eax:%ebx 64 bit significand \|
	\| %edx 32 bit extension of the significand \|
	\| %edi pointer to an FPU_REG for the result to be stored \|
	\| stack calling function must have set up a C stack frame and \|
	\| pushed %esi, %edi, and %ebx \|
	\| \|
	\| Needed just for the FPU_round_sqrt entry point: \|
	\| %cx A control word in the same format as the FPU control word. \|
	\| Otherwise, PARAM4 must give such a value. \|
	\| \|
	\| \|
	\| The significand and its extension are assumed to be exact in the \|
	\| following sense: \|
	\| If the significand by itself is the exact result then the significand \|
	\| extension (%edx) must contain 0, otherwise the significand extension \|
	\| must be non-zero. \|
	\| If the significand extension is non-zero then the significand is \|
	\| smaller than the magnitude of the correct exact result by an amount \|
	\| greater than zero and less than one ls bit of the significand. \|
	\| The significand extension is only required to have three possible \|
	\| non-zero values: \|
	\| less than 0x80000000 <=> the significand is less than 1/2 an ls \|
	\| bit smaller than the magnitude of the \|
	\| true exact result. \|
	\| exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit \|
	\| smaller than the magnitude of the true \|
	\| exact result. \|
	\| greater than 0x80000000 <=> the significand is more than 1/2 an ls \|
	\| bit smaller than the magnitude of the \|
	\| true exact result. \|
	\| \|
	+---------------------------------------------------------------------------*/

	/*---------------------------------------------------------------------------+
	\| The code in this module has become quite complex, but it should handle \|
	\| all of the FPU flags which are set at this stage of the basic arithmetic \|
	\| computations. \|
	\| There are a few rare cases where the results are not set identically to \|
	\| a real FPU. These require a bit more thought because at this stage the \|
	\| results of the code here appear to be more consistent... \|
	\| This may be changed in a future version. \|
	+---------------------------------------------------------------------------*/


	#include "fpu_asm.h"
	#include "exception.h"
	#include "control_w.h"

	// NOTE(review): these definitions also appear earlier in this file;
	// the repeated labels would presumably clash when assembled --
	// confirm which copy is meant to be kept.
	// Values stored in FPU_bits_lost to record in which direction the
	// rounding step discarded bits (0 means that nothing was lost).
	#define LOST_DOWN $1
	#define LOST_UP $2
	// Values stored in FPU_denormal (0 means that the operand was not
	// treated as a de-normal).
	#define DENORMAL $1
	#define UNMASKED_UNDERFLOW $2

	.data
	.align 2,0
	// Scratch state shared by every entry point in this file.
	FPU_bits_lost:
	.byte 0
	FPU_denormal:
	.byte 0

	.text
	.align 2,144 // padding byte 144 = 0x90, the x86 NOP opcode
	.globl FPU_round
	.globl FPU_round_sqrt
	.globl FPU_Arith_exit
	.globl _round_reg

	/*
	 * _round_reg -- the C-callable entry point.
	 *
	 * Builds the stack frame which the shared exit path unwinds, fetches
	 * the three C arguments into the registers expected at FPU_round_sqrt
	 * and then joins the common rounding code there.
	 */
	_round_reg:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %edi
	pushl %ebx

	movl PARAM3,%ecx /* third argument: control word */
	movl PARAM2,%edx /* second argument: significand extension */
	movl PARAM1,%edi /* first argument: the FPU_REG */
	movl SIGL(%edi),%ebx /* ls 32 bits of the significand */
	movl SIGH(%edi),%eax /* ms 32 bits of the significand */
	jmp FPU_round_sqrt

	/*
	 * FPU_round -- normal entry point (entered by jmp, not by call).
	 * Fetches the control word from PARAM4 and falls into FPU_round_sqrt.
	 */
	FPU_round:
	movl PARAM4,%ecx

	/*
	 * FPU_round_sqrt -- entry point from wm_sqrt.S (entered by jmp).
	 *   In:  %eax:%ebx  64 bit significand
	 *        %edx       32 bit extension of the significand
	 *        %ecx       control word
	 *        %edi       FPU_REG which receives the result
	 * The complete control word is copied to %esi for the later rounding
	 * mode dispatch; %ecx is then reduced to the precision control field.
	 */
	FPU_round_sqrt:

	#ifdef PARANOID
	// Cannot use this here yet
	// orl %eax,%eax
	// jns L_entry_bugged
	#endif /* PARANOID */

	cmpl EXP_UNDER,EXP(%edi)
	jle xMake_denorm // The number is a de-normal

	movb $0,FPU_denormal // 0 -> not a de-normal

	// The de-normal paths rejoin here with %ecx holding the control word.
	xDenorm_done:
	movb $0,FPU_bits_lost // No bits yet lost in rounding

	movl %ecx,%esi // Keep the whole control word
	andl CW_PC,%ecx // Isolate precision control
	cmpl PR_64_BITS,%ecx
	je LRound_To_64

	cmpl PR_53_BITS,%ecx
	je LRound_To_53

	cmpl PR_24_BITS,%ecx
	je LRound_To_24

	// Without PARANOID an unrecognised precision field falls through to
	// the code which follows this block.
	#ifdef PARANOID
	jmp L_bugged // There is no bug, just a bad control word
	#endif /* PARANOID */


	// Round etc to 24 bit precision
	//
	// The result keeps the upper 24 bits of %eax. The low 8 bits of %eax
	// together with all of %ebx and %edx are the bits to be discarded.
	// %esi holds the control word saved at xDenorm_done.
	LRound_To_24:
	movl %esi,%ecx
	andl CW_RC,%ecx // Isolate the rounding control field
	cmpl RC_RND,%ecx
	je LRound_nearest_24

	cmpl RC_CHOP,%ecx
	je LCheck_truncate_24

	cmpl RC_UP,%ecx // Towards +infinity
	je LUp_24

	cmpl RC_DOWN,%ecx // Towards -infinity
	je LDown_24

	#ifdef PARANOID
	jmp L_bugged
	#endif /* PARANOID */

	LUp_24:
	cmpb SIGN_POS,SIGN(%edi)
	jne LCheck_truncate_24 // If negative then up==truncate

	jmp LCheck_24_round_up

	LDown_24:
	cmpb SIGN_POS,SIGN(%edi)
	je LCheck_truncate_24 // If positive then down==truncate

	// Directed rounding away from zero: increment if any discarded bit is set.
	LCheck_24_round_up:
	movl %eax,%ecx
	andl $0x000000ff,%ecx
	orl %ebx,%ecx
	orl %edx,%ecx
	jnz LDo_24_round_up
	jmp LRe_normalise // Already exact

	LRound_nearest_24:
	// Do rounding of the 24th bit if needed (nearest or even)
	movl %eax,%ecx
	andl $0x000000ff,%ecx
	cmpl $0x00000080,%ecx
	jc LCheck_truncate_24 // less than half, no increment needed

	jne LGreater_Half_24 // greater than half, increment needed

	// Possibly half, we need to check the ls bits
	orl %ebx,%ebx
	jnz LGreater_Half_24 // greater than half, increment needed

	orl %edx,%edx
	jnz LGreater_Half_24 // greater than half, increment needed

	// Exactly half, increment only if 24th bit is 1 (round to even)
	testl $0x00000100,%eax
	jz LDo_truncate_24

	LGreater_Half_24: // Rounding: increment at the 24th bit
	LDo_24_round_up:
	andl $0xffffff00,%eax // Truncate to 24 bits
	xorl %ebx,%ebx
	movb LOST_UP,FPU_bits_lost
	addl $0x00000100,%eax // The carry is examined at the jump target
	jmp LCheck_Round_Overflow

	LCheck_truncate_24:
	movl %eax,%ecx
	andl $0x000000ff,%ecx
	orl %ebx,%ecx
	orl %edx,%ecx
	jz LRe_normalise // No truncation needed

	LDo_truncate_24:
	andl $0xffffff00,%eax // Truncate to 24 bits
	xorl %ebx,%ebx
	movb LOST_DOWN,FPU_bits_lost
	jmp LRe_normalise


	// Round etc to 53 bit precision
	//
	// The result keeps %eax and the upper 21 bits of %ebx. The low 11
	// bits of %ebx together with %edx are the bits to be discarded.
	// %esi holds the control word saved at xDenorm_done.
	LRound_To_53:
	movl %esi,%ecx
	andl CW_RC,%ecx // Isolate the rounding control field
	cmpl RC_RND,%ecx
	je LRound_nearest_53

	cmpl RC_CHOP,%ecx
	je LCheck_truncate_53

	cmpl RC_UP,%ecx // Towards +infinity
	je LUp_53

	cmpl RC_DOWN,%ecx // Towards -infinity
	je LDown_53

	#ifdef PARANOID
	jmp L_bugged
	#endif /* PARANOID */

	LUp_53:
	cmpb SIGN_POS,SIGN(%edi)
	jne LCheck_truncate_53 // If negative then up==truncate

	jmp LCheck_53_round_up

	LDown_53:
	cmpb SIGN_POS,SIGN(%edi)
	je LCheck_truncate_53 // If positive then down==truncate

	// Directed rounding away from zero: increment if any discarded bit is set.
	LCheck_53_round_up:
	movl %ebx,%ecx
	andl $0x000007ff,%ecx
	orl %edx,%ecx
	jnz LDo_53_round_up
	jmp LRe_normalise // Already exact

	LRound_nearest_53:
	// Do rounding of the 53rd bit if needed (nearest or even)
	movl %ebx,%ecx
	andl $0x000007ff,%ecx
	cmpl $0x00000400,%ecx
	jc LCheck_truncate_53 // less than half, no increment needed

	jnz LGreater_Half_53 // greater than half, increment needed

	// Possibly half, we need to check the ls bits
	orl %edx,%edx
	jnz LGreater_Half_53 // greater than half, increment needed

	// Exactly half, increment only if 53rd bit is 1 (round to even)
	testl $0x00000800,%ebx
	jz LTruncate_53

	LGreater_Half_53: // Rounding: increment at the 53rd bit
	LDo_53_round_up:
	movb LOST_UP,FPU_bits_lost
	andl $0xfffff800,%ebx // Truncate to 53 bits
	addl $0x00000800,%ebx
	adcl $0,%eax // The carry is examined at the jump target
	jmp LCheck_Round_Overflow

	LCheck_truncate_53:
	movl %ebx,%ecx
	andl $0x000007ff,%ecx
	orl %edx,%ecx
	jz LRe_normalise // No truncation needed

	LTruncate_53:
	movb LOST_DOWN,FPU_bits_lost
	andl $0xfffff800,%ebx // Truncate to 53 bits
	jmp LRe_normalise


	// Round etc to 64 bit precision
	//
	// All of %eax:%ebx is kept; only the extension in %edx decides whether
	// the significand is incremented. %esi holds the control word saved
	// at xDenorm_done.
	LRound_To_64:
	movl %esi,%ecx
	andl CW_RC,%ecx // Isolate the rounding control field
	cmpl RC_RND,%ecx
	je LRound_nearest_64

	cmpl RC_CHOP,%ecx
	je LCheck_truncate_64

	cmpl RC_UP,%ecx // Towards +infinity
	je LUp_64

	cmpl RC_DOWN,%ecx // Towards -infinity
	je LDown_64

	#ifdef PARANOID
	jmp L_bugged
	#endif /* PARANOID */

	LUp_64:
	cmpb SIGN_POS,SIGN(%edi)
	jne LCheck_truncate_64 // If negative then up==truncate

	orl %edx,%edx
	jnz LDo_64_round_up
	jmp LRe_normalise // Already exact

	LDown_64:
	cmpb SIGN_POS,SIGN(%edi)
	je LCheck_truncate_64 // If positive then down==truncate

	orl %edx,%edx
	jnz LDo_64_round_up
	jmp LRe_normalise // Already exact

	LRound_nearest_64:
	cmpl $0x80000000,%edx
	jc LCheck_truncate_64 // less than half, no increment needed

	jne LDo_64_round_up // greater than half, increment needed

	/* Now test for round-to-even */
	/* Byte-sized test of the ls bit, so the byte register is named. */
	testb $1,%bl
	jz LCheck_truncate_64

	LDo_64_round_up:
	movb LOST_UP,FPU_bits_lost
	addl $1,%ebx
	adcl $0,%eax

	// Also the target of the 24 and 53 bit round-up code, which arrives
	// with the carry flag left by its own increment.
	LCheck_Round_Overflow:
	jnc LRe_normalise /* Rounding done, no overflow */

	/* Overflow, adjust the result (to 1.0) */
	rcrl $1,%eax /* rotate the carry back in as the ms bit */
	rcrl $1,%ebx
	incl EXP(%edi)
	jmp LRe_normalise

	LCheck_truncate_64:
	orl %edx,%edx
	jz LRe_normalise // Exact, nothing was lost

	// Falls through into the code which follows this block.
	LTruncate_64:
	movb LOST_DOWN,FPU_bits_lost

	// Rounding is finished: %eax:%ebx hold the rounded significand.
	// A non-zero FPU_denormal means that the de-normal / underflow
	// handling still has to be completed before the result is stored.
	LRe_normalise:
	testb $0xff,FPU_denormal
	jnz xNormalise_result

	// Report any precision loss recorded during rounding.
	xL_Normalised:
	cmpb LOST_UP,FPU_bits_lost
	je xL_precision_lost_up

	cmpb LOST_DOWN,FPU_bits_lost
	je xL_precision_lost_down

	// An exponent at or above EXP_OVER is handed to the overflow code
	// instead of being stored.
	xL_no_precision_loss:
	cmpl EXP_OVER,EXP(%edi)
	jge L_overflow

	/* store the result */
	movb TW_Valid,TAG(%edi)

	// Also entered from L_underflow_to_zero, which sets its own tag.
	xL_Store_significand:
	movl %eax,SIGH(%edi)
	movl %ebx,SIGL(%edi)

	// Common exit. Pops the registers in the reverse of the order in
	// which _round_reg pushes them, then drops the %ebp frame.
	FPU_Arith_exit:
	popl %ebx
	popl %edi
	popl %esi
	leave
	ret


	// Set the FPU status flags to represent precision loss due to
	// round-up.
	// %eax (ms half of the significand) is saved around the call,
	// presumably because the callee is free to overwrite it.
	xL_precision_lost_up:
	push %eax
	call _set_precision_flag_up
	popl %eax
	jmp xL_no_precision_loss

	// Set the FPU status flags to represent precision loss due to
	// truncation.
	// %eax is saved around the call in the same way as above.
	xL_precision_lost_down:
	push %eax
	call _set_precision_flag_down
	popl %eax
	jmp xL_no_precision_loss


	// The number is a denormal (which might get rounded up to a normal)
	// Shift the number right the required number of bits, which will
	// have to be undone later...
	//
	// In: %eax:%ebx significand, %edx extension, %ecx control word,
	// %edi FPU_REG.
	// Every path rejoins at xDenorm_done with %ecx holding the control
	// word again. Bits shifted out are folded into the extension so that
	// a non-zero remainder is never lost.
	xMake_denorm:
	// The action to be taken depends upon whether the underflow
	// exception is masked
	testb CW_Underflow,%cl // Underflow mask.
	jz xUnmasked_underflow // Do not make a denormal.

	movb DENORMAL,FPU_denormal

	pushl %ecx // Save the control word, %ecx becomes the shift count
	movl EXP(%edi),%ecx
	subl EXP_UNDER+1,%ecx
	negl %ecx // %ecx = (EXP_UNDER+1) - exponent

	cmpl $64,%ecx /* 64 or more bits are handled separately */
	jnc xDenorm_shift_more_than_63

	cmpl $32,%ecx /* shrd only works for 0..31 bits */
	jnc xDenorm_shift_more_than_32

	// We got here without jumps by assuming that the most common requirement
	// is for a small de-normalising shift.
	// Shift by [1..31] bits
	addl %ecx,EXP(%edi) // exponent becomes EXP_UNDER+1
	orl %edx,%edx // extension
	setne %ch // %ch = 1 if the old extension was non-zero
	xorl %edx,%edx
	shrd %cl,%ebx,%edx // bits leaving %ebx form the new extension
	shrd %cl,%eax,%ebx
	shr %cl,%eax
	orb %ch,%dl // keep the old extension as a sticky bit
	popl %ecx // Restore the control word
	jmp xDenorm_done

	// Shift by [32..63] bits
	xDenorm_shift_more_than_32:
	addl %ecx,EXP(%edi) // exponent becomes EXP_UNDER+1
	subb $32,%cl // the register moves below supply 32 of the bits
	orl %edx,%edx
	setne %ch // %ch = 1 if the old extension was non-zero
	orb %ch,%bl
	xorl %edx,%edx
	shrd %cl,%ebx,%edx
	shrd %cl,%eax,%ebx
	shr %cl,%eax
	orl %edx,%edx // test these 32 bits
	setne %cl // %cl = 1 if any bit left %ebx
	orb %ch,%bl
	orb %cl,%bl
	movl %ebx,%edx // shift everything down by one whole register
	movl %eax,%ebx
	xorl %eax,%eax
	popl %ecx // Restore the control word
	jmp xDenorm_done

	// Shift by [64..) bits
	xDenorm_shift_more_than_63:
	cmpl $64,%ecx
	jne xDenorm_shift_more_than_64

	// Exactly 64 bit shift
	// The old ms word becomes the extension, with a sticky bit from the
	// old ls word and the old extension.
	addl %ecx,EXP(%edi) // exponent becomes EXP_UNDER+1
	xorl %ecx,%ecx
	orl %edx,%edx
	setne %cl
	orl %ebx,%ebx
	setne %ch
	orb %ch,%cl
	orb %cl,%al
	movl %eax,%edx
	xorl %eax,%eax
	xorl %ebx,%ebx
	popl %ecx // Restore the control word
	jmp xDenorm_done

	// More than 64 bits: the significand becomes zero and the extension
	// is set to a small non-zero value.
	xDenorm_shift_more_than_64:
	movl EXP_UNDER+1,EXP(%edi)
	// This is easy, %eax must be non-zero, so..
	movl $1,%edx
	xorl %eax,%eax
	xorl %ebx,%ebx
	popl %ecx // Restore the control word
	jmp xDenorm_done


	// Underflow exception not masked: the significand is left alone and
	// only the exponent is adjusted before rounding.
	xUnmasked_underflow:
	// Increase the exponent by the magic number
	// (3*(1<<13) = 24576; presumably the x87 exponent adjustment
	// used for an unmasked underflow -- TODO confirm)
	addl $(3*(1<<13)),EXP(%edi)
	movb UNMASKED_UNDERFLOW,FPU_denormal
	jmp xDenorm_done


	// Undo the de-normalisation.
	//
	// Entered from LRe_normalise when FPU_denormal is non-zero, with the
	// rounded significand in %eax:%ebx. Shifts the significand left until
	// its ms bit is set and lowers the exponent by the same amount.
	xNormalise_result:
	cmpb UNMASKED_UNDERFLOW,FPU_denormal
	je xSignal_underflow

	// The number must be a denormal if we got here.
	#ifdef PARANOID
	// But check it... just in case.
	cmpl EXP_UNDER+1,EXP(%edi)
	jne L_norm_bugged
	#endif /* PARANOID */

	orl %eax,%eax // ms bits
	jnz LNormalise_shift_up_to_31 // Shift left 0 - 31 bits

	orl %ebx,%ebx
	jz L_underflow_to_zero // The contents are zero

	// Shift left 32 - 63 bits
	// Move the ls word up first, then finish with the 0 - 31 bit code.
	movl %ebx,%eax
	xorl %ebx,%ebx
	subl $32,EXP(%edi)

	LNormalise_shift_up_to_31:
	bsrl %eax,%ecx /* get the required shift in %ecx */
	subl $31,%ecx
	negl %ecx /* %ecx = 31 - index of the ms set bit */
	shld %cl,%ebx,%eax
	shl %cl,%ebx
	subl %ecx,EXP(%edi)

	LNormalise_shift_done:
	testb $0xff,FPU_bits_lost // bits lost == underflow
	jz xL_Normalised

	// There must be a masked underflow
	push %eax // Preserve the ms significand word
	pushl EX_Underflow // Argument for the call
	call _exception
	popl %eax // Discard the argument
	popl %eax // Recover the ms significand word
	jmp xL_Normalised


	// The operations resulted in a number too small to represent.
	// Masked response.
	// Reached with %eax and %ebx both zero; flags precision loss and
	// underflow, tags the register as zero and stores the significand.
	L_underflow_to_zero:
	push %eax
	call _set_precision_flag_down
	popl %eax

	push %eax // Preserve %eax across the call
	pushl EX_Underflow // Argument for the call
	call _exception
	popl %eax // Discard the argument
	popl %eax // Recover %eax

	movb TW_Zero,TAG(%edi)
	jmp xL_Store_significand


	// The operations resulted in a number too large to represent.
	// The FPU_REG pointer is passed to _arith_overflow and nothing is
	// stored by this file afterwards.
	L_overflow:
	push %edi
	call _arith_overflow
	pop %edi
	jmp FPU_Arith_exit


	// Unmasked underflow: raise the exception, then continue with the
	// normal precision / overflow checks.
	// NOTE(review): this path calls EXCEPTION while the masked paths call
	// _exception -- presumably the same routine via a macro; confirm.
	xSignal_underflow:
	push %eax // Preserve the ms significand word
	pushl EX_Underflow // Argument for the call
	call EXCEPTION
	popl %eax // Discard the argument
	popl %eax // Recover the ms significand word
	jmp xL_Normalised


	#ifdef PARANOID
	/* If we ever get here then we have problems! */
	/*
	 * Each handler reports an internal error code through EXCEPTION,
	 * removes the argument from the stack and leaves by the common exit
	 * without storing a result.
	 */
	L_bugged:
	pushl EX_INTERNAL|0x201
	call EXCEPTION
	popl %ebx /* discard the argument */
	jmp FPU_Arith_exit

	L_norm_bugged:
	pushl EX_INTERNAL|0x216
	call EXCEPTION
	popl %ebx /* discard the argument */
	jmp FPU_Arith_exit

	L_entry_bugged:
	pushl EX_INTERNAL|0x217
	call EXCEPTION
	popl %ebx /* discard the argument */
	jmp FPU_Arith_exit
	#endif /* PARANOID */