/* drivers/FPU-emu/reg_round.S - pub/scm/linux/kernel/git/nico/archive - Git at Google */

 	.file "reg_round.S"
 /*---------------------------------------------------------------------------+
  |  reg_round.S                                                              |
  |                                                                           |
  | Rounding/truncation/etc for FPU basic arithmetic functions.               |
  |                                                                           |
  | Copyright (C) 1993                                                        |
  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
  |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
  |                                                                           |
  | This code has four possible entry points.                                 |
  | The following must be entered by a jmp instruction:                       |
  |   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.                  |
  |                                                                           |
  | The _round_reg entry point is intended to be used by C code.              |
  | From C, call as:                                                          |
  | void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
  |                                                                           |
  | For correct "up" and "down" rounding, the argument must have the correct  |
  | sign.                                                                     |
  |                                                                           |
  +---------------------------------------------------------------------------*/

 /*---------------------------------------------------------------------------+
  | Four entry points.                                                        |
  |                                                                           |
  | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:     |
  |  %eax:%ebx  64 bit significand                                            |
  |  %edx       32 bit extension of the significand                           |
  |  %edi       pointer to an FPU_REG for the result to be stored             |
  |  stack      calling function must have set up a C stack frame and         |
  |             pushed %esi, %edi, and %ebx                                   |
  |                                                                           |
  | Needed just for the fpu_reg_round_sqrt entry point:                       |
  |  %cx  A control word in the same format as the FPU control word.          |
  | Otherwise, PARAM4 must give such a value.                                 |
  |                                                                           |
  |                                                                           |
  | The significand and its extension are assumed to be exact in the          |
  | following sense:                                                          |
  |   If the significand by itself is the exact result then the significand   |
  |   extension (%edx) must contain 0, otherwise the significand extension    |
  |   must be non-zero.                                                       |
  |   If the significand extension is non-zero then the significand is        |
  |   smaller than the magnitude of the correct exact result by an amount     |
  |   greater than zero and less than one ls bit of the significand.          |
  |   The significand extension is only required to have three possible       |
  |   non-zero values:                                                        |
  |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
  |                                 bit smaller than the magnitude of the     |
  |                                 true exact result.                        |
  |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
  |                                 smaller than the magnitude of the true    |
  |                                 exact result.                             |
  |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
  |                                 bit smaller than the magnitude of the     |
  |                                 true exact result.                        |
  |                                                                           |
  +---------------------------------------------------------------------------*/

 /*---------------------------------------------------------------------------+
  |  The code in this module has become quite complex, but it should handle   |
  |  all of the FPU flags which are set at this stage of the basic arithmetic |
  |  computations.                                                            |
  |  There are a few rare cases where the results are not set identically to  |
  |  a real FPU. These require a bit more thought because at this stage the   |
  |  results of the code here appear to be more consistent...                 |
  |  This may be changed in a future version.                                 |
  +---------------------------------------------------------------------------*/


 #include "fpu_asm.h"
 #include "exception.h"
 #include "control_w.h"

 /* Flags for FPU_bits_lost: records whether the rounding step discarded
    bits by truncating (LOST_DOWN) or by incrementing (LOST_UP).
    Zero means the result was exact. */
 #define	LOST_DOWN	$1
 #define	LOST_UP		$2

 /* Flags for FPU_denormal: DENORMAL is stored when the operand is below
    the normal exponent range and the underflow exception is masked;
    UNMASKED_UNDERFLOW is stored when the exception is not masked.
    Zero means the operand was in the normal range. */
 #define	DENORMAL	$1
 #define	UNMASKED_UNDERFLOW $2


 #ifdef REENTRANT_FPU
 /*	Make the code re-entrant by putting
 	local storage on the stack.  The entry code pushes one dword
 	to make room; the two flag bytes are its first two bytes. */
 #define FPU_bits_lost	(%esp)
 #define FPU_denormal	1(%esp)

 #else
 /*	Not re-entrant, so we can gain speed by putting
 	local storage in a static area: */
 .data
 	.align 2,0
 FPU_bits_lost:
 	.byte	0
 FPU_denormal:
 	.byte	0
 #endif /* REENTRANT_FPU */


 .text
 	.align 2,144
 .globl fpu_reg_round
 .globl fpu_reg_round_sqrt
 .globl fpu_Arith_exit
 .globl _round_reg

 /*
  * _round_reg: the C-callable entry point.
  * Builds the stack frame that the shared exit code (fpu_Arith_exit)
  * unwinds, loads the registers the rounding code works on, and jumps
  * to fpu_reg_round_sqrt with the control word already in %ecx.
  *
  *   PARAM1 -> %edi      FPU_REG rounded in place
  *   PARAM2 -> %edx      significand extension
  *   PARAM3 -> %ecx      control word
  *   %eax:%ebx           significand read from the FPU_REG
  */
 _round_reg:
 	pushl	%ebp
 	movl	%esp,%ebp
 	pushl	%esi
 	pushl	%edi
 	pushl	%ebx

 	movl	PARAM3,%ecx
 	movl	PARAM2,%edx
 	movl	PARAM1,%edi
 	movl	SIGL(%edi),%ebx
 	movl	SIGH(%edi),%eax
 	jmp	fpu_reg_round_sqrt

 /*
  * fpu_reg_round: normal entry point, reached by jmp.
  * Loads the control word from PARAM4 and falls through into
  * fpu_reg_round_sqrt.
  */
 fpu_reg_round:			/* Normal entry point */
 	movl	PARAM4,%ecx

 /*
  * fpu_reg_round_sqrt: entry point with the control word already in %ecx.
  * In:  %eax:%ebx significand, %edx extension, %edi -> result FPU_REG.
  * Sets up the two flag bytes, de-normalises the operand if its exponent
  * is at or below EXP_UNDER, then dispatches on the precision-control
  * bits of the control word.
  */
 fpu_reg_round_sqrt:		/* Entry point from wm_sqrt.S */

 #ifdef REENTRANT_FPU
 	pushl	%ebx		/* adjust the stack pointer */
 #endif /* REENTRANT_FPU */

 #ifdef PARANOID
 /* Cannot use this here yet */
 /*	orl	%eax,%eax */
 /*	jns	L_entry_bugged */
 #endif /* PARANOID */

 	cmpl	EXP_UNDER,EXP(%edi)
 	jle	xMake_denorm			/* The number is a de-normal */

 	movb	$0,FPU_denormal			/* 0 -> not a de-normal */

 /* xMake_denorm and its variants rejoin the main path here. */
 xDenorm_done:
 	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */

 	movl	%ecx,%esi			/* keep the whole control word for the RC test */
 	andl	CW_PC,%ecx
 	cmpl	PR_64_BITS,%ecx
 	je	LRound_To_64

 	cmpl	PR_53_BITS,%ecx
 	je	LRound_To_53

 	cmpl	PR_24_BITS,%ecx
 	je	LRound_To_24

 /* Without PARANOID an unrecognised precision value falls through
    into the 24 bit rounding code which follows. */
 #ifdef PARANOID
 	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
 #endif /* PARANOID */


 /*
  * Round etc to 24 bit precision.
  * The retained bits are the upper 24 bits of %eax.  The low 8 bits of
  * %eax, all of %ebx and the extension %edx form the discarded part.
  * %esi holds the complete control word.
  */
 LRound_To_24:
 	movl	%esi,%ecx
 	andl	CW_RC,%ecx
 	cmpl	RC_RND,%ecx
 	je	LRound_nearest_24

 	cmpl	RC_CHOP,%ecx
 	je	LCheck_truncate_24

 	cmpl	RC_UP,%ecx		/* Towards +infinity */
 	je	LUp_24

 	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
 	je	LDown_24

 /* Without PARANOID an unmatched value falls through into LUp_24. */
 #ifdef PARANOID
 	jmp	L_bugged_round24
 #endif /* PARANOID */

 LUp_24:
 	cmpb	SIGN_POS,SIGN(%edi)
 	jne	LCheck_truncate_24	/* If negative then  up==truncate */

 	jmp	LCheck_24_round_up

 LDown_24:
 	cmpb	SIGN_POS,SIGN(%edi)
 	je	LCheck_truncate_24	/* If positive then  down==truncate */

 /* Directed rounding away from zero: increment if any discarded bit
    is set, otherwise the value is already exact. */
 LCheck_24_round_up:
 	movl	%eax,%ecx
 	andl	$0x000000ff,%ecx
 	orl	%ebx,%ecx
 	orl	%edx,%ecx
 	jnz	LDo_24_round_up
 	jmp	LRe_normalise

 LRound_nearest_24:
 	/* Do rounding of the 24th bit if needed (nearest or even) */
 	movl	%eax,%ecx
 	andl	$0x000000ff,%ecx
 	cmpl	$0x00000080,%ecx
 	jc	LCheck_truncate_24	/* less than half, no increment needed */

 	jne	LGreater_Half_24	/* greater than half, increment needed */

 	/* Possibly half, we need to check the ls bits */
 	orl	%ebx,%ebx
 	jnz	LGreater_Half_24	/* greater than half, increment needed */

 	orl	%edx,%edx
 	jnz	LGreater_Half_24	/* greater than half, increment needed */

 	/* Exactly half, increment only if 24th bit is 1 (round to even) */
 	testl	$0x00000100,%eax
 	jz	LDo_truncate_24

 LGreater_Half_24:			/* Rounding: increment at the 24th bit */
 LDo_24_round_up:
 	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
 	xorl	%ebx,%ebx
 	movb	LOST_UP,FPU_bits_lost
 	addl	$0x00000100,%eax	/* carry out is examined at LCheck_Round_Overflow */
 	jmp	LCheck_Round_Overflow

 /* Truncating modes: record lost precision only if something is
    actually discarded. */
 LCheck_truncate_24:
 	movl	%eax,%ecx
 	andl	$0x000000ff,%ecx
 	orl	%ebx,%ecx
 	orl	%edx,%ecx
 	jz	LRe_normalise		/* No truncation needed */

 LDo_truncate_24:
 	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
 	xorl	%ebx,%ebx
 	movb	LOST_DOWN,FPU_bits_lost
 	jmp	LRe_normalise


 /*
  * Round etc to 53 bit precision.
  * The retained bits are %eax plus the upper 21 bits of %ebx.  The low
  * 11 bits of %ebx and the extension %edx form the discarded part.
  * %esi holds the complete control word.
  */
 LRound_To_53:
 	movl	%esi,%ecx
 	andl	CW_RC,%ecx
 	cmpl	RC_RND,%ecx
 	je	LRound_nearest_53

 	cmpl	RC_CHOP,%ecx
 	je	LCheck_truncate_53

 	cmpl	RC_UP,%ecx		/* Towards +infinity */
 	je	LUp_53

 	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
 	je	LDown_53

 /* Without PARANOID an unmatched value falls through into LUp_53. */
 #ifdef PARANOID
 	jmp	L_bugged_round53
 #endif /* PARANOID */

 LUp_53:
 	cmpb	SIGN_POS,SIGN(%edi)
 	jne	LCheck_truncate_53	/* If negative then  up==truncate */

 	jmp	LCheck_53_round_up

 LDown_53:
 	cmpb	SIGN_POS,SIGN(%edi)
 	je	LCheck_truncate_53	/* If positive then  down==truncate */

 /* Directed rounding away from zero: increment if any discarded bit
    is set, otherwise the value is already exact. */
 LCheck_53_round_up:
 	movl	%ebx,%ecx
 	andl	$0x000007ff,%ecx
 	orl	%edx,%ecx
 	jnz	LDo_53_round_up
 	jmp	LRe_normalise

 LRound_nearest_53:
 	/* Do rounding of the 53rd bit if needed (nearest or even) */
 	movl	%ebx,%ecx
 	andl	$0x000007ff,%ecx
 	cmpl	$0x00000400,%ecx
 	jc	LCheck_truncate_53	/* less than half, no increment needed */

 	jnz	LGreater_Half_53	/* greater than half, increment needed */

 	/* Possibly half, we need to check the ls bits */
 	orl	%edx,%edx
 	jnz	LGreater_Half_53	/* greater than half, increment needed */

 	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
 	testl	$0x00000800,%ebx
 	jz	LTruncate_53

 LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
 LDo_53_round_up:
 	movb	LOST_UP,FPU_bits_lost
 	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
 	addl	$0x00000800,%ebx
 	adcl	$0,%eax			/* carry out is examined at LCheck_Round_Overflow */
 	jmp	LCheck_Round_Overflow

 /* Truncating modes: record lost precision only if something is
    actually discarded. */
 LCheck_truncate_53:
 	movl	%ebx,%ecx
 	andl	$0x000007ff,%ecx
 	orl	%edx,%ecx
 	jz	LRe_normalise

 LTruncate_53:
 	movb	LOST_DOWN,FPU_bits_lost
 	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
 	jmp	LRe_normalise


 /*
  * Round etc to 64 bit precision.
  * All of %eax:%ebx is retained; only the extension %edx is discarded.
  * %esi holds the complete control word.
  */
 LRound_To_64:
 	movl	%esi,%ecx
 	andl	CW_RC,%ecx
 	cmpl	RC_RND,%ecx
 	je	LRound_nearest_64

 	cmpl	RC_CHOP,%ecx
 	je	LCheck_truncate_64

 	cmpl	RC_UP,%ecx		/* Towards +infinity */
 	je	LUp_64

 	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
 	je	LDown_64

 /* Without PARANOID an unmatched value falls through into LUp_64. */
 #ifdef PARANOID
 	jmp	L_bugged_round64
 #endif /* PARANOID */

 LUp_64:
 	cmpb	SIGN_POS,SIGN(%edi)
 	jne	LCheck_truncate_64	/* If negative then  up==truncate */

 	orl	%edx,%edx
 	jnz	LDo_64_round_up
 	jmp	LRe_normalise

 LDown_64:
 	cmpb	SIGN_POS,SIGN(%edi)
 	je	LCheck_truncate_64	/* If positive then  down==truncate */

 	orl	%edx,%edx
 	jnz	LDo_64_round_up
 	jmp	LRe_normalise

 LRound_nearest_64:
 	cmpl	$0x80000000,%edx
 	jc	LCheck_truncate_64	/* less than half */

 	jne	LDo_64_round_up		/* greater than half */

 	/* Now test for round-to-even */
 	/* Byte-sized test must name the byte register %bl; a 32 bit
 	   register does not match the `b' suffix. */
 	testb	$1,%bl
 	jz	LCheck_truncate_64

 LDo_64_round_up:
 	movb	LOST_UP,FPU_bits_lost
 	addl	$1,%ebx
 	adcl	$0,%eax

 /* Shared by the 24, 53 and 64 bit round-up paths: entered with the
    carry flag from the final add of the increment. */
 LCheck_Round_Overflow:
 	jnc	LRe_normalise

 	/* Overflow, adjust the result (significand to 1.0) */
 	rcrl	$1,%eax			/* rotates the carry into the ms bit */
 	rcrl	$1,%ebx
 	incl	EXP(%edi)
 	jmp	LRe_normalise

 LCheck_truncate_64:
 	orl	%edx,%edx
 	jz	LRe_normalise

 /* Falls through into LRe_normalise. */
 LTruncate_64:
 	movb	LOST_DOWN,FPU_bits_lost

 /*
  * Common tail for every rounding path.
  * In:  %eax:%ebx rounded significand, %edi -> result FPU_REG.
  * A non-zero FPU_denormal diverts to xNormalise_result, which comes
  * back to xL_Normalised (or to xL_Store_significand for a zero result).
  */
 LRe_normalise:
 	testb	$0xff,FPU_denormal
 	jnz	xNormalise_result

 /* Report lost precision, if any; both helpers return to
    xL_no_precision_loss. */
 xL_Normalised:
 	cmpb	LOST_UP,FPU_bits_lost
 	je	xL_precision_lost_up

 	cmpb	LOST_DOWN,FPU_bits_lost
 	je	xL_precision_lost_down

 xL_no_precision_loss:
 	/* store the result */
 	movb	TW_Valid,TAG(%edi)

 xL_Store_significand:
 	movl	%eax,SIGH(%edi)
 	movl	%ebx,SIGL(%edi)

 	xorl	%eax,%eax	/* No errors detected. */

 	cmpl	EXP_OVER,EXP(%edi)
 	jge	L_overflow

 fpu_reg_round_exit:
 #ifdef REENTRANT_FPU
 	popl	%ebx		/* adjust the stack pointer */
 #endif /* REENTRANT_FPU */

 /* Unwinds the frame described in the file header: the three saved
    registers, then the frame pointer, then returns. */
 fpu_Arith_exit:
 	popl	%ebx
 	popl	%edi
 	popl	%esi
 	leave
 	ret


 /*
  * Precision was lost and the significand was incremented:
  * have the C helper set the FPU status flags.
  * %eax (ms half of the significand) is preserved around the call.
  */
 xL_precision_lost_up:
 	pushl	%eax
 	call	_set_precision_flag_up
 	popl	%eax
 	jmp	xL_no_precision_loss

 /*
  * Precision was lost and the significand was truncated:
  * have the C helper set the FPU status flags.
  * %eax (ms half of the significand) is preserved around the call.
  */
 xL_precision_lost_down:
 	pushl	%eax
 	call	_set_precision_flag_down
 	popl	%eax
 	jmp	xL_no_precision_loss


 /*
  * The number is a denormal (which might get rounded up to a normal)
  * Shift the number right the required number of bits, which will
  * have to be undone later...
  *
  * In:  %eax:%ebx significand, %edx extension, %ecx control word,
  *      %edi -> FPU_REG whose exponent is <= EXP_UNDER.
  * Out: significand and extension shifted right, exponent raised by the
  *      shift count, %ecx restored; continues at xDenorm_done.
  * Bits shifted out of the extension are not dropped silently: a low
  * bit is OR-ed into the new extension whenever any of them was set.
  */
 xMake_denorm:
 	/* The action to be taken depends upon whether the underflow
 	   exception is masked */
 	testb	CW_Underflow,%cl		/* Underflow mask. */
 	jz	xUnmasked_underflow		/* Do not make a denormal. */

 	movb	DENORMAL,FPU_denormal

 	pushl	%ecx		/* Save */
 	movl	EXP_UNDER+1,%ecx
 	subl	EXP(%edi),%ecx	/* %ecx = number of bits to shift right */

 	cmpl	$64,%ecx	/* shifts of 64 or more are handled separately */
 	jnc	xDenorm_shift_more_than_63

 	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
 	jnc	xDenorm_shift_more_than_32

 /*
  * We got here without jumps by assuming that the most common requirement
  *   is for a small de-normalising shift.
  * Shift by [1..31] bits
  */
 	addl	%ecx,EXP(%edi)
 	orl	%edx,%edx	/* extension */
 	setne	%ch		/* Save whether %edx is non-zero */
 	xorl	%edx,%edx
 	shrd	%cl,%ebx,%edx	/* bits leaving %ebx become the new extension */
 	shrd	%cl,%eax,%ebx
 	shr	%cl,%eax
 	orb	%ch,%dl		/* remember that the old extension was non-zero */
 	popl	%ecx
 	jmp	xDenorm_done

 /* Shift by [32..63] bits */
 xDenorm_shift_more_than_32:
 	addl	%ecx,EXP(%edi)
 	subb	$32,%cl		/* the other 32 bits are done by the register moves below */
 	orl	%edx,%edx
 	setne	%ch		/* %ch = 1 if the old extension was non-zero */
 	orb	%ch,%bl
 	xorl	%edx,%edx
 	shrd	%cl,%ebx,%edx
 	shrd	%cl,%eax,%ebx
 	shr	%cl,%eax
 	orl	%edx,%edx		/* test these 32 bits */
 	setne	%cl
 	orb	%ch,%bl
 	orb	%cl,%bl
 	movl	%ebx,%edx	/* move everything down one register */
 	movl	%eax,%ebx
 	xorl	%eax,%eax
 	popl	%ecx
 	jmp	xDenorm_done

 /* Shift by [64..) bits */
 xDenorm_shift_more_than_63:
 	cmpl	$64,%ecx
 	jne	xDenorm_shift_more_than_64

 /* Exactly 64 bit shift */
 	addl	%ecx,EXP(%edi)
 	xorl	%ecx,%ecx
 	orl	%edx,%edx
 	setne	%cl
 	orl	%ebx,%ebx
 	setne	%ch
 	orb	%ch,%cl		/* %cl = 1 if %edx or %ebx held any set bit */
 	orb	%cl,%al
 	movl	%eax,%edx	/* old ms word becomes the extension */
 	xorl	%eax,%eax
 	xorl	%ebx,%ebx
 	popl	%ecx
 	jmp	xDenorm_done

 xDenorm_shift_more_than_64:
 	movl	EXP_UNDER+1,EXP(%edi)
 /* This is easy, %eax must be non-zero, so.. */
 	movl	$1,%edx		/* extension is just "something non-zero was lost" */
 	xorl	%eax,%eax
 	xorl	%ebx,%ebx
 	popl	%ecx
 	jmp	xDenorm_done


 /* Underflow exception not masked: leave the operand unshifted and
    only record the fact; it is acted upon at xNormalise_result. */
 xUnmasked_underflow:
 	movb	UNMASKED_UNDERFLOW,FPU_denormal
 	jmp	xDenorm_done


 /*
  * Undo the de-normalisation.
  * Reached from LRe_normalise when FPU_denormal is non-zero.
  * In:  %eax:%ebx rounded significand, %edi -> result FPU_REG.
  * The unmasked-underflow case is handed to xSignal_underflow; otherwise
  * the significand is shifted left until its ms bit is set, with the
  * exponent reduced by the same amount.
  */
 xNormalise_result:
 	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
 	je	xSignal_underflow

 /* The number must be a denormal if we got here. */
 #ifdef PARANOID
 	/* But check it... just in case. */
 	cmpl	EXP_UNDER+1,EXP(%edi)
 	jne	L_norm_bugged
 #endif /* PARANOID */

 #ifdef PECULIAR_486
 	/*
 	 * This implements a special feature of 80486 behaviour.
 	 * Underflow will be signalled even if the number is
 	 * not a denormal after rounding.
 	 * This difference occurs only for masked underflow, and not
 	 * in the unmasked case.
 	 * Actual 80486 behaviour differs from this in some circumstances.
 	 */
 	orl	%eax,%eax		/* ms bits */
 	js	LNormalise_shift_done	/* Will be masked underflow */
 #endif /* PECULIAR_486 */

 	orl	%eax,%eax		/* ms bits */
 	js	xL_Normalised		/* No longer a denormal */

 	jnz	LNormalise_shift_up_to_31	/* Shift left 0 - 31 bits */

 	orl	%ebx,%ebx
 	jz	L_underflow_to_zero	/* The contents are zero */

 /* Shift left 32 - 63 bits */
 	movl	%ebx,%eax
 	xorl	%ebx,%ebx
 	subl	$32,EXP(%edi)

 LNormalise_shift_up_to_31:
 	bsrl	%eax,%ecx	/* get the required shift in %ecx */
 	subl	$31,%ecx
 	negl	%ecx		/* %ecx = 31 - (index of highest set bit) */
 	shld	%cl,%ebx,%eax
 	shl	%cl,%ebx
 	subl	%ecx,EXP(%edi)

 LNormalise_shift_done:
 	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
 	jz	xL_Normalised

 	/* There must be a masked underflow */
 	push	%eax			/* preserve ms half of the significand */
 	pushl	EX_Underflow
 	call	_exception
 	popl	%eax			/* discard the argument */
 	popl	%eax			/* restore the significand */
 	jmp	xL_Normalised


 /*
  * Masked underflow where nothing is left of the significand:
  * %eax and %ebx are both zero on arrival.  Flags precision loss
  * (downwards) and underflow, then stores a zero result.
  */
 L_underflow_to_zero:
 	pushl	%eax
 	call	_set_precision_flag_down
 	popl	%eax

 	pushl	%eax			/* keep %eax across the call */
 	pushl	EX_Underflow		/* argument for _exception */
 	call	_exception
 	popl	%eax			/* drop the argument */
 	popl	%eax			/* recover the saved %eax */

 /* The stored exponent becomes EXP_UNDER and the tag TW_Zero */
 	movl	EXP_UNDER,EXP(%edi)
 	movb	TW_Zero,TAG(%edi)
 	jmp	xL_Store_significand


 /*
  * Exponent is at or above EXP_OVER: the result is too large to
  * represent.  Pass the FPU_REG pointer to _arith_overflow; whatever
  * it leaves in %eax is carried to the common exit.
  */
 L_overflow:
 	pushl	%edi
 	call	_arith_overflow
 	popl	%edi
 	jmp	fpu_reg_round_exit


 /*
  * Unmasked underflow was noted on entry.  Rounding may since have
  * lifted the exponent above EXP_UNDER, in which case the result is
  * an ordinary number and nothing is signalled.
  */
 xSignal_underflow:
 	cmpl	EXP_UNDER,EXP(%edi)
 	jg	xL_Normalised		/* no longer below the normal range */

 xDo_unmasked_underflow:
 	/* Bias the exponent upward by the magic number, then raise
 	   the underflow exception */
 	addl	$(3*(1<<13)),EXP(%edi)
 	pushl	%eax			/* keep %eax across the call */
 	pushl	EX_Underflow
 	call	EXCEPTION
 	popl	%eax			/* drop the argument */
 	popl	%eax			/* recover the saved %eax */
 	jmp	xL_Normalised


 #ifdef PARANOID
 /* If we ever get here then we have problems! */
 /*
  * Internal-consistency traps.  Each one raises EX_INTERNAL combined
  * with its own code (0x230..0x235), removes the argument from the
  * stack, and leaves through L_exception_exit with %eax = 1.
  * The value popped into %ebx is discarded: the common exit reloads
  * %ebx from the saved copy on the stack.
  */
 L_bugged_denorm:
 	pushl	EX_INTERNAL|0x230
 	call	EXCEPTION
 	popl	%ebx
 	jmp	L_exception_exit

 L_bugged_round24:
 	pushl	EX_INTERNAL|0x231
 	call	EXCEPTION
 	popl	%ebx
 	jmp	L_exception_exit

 L_bugged_round53:
 	pushl	EX_INTERNAL|0x232
 	call	EXCEPTION
 	popl	%ebx
 	jmp	L_exception_exit

 L_bugged_round64:
 	pushl	EX_INTERNAL|0x233
 	call	EXCEPTION
 	popl	%ebx
 	jmp	L_exception_exit

 L_norm_bugged:
 	pushl	EX_INTERNAL|0x234
 	call	EXCEPTION
 	popl	%ebx
 	jmp	L_exception_exit

 L_entry_bugged:
 	pushl	EX_INTERNAL|0x235
 	call	EXCEPTION
 	popl	%ebx
 L_exception_exit:
 	mov	$1,%eax
 	jmp	fpu_reg_round_exit
 #endif /* PARANOID */
	.file "reg_round.S"
	/*---------------------------------------------------------------------------+
	| reg_round.S |
	| |
	| Rounding/truncation/etc for FPU basic arithmetic functions. |
	| |
	| Copyright (C) 1993 |
	| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
	| Australia. E-mail billm@vaxc.cc.monash.edu.au |
	| |
	| This code has four possible entry points. |
	| The following must be entered by a jmp instruction: |
	| fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. |
	| |
	| The _round_reg entry point is intended to be used by C code. |
	| From C, call as: |
	| void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
	| |
	| For correct "up" and "down" rounding, the argument must have the correct |
	| sign. |
	| |
	+---------------------------------------------------------------------------*/

	/*---------------------------------------------------------------------------+
	| Four entry points. |
	| |
	| Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: |
	| %eax:%ebx 64 bit significand |
	| %edx 32 bit extension of the significand |
	| %edi pointer to an FPU_REG for the result to be stored |
	| stack calling function must have set up a C stack frame and |
	| pushed %esi, %edi, and %ebx |
	| |
	| Needed just for the fpu_reg_round_sqrt entry point: |
	| %cx A control word in the same format as the FPU control word. |
	| Otherwise, PARAM4 must give such a value. |
	| |
	| |
	| The significand and its extension are assumed to be exact in the |
	| following sense: |
	| If the significand by itself is the exact result then the significand |
	| extension (%edx) must contain 0, otherwise the significand extension |
	| must be non-zero. |
	| If the significand extension is non-zero then the significand is |
	| smaller than the magnitude of the correct exact result by an amount |
	| greater than zero and less than one ls bit of the significand. |
	| The significand extension is only required to have three possible |
	| non-zero values: |
	| less than 0x80000000 <=> the significand is less than 1/2 an ls |
	| bit smaller than the magnitude of the |
	| true exact result. |
	| exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit |
	| smaller than the magnitude of the true |
	| exact result. |
	| greater than 0x80000000 <=> the significand is more than 1/2 an ls |
	| bit smaller than the magnitude of the |
	| true exact result. |
	| |
	+---------------------------------------------------------------------------*/

	/*---------------------------------------------------------------------------+
	| The code in this module has become quite complex, but it should handle |
	| all of the FPU flags which are set at this stage of the basic arithmetic |
	| computations. |
	| There are a few rare cases where the results are not set identically to |
	| a real FPU. These require a bit more thought because at this stage the |
	| results of the code here appear to be more consistent... |
	| This may be changed in a future version. |
	+---------------------------------------------------------------------------*/


	#include "fpu_asm.h"
	#include "exception.h"
	#include "control_w.h"

	/* Flags for FPU_bits_lost: records whether the rounding step discarded
	bits by truncating (LOST_DOWN) or by incrementing (LOST_UP).
	Zero means the result was exact. */
	#define LOST_DOWN $1
	#define LOST_UP $2

	/* Flags for FPU_denormal: DENORMAL is stored when the operand is below
	the normal exponent range and the underflow exception is masked;
	UNMASKED_UNDERFLOW is stored when the exception is not masked.
	Zero means the operand was in the normal range. */
	#define DENORMAL $1
	#define UNMASKED_UNDERFLOW $2


	#ifdef REENTRANT_FPU
	/* Make the code re-entrant by putting
	local storage on the stack.  The entry code pushes one dword
	to make room; the two flag bytes are its first two bytes. */
	#define FPU_bits_lost (%esp)
	#define FPU_denormal 1(%esp)

	#else
	/* Not re-entrant, so we can gain speed by putting
	local storage in a static area: */
	.data
	.align 2,0
	FPU_bits_lost:
	.byte 0
	FPU_denormal:
	.byte 0
	#endif /* REENTRANT_FPU */


	.text
	.align 2,144
	.globl fpu_reg_round
	.globl fpu_reg_round_sqrt
	.globl fpu_Arith_exit
	.globl _round_reg

	/*
	* _round_reg: the C-callable entry point.
	* Builds the stack frame that the shared exit code (fpu_Arith_exit)
	* unwinds, loads the registers the rounding code works on, and jumps
	* to fpu_reg_round_sqrt with the control word already in %ecx.
	*
	* PARAM1 -> %edi FPU_REG rounded in place
	* PARAM2 -> %edx significand extension
	* PARAM3 -> %ecx control word
	* %eax:%ebx significand read from the FPU_REG
	*/
	_round_reg:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %edi
	pushl %ebx

	movl PARAM3,%ecx
	movl PARAM2,%edx
	movl PARAM1,%edi
	movl SIGL(%edi),%ebx
	movl SIGH(%edi),%eax
	jmp fpu_reg_round_sqrt

	/*
	* fpu_reg_round: normal entry point, reached by jmp.
	* Loads the control word from PARAM4 and falls through into
	* fpu_reg_round_sqrt.
	*/
	fpu_reg_round: /* Normal entry point */
	movl PARAM4,%ecx

	/*
	* fpu_reg_round_sqrt: entry point with the control word already in %ecx.
	* In: %eax:%ebx significand, %edx extension, %edi -> result FPU_REG.
	* Sets up the two flag bytes, de-normalises the operand if its exponent
	* is at or below EXP_UNDER, then dispatches on the precision-control
	* bits of the control word.
	*/
	fpu_reg_round_sqrt: /* Entry point from wm_sqrt.S */

	#ifdef REENTRANT_FPU
	pushl %ebx /* adjust the stack pointer */
	#endif /* REENTRANT_FPU */

	#ifdef PARANOID
	/* Cannot use this here yet */
	/* orl %eax,%eax */
	/* jns L_entry_bugged */
	#endif /* PARANOID */

	cmpl EXP_UNDER,EXP(%edi)
	jle xMake_denorm /* The number is a de-normal */

	movb $0,FPU_denormal /* 0 -> not a de-normal */

	/* xMake_denorm and its variants rejoin the main path here. */
	xDenorm_done:
	movb $0,FPU_bits_lost /* No bits yet lost in rounding */

	movl %ecx,%esi /* keep the whole control word for the RC test */
	andl CW_PC,%ecx
	cmpl PR_64_BITS,%ecx
	je LRound_To_64

	cmpl PR_53_BITS,%ecx
	je LRound_To_53

	cmpl PR_24_BITS,%ecx
	je LRound_To_24

	/* Without PARANOID an unrecognised precision value falls through
	into the 24 bit rounding code which follows. */
	#ifdef PARANOID
	jmp L_bugged_denorm /* There is no bug, just a bad control word */
	#endif /* PARANOID */


	/*
	* Round etc to 24 bit precision.
	* The retained bits are the upper 24 bits of %eax.  The low 8 bits of
	* %eax, all of %ebx and the extension %edx form the discarded part.
	* %esi holds the complete control word.
	*/
	LRound_To_24:
	movl %esi,%ecx
	andl CW_RC,%ecx
	cmpl RC_RND,%ecx
	je LRound_nearest_24

	cmpl RC_CHOP,%ecx
	je LCheck_truncate_24

	cmpl RC_UP,%ecx /* Towards +infinity */
	je LUp_24

	cmpl RC_DOWN,%ecx /* Towards -infinity */
	je LDown_24

	/* Without PARANOID an unmatched value falls through into LUp_24. */
	#ifdef PARANOID
	jmp L_bugged_round24
	#endif /* PARANOID */

	LUp_24:
	cmpb SIGN_POS,SIGN(%edi)
	jne LCheck_truncate_24 /* If negative then up==truncate */

	jmp LCheck_24_round_up

	LDown_24:
	cmpb SIGN_POS,SIGN(%edi)
	je LCheck_truncate_24 /* If positive then down==truncate */

	/* Directed rounding away from zero: increment if any discarded bit
	is set, otherwise the value is already exact. */
	LCheck_24_round_up:
	movl %eax,%ecx
	andl $0x000000ff,%ecx
	orl %ebx,%ecx
	orl %edx,%ecx
	jnz LDo_24_round_up
	jmp LRe_normalise

	LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl %eax,%ecx
	andl $0x000000ff,%ecx
	cmpl $0x00000080,%ecx
	jc LCheck_truncate_24 /* less than half, no increment needed */

	jne LGreater_Half_24 /* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl %ebx,%ebx
	jnz LGreater_Half_24 /* greater than half, increment needed */

	orl %edx,%edx
	jnz LGreater_Half_24 /* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl $0x00000100,%eax
	jz LDo_truncate_24

	LGreater_Half_24: /* Rounding: increment at the 24th bit */
	LDo_24_round_up:
	andl $0xffffff00,%eax /* Truncate to 24 bits */
	xorl %ebx,%ebx
	movb LOST_UP,FPU_bits_lost
	addl $0x00000100,%eax /* carry out is examined at LCheck_Round_Overflow */
	jmp LCheck_Round_Overflow

	/* Truncating modes: record lost precision only if something is
	actually discarded. */
	LCheck_truncate_24:
	movl %eax,%ecx
	andl $0x000000ff,%ecx
	orl %ebx,%ecx
	orl %edx,%ecx
	jz LRe_normalise /* No truncation needed */

	LDo_truncate_24:
	andl $0xffffff00,%eax /* Truncate to 24 bits */
	xorl %ebx,%ebx
	movb LOST_DOWN,FPU_bits_lost
	jmp LRe_normalise


	/*
	* Round etc to 53 bit precision.
	* The retained bits are %eax plus the upper 21 bits of %ebx.  The low
	* 11 bits of %ebx and the extension %edx form the discarded part.
	* %esi holds the complete control word.
	*/
	LRound_To_53:
	movl %esi,%ecx
	andl CW_RC,%ecx
	cmpl RC_RND,%ecx
	je LRound_nearest_53

	cmpl RC_CHOP,%ecx
	je LCheck_truncate_53

	cmpl RC_UP,%ecx /* Towards +infinity */
	je LUp_53

	cmpl RC_DOWN,%ecx /* Towards -infinity */
	je LDown_53

	/* Without PARANOID an unmatched value falls through into LUp_53. */
	#ifdef PARANOID
	jmp L_bugged_round53
	#endif /* PARANOID */

	LUp_53:
	cmpb SIGN_POS,SIGN(%edi)
	jne LCheck_truncate_53 /* If negative then up==truncate */

	jmp LCheck_53_round_up

	LDown_53:
	cmpb SIGN_POS,SIGN(%edi)
	je LCheck_truncate_53 /* If positive then down==truncate */

	/* Directed rounding away from zero: increment if any discarded bit
	is set, otherwise the value is already exact. */
	LCheck_53_round_up:
	movl %ebx,%ecx
	andl $0x000007ff,%ecx
	orl %edx,%ecx
	jnz LDo_53_round_up
	jmp LRe_normalise

	LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl %ebx,%ecx
	andl $0x000007ff,%ecx
	cmpl $0x00000400,%ecx
	jc LCheck_truncate_53 /* less than half, no increment needed */

	jnz LGreater_Half_53 /* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl %edx,%edx
	jnz LGreater_Half_53 /* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl $0x00000800,%ebx
	jz LTruncate_53

	LGreater_Half_53: /* Rounding: increment at the 53rd bit */
	LDo_53_round_up:
	movb LOST_UP,FPU_bits_lost
	andl $0xfffff800,%ebx /* Truncate to 53 bits */
	addl $0x00000800,%ebx
	adcl $0,%eax /* carry out is examined at LCheck_Round_Overflow */
	jmp LCheck_Round_Overflow

	/* Truncating modes: record lost precision only if something is
	actually discarded. */
	LCheck_truncate_53:
	movl %ebx,%ecx
	andl $0x000007ff,%ecx
	orl %edx,%ecx
	jz LRe_normalise

	LTruncate_53:
	movb LOST_DOWN,FPU_bits_lost
	andl $0xfffff800,%ebx /* Truncate to 53 bits */
	jmp LRe_normalise


	/*
	* Round etc to 64 bit precision.
	* All of %eax:%ebx is retained; only the extension %edx is discarded.
	* %esi holds the complete control word.
	*/
	LRound_To_64:
	movl %esi,%ecx
	andl CW_RC,%ecx
	cmpl RC_RND,%ecx
	je LRound_nearest_64

	cmpl RC_CHOP,%ecx
	je LCheck_truncate_64

	cmpl RC_UP,%ecx /* Towards +infinity */
	je LUp_64

	cmpl RC_DOWN,%ecx /* Towards -infinity */
	je LDown_64

	/* Without PARANOID an unmatched value falls through into LUp_64. */
	#ifdef PARANOID
	jmp L_bugged_round64
	#endif /* PARANOID */

	LUp_64:
	cmpb SIGN_POS,SIGN(%edi)
	jne LCheck_truncate_64 /* If negative then up==truncate */

	orl %edx,%edx
	jnz LDo_64_round_up
	jmp LRe_normalise

	LDown_64:
	cmpb SIGN_POS,SIGN(%edi)
	je LCheck_truncate_64 /* If positive then down==truncate */

	orl %edx,%edx
	jnz LDo_64_round_up
	jmp LRe_normalise

	LRound_nearest_64:
	cmpl $0x80000000,%edx
	jc LCheck_truncate_64 /* less than half */

	jne LDo_64_round_up /* greater than half */

	/* Now test for round-to-even */
	/* Byte-sized test must name the byte register %bl; a 32 bit
	register does not match the `b' suffix. */
	testb $1,%bl
	jz LCheck_truncate_64

	LDo_64_round_up:
	movb LOST_UP,FPU_bits_lost
	addl $1,%ebx
	adcl $0,%eax

	/* Shared by the 24, 53 and 64 bit round-up paths: entered with the
	carry flag from the final add of the increment. */
	LCheck_Round_Overflow:
	jnc LRe_normalise

	/* Overflow, adjust the result (significand to 1.0) */
	rcrl $1,%eax /* rotates the carry into the ms bit */
	rcrl $1,%ebx
	incl EXP(%edi)
	jmp LRe_normalise

	LCheck_truncate_64:
	orl %edx,%edx
	jz LRe_normalise

	/* Falls through into LRe_normalise. */
	LTruncate_64:
	movb LOST_DOWN,FPU_bits_lost

	/*
	* Common tail for every rounding path.
	* In: %eax:%ebx rounded significand, %edi -> result FPU_REG.
	* A non-zero FPU_denormal diverts to xNormalise_result, which comes
	* back to xL_Normalised (or to xL_Store_significand for a zero result).
	*/
	LRe_normalise:
	testb $0xff,FPU_denormal
	jnz xNormalise_result

	/* Report lost precision, if any; both helpers return to
	xL_no_precision_loss. */
	xL_Normalised:
	cmpb LOST_UP,FPU_bits_lost
	je xL_precision_lost_up

	cmpb LOST_DOWN,FPU_bits_lost
	je xL_precision_lost_down

	xL_no_precision_loss:
	/* store the result */
	movb TW_Valid,TAG(%edi)

	xL_Store_significand:
	movl %eax,SIGH(%edi)
	movl %ebx,SIGL(%edi)

	xorl %eax,%eax /* No errors detected. */

	cmpl EXP_OVER,EXP(%edi)
	jge L_overflow

	fpu_reg_round_exit:
	#ifdef REENTRANT_FPU
	popl %ebx /* adjust the stack pointer */
	#endif /* REENTRANT_FPU */

	/* Unwinds the frame described in the file header: the three saved
	registers, then the frame pointer, then returns. */
	fpu_Arith_exit:
	popl %ebx
	popl %edi
	popl %esi
	leave
	ret


	/*
	* Precision was lost and the significand was incremented:
	* have the C helper set the FPU status flags.
	* %eax (ms half of the significand) is preserved around the call.
	*/
	xL_precision_lost_up:
	pushl %eax
	call _set_precision_flag_up
	popl %eax
	jmp xL_no_precision_loss

	/*
	* Precision was lost and the significand was truncated:
	* have the C helper set the FPU status flags.
	* %eax (ms half of the significand) is preserved around the call.
	*/
	xL_precision_lost_down:
	pushl %eax
	call _set_precision_flag_down
	popl %eax
	jmp xL_no_precision_loss


	/*
	* The number is a denormal (which might get rounded up to a normal)
	* Shift the number right the required number of bits, which will
	* have to be undone later...
	*
	* In: %eax:%ebx significand, %edx extension, %ecx control word,
	* %edi -> FPU_REG whose exponent is <= EXP_UNDER.
	* Out: significand and extension shifted right, exponent raised by the
	* shift count, %ecx restored; continues at xDenorm_done.
	* Bits shifted out of the extension are not dropped silently: a low
	* bit is OR-ed into the new extension whenever any of them was set.
	*/
	xMake_denorm:
	/* The action to be taken depends upon whether the underflow
	exception is masked */
	testb CW_Underflow,%cl /* Underflow mask. */
	jz xUnmasked_underflow /* Do not make a denormal. */

	movb DENORMAL,FPU_denormal

	pushl %ecx /* Save */
	movl EXP_UNDER+1,%ecx
	subl EXP(%edi),%ecx /* %ecx = number of bits to shift right */

	cmpl $64,%ecx /* shifts of 64 or more are handled separately */
	jnc xDenorm_shift_more_than_63

	cmpl $32,%ecx /* shrd only works for 0..31 bits */
	jnc xDenorm_shift_more_than_32

	/*
	* We got here without jumps by assuming that the most common requirement
	* is for a small de-normalising shift.
	* Shift by [1..31] bits
	*/
	addl %ecx,EXP(%edi)
	orl %edx,%edx /* extension */
	setne %ch /* Save whether %edx is non-zero */
	xorl %edx,%edx
	shrd %cl,%ebx,%edx /* bits leaving %ebx become the new extension */
	shrd %cl,%eax,%ebx
	shr %cl,%eax
	orb %ch,%dl /* remember that the old extension was non-zero */
	popl %ecx
	jmp xDenorm_done

	/* Shift by [32..63] bits */
	xDenorm_shift_more_than_32:
	addl %ecx,EXP(%edi)
	subb $32,%cl /* the other 32 bits are done by the register moves below */
	orl %edx,%edx
	setne %ch /* %ch = 1 if the old extension was non-zero */
	orb %ch,%bl
	xorl %edx,%edx
	shrd %cl,%ebx,%edx
	shrd %cl,%eax,%ebx
	shr %cl,%eax
	orl %edx,%edx /* test these 32 bits */
	setne %cl
	orb %ch,%bl
	orb %cl,%bl
	movl %ebx,%edx /* move everything down one register */
	movl %eax,%ebx
	xorl %eax,%eax
	popl %ecx
	jmp xDenorm_done

	/* Shift by [64..) bits */
	xDenorm_shift_more_than_63:
	cmpl $64,%ecx
	jne xDenorm_shift_more_than_64

	/* Exactly 64 bit shift */
	addl %ecx,EXP(%edi)
	xorl %ecx,%ecx
	orl %edx,%edx
	setne %cl
	orl %ebx,%ebx
	setne %ch
	orb %ch,%cl /* %cl = 1 if %edx or %ebx held any set bit */
	orb %cl,%al
	movl %eax,%edx /* old ms word becomes the extension */
	xorl %eax,%eax
	xorl %ebx,%ebx
	popl %ecx
	jmp xDenorm_done

	xDenorm_shift_more_than_64:
	movl EXP_UNDER+1,EXP(%edi)
	/* This is easy, %eax must be non-zero, so.. */
	movl $1,%edx /* extension is just "something non-zero was lost" */
	xorl %eax,%eax
	xorl %ebx,%ebx
	popl %ecx
	jmp xDenorm_done


	/* Underflow exception not masked: leave the operand unshifted and
	only record the fact; it is acted upon at xNormalise_result. */
	xUnmasked_underflow:
	movb UNMASKED_UNDERFLOW,FPU_denormal
	jmp xDenorm_done


	/*
	* Undo the de-normalisation.
	* Reached from LRe_normalise when FPU_denormal is non-zero.
	* In: %eax:%ebx rounded significand, %edi -> result FPU_REG.
	* The unmasked-underflow case is handed to xSignal_underflow; otherwise
	* the significand is shifted left until its ms bit is set, with the
	* exponent reduced by the same amount.
	*/
	xNormalise_result:
	cmpb UNMASKED_UNDERFLOW,FPU_denormal
	je xSignal_underflow

	/* The number must be a denormal if we got here. */
	#ifdef PARANOID
	/* But check it... just in case. */
	cmpl EXP_UNDER+1,EXP(%edi)
	jne L_norm_bugged
	#endif /* PARANOID */

	#ifdef PECULIAR_486
	/*
	* This implements a special feature of 80486 behaviour.
	* Underflow will be signalled even if the number is
	* not a denormal after rounding.
	* This difference occurs only for masked underflow, and not
	* in the unmasked case.
	* Actual 80486 behaviour differs from this in some circumstances.
	*/
	orl %eax,%eax /* ms bits */
	js LNormalise_shift_done /* Will be masked underflow */
	#endif /* PECULIAR_486 */

	orl %eax,%eax /* ms bits */
	js xL_Normalised /* No longer a denormal */

	jnz LNormalise_shift_up_to_31 /* Shift left 0 - 31 bits */

	orl %ebx,%ebx
	jz L_underflow_to_zero /* The contents are zero */

	/* Shift left 32 - 63 bits */
	movl %ebx,%eax
	xorl %ebx,%ebx
	subl $32,EXP(%edi)

	LNormalise_shift_up_to_31:
	bsrl %eax,%ecx /* get the required shift in %ecx */
	subl $31,%ecx
	negl %ecx /* %ecx = 31 - (index of highest set bit) */
	shld %cl,%ebx,%eax
	shl %cl,%ebx
	subl %ecx,EXP(%edi)

	LNormalise_shift_done:
	testb $0xff,FPU_bits_lost /* bits lost == underflow */
	jz xL_Normalised

	/* There must be a masked underflow */
	push %eax /* preserve ms half of the significand */
	pushl EX_Underflow
	call _exception
	popl %eax /* discard the argument */
	popl %eax /* restore the significand */
	jmp xL_Normalised


	/*
	* Masked underflow where nothing is left of the significand:
	* %eax and %ebx are both zero on arrival.  Flags precision loss
	* (downwards) and underflow, then stores a zero result.
	*/
	L_underflow_to_zero:
	pushl %eax
	call _set_precision_flag_down
	popl %eax

	pushl %eax /* keep %eax across the call */
	pushl EX_Underflow /* argument for _exception */
	call _exception
	popl %eax /* drop the argument */
	popl %eax /* recover the saved %eax */

	/* The stored exponent becomes EXP_UNDER and the tag TW_Zero */
	movl EXP_UNDER,EXP(%edi)
	movb TW_Zero,TAG(%edi)
	jmp xL_Store_significand


	/*
	* Exponent is at or above EXP_OVER: the result is too large to
	* represent.  Pass the FPU_REG pointer to _arith_overflow; whatever
	* it leaves in %eax is carried to the common exit.
	*/
	L_overflow:
	pushl %edi
	call _arith_overflow
	popl %edi
	jmp fpu_reg_round_exit


	/*
	* Unmasked underflow was noted on entry.  Rounding may since have
	* lifted the exponent above EXP_UNDER, in which case the result is
	* an ordinary number and nothing is signalled.
	*/
	xSignal_underflow:
	cmpl EXP_UNDER,EXP(%edi)
	jg xL_Normalised /* no longer below the normal range */

	xDo_unmasked_underflow:
	/* Bias the exponent upward by the magic number, then raise
	the underflow exception */
	addl $(3*(1<<13)),EXP(%edi)
	pushl %eax /* keep %eax across the call */
	pushl EX_Underflow
	call EXCEPTION
	popl %eax /* drop the argument */
	popl %eax /* recover the saved %eax */
	jmp xL_Normalised


	#ifdef PARANOID
	/* If we ever get here then we have problems! */
	/*
	* Internal-consistency traps.  Each one raises EX_INTERNAL combined
	* with its own code (0x230..0x235), removes the argument from the
	* stack, and leaves through L_exception_exit with %eax = 1.
	* The value popped into %ebx is discarded: the common exit reloads
	* %ebx from the saved copy on the stack.
	*/
	L_bugged_denorm:
	pushl EX_INTERNAL|0x230
	call EXCEPTION
	popl %ebx
	jmp L_exception_exit

	L_bugged_round24:
	pushl EX_INTERNAL|0x231
	call EXCEPTION
	popl %ebx
	jmp L_exception_exit

	L_bugged_round53:
	pushl EX_INTERNAL|0x232
	call EXCEPTION
	popl %ebx
	jmp L_exception_exit

	L_bugged_round64:
	pushl EX_INTERNAL|0x233
	call EXCEPTION
	popl %ebx
	jmp L_exception_exit

	L_norm_bugged:
	pushl EX_INTERNAL|0x234
	call EXCEPTION
	popl %ebx
	jmp L_exception_exit

	L_entry_bugged:
	pushl EX_INTERNAL|0x235
	call EXCEPTION
	popl %ebx
	L_exception_exit:
	mov $1,%eax
	jmp fpu_reg_round_exit
	#endif /* PARANOID */