blob: 6da3aadcff88dffb2498ed7beb39fb6fb04b390b [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
/*
* Modify History
*
* who when what
* --- ---- ----
* stone 2004-09-02 Add SIMD floating emulation code
* fire3 2008-12-27 Add SIMD floating emulation code for SW64
*/
#include <linux/uaccess.h>
#include <asm/ptrace.h>
#include "sfp-util.h"
#include <math-emu/soft-fp.h>
#include <math-emu/single.h>
#include <math-emu/double.h>
/* Compile-time switch: make this non-zero to enable DEBUG_INFO() output. */
#define math_debug 0

/*
 * Print a KERN_DEBUG message through printk() when math_debug is non-zero.
 * The call is guarded by the test, so the arguments are not evaluated while
 * math_debug is zero.
 */
#define DEBUG_INFO(fmt, ...) \
	do { \
		if (math_debug) \
			printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
	} while (0)
/*
* This is for sw64
*/
#define IEEE_E_STATUS_MASK IEEE_STATUS_MASK
#define IEEE_E_STATUS_TO_EXCSUM_SHIFT 0
/*
 * Operand classes stored in X##_c by the SW64_FP_NAN_S/D and
 * SW64_FP_NORMAL_S/D macros below.
 */
#define SW64_FP_DENOMAL 1 /* A denormal: exponent field 0, fraction non-zero */
#define SW64_FP_NORMAL 0 /* Anything the classifying macro does not flag (zero and infinity included) */
#define SW64_FP_NAN 2 /* A NaN: exponent field all ones, fraction non-zero */
/*
 * View the value at @val through union _FP_UNION_S, copy its fraction,
 * exponent and sign fields into X##_f, X##_e and X##_s, and classify it in
 * X##_c: SW64_FP_NAN when the exponent field is 255 and the fraction is
 * non-zero, SW64_FP_NORMAL for every other bit pattern.
 */
#define SW64_FP_NAN_S(X, val) \
	do { \
		union _FP_UNION_S *_raw_s = (union _FP_UNION_S *)(val); \
		\
		X##_f = _raw_s->bits.frac; \
		X##_e = _raw_s->bits.exp; \
		X##_s = _raw_s->bits.sign; \
		\
		if (X##_e == 255 && !_FP_FRAC_ZEROP_1(X)) \
			X##_c = SW64_FP_NAN; \
		else \
			X##_c = SW64_FP_NORMAL; \
	} while (0)
/*
 * View the value at @val through union _FP_UNION_D, copy its fraction,
 * exponent and sign fields into X##_f, X##_e and X##_s, and classify it in
 * X##_c: SW64_FP_NAN when the exponent field is 2047 and the fraction is
 * non-zero, SW64_FP_NORMAL for every other bit pattern.
 */
#define SW64_FP_NAN_D(X, val) \
	do { \
		union _FP_UNION_D *_raw_d = (union _FP_UNION_D *)(val); \
		\
		X##_f = _raw_d->bits.frac; \
		X##_e = _raw_d->bits.exp; \
		X##_s = _raw_d->bits.sign; \
		\
		if (X##_e == 2047 && !_FP_FRAC_ZEROP_1(X)) \
			X##_c = SW64_FP_NAN; \
		else \
			X##_c = SW64_FP_NORMAL; \
	} while (0)
/*
 * View the value at @val through union _FP_UNION_S, copy its fraction,
 * exponent and sign fields into X##_f, X##_e and X##_s, and classify it in
 * X##_c: SW64_FP_DENOMAL when the exponent field is 0 and the fraction is
 * non-zero, SW64_FP_NORMAL for every other bit pattern.
 */
#define SW64_FP_NORMAL_S(X, val) \
	do { \
		union _FP_UNION_S *_raw_s = (union _FP_UNION_S *)(val); \
		\
		X##_f = _raw_s->bits.frac; \
		X##_e = _raw_s->bits.exp; \
		X##_s = _raw_s->bits.sign; \
		\
		if (X##_e == 0 && !_FP_FRAC_ZEROP_1(X)) \
			X##_c = SW64_FP_DENOMAL; \
		else \
			X##_c = SW64_FP_NORMAL; \
	} while (0)
/*
 * View the value at @val through union _FP_UNION_D, copy its fraction,
 * exponent and sign fields into X##_f, X##_e and X##_s, and classify it in
 * X##_c: SW64_FP_DENOMAL when the exponent field is 0 and the fraction is
 * non-zero, SW64_FP_NORMAL for every other bit pattern.
 */
#define SW64_FP_NORMAL_D(X, val) \
	do { \
		union _FP_UNION_D *_raw_d = (union _FP_UNION_D *)(val); \
		\
		X##_f = _raw_d->bits.frac; \
		X##_e = _raw_d->bits.exp; \
		X##_s = _raw_d->bits.sign; \
		\
		if (X##_e == 0 && !_FP_FRAC_ZEROP_1(X)) \
			X##_c = SW64_FP_DENOMAL; \
		else \
			X##_c = SW64_FP_NORMAL; \
	} while (0)
/* Operation Code for SW64 */
/*
 * Primary opcodes. sw64_fp_emul() compares these against bits 31..26 of the
 * instruction word. OP_SIMD_1/OP_SIMD_2 carry the same values as
 * OP_SIMD_NORMAL/OP_SIMD_MUL_ADD.
 */
#define OP_SIMD_1 0x1A
#define OP_SIMD_2 0x1B
#define OP_SIMD_MUL_ADD 0x1B
#define OP_SIMD_NORMAL 0x1A
#define OP_MUL_ADD 0x19
/*
 * Function codes of the scalar multiply-add group (OP_MUL_ADD).
 * NOTE(review): presumably decoded by mul_add_fp_emul(), which is not
 * visible in this part of the file -- confirm there.
 */
#define FNC_FMAS 0x0
#define FNC_FMAD 0x1
#define FNC_FMSS 0x2
#define FNC_FMSD 0x3
#define FNC_FNMAS 0x4
#define FNC_FNMAD 0x5
#define FNC_FNMSS 0x6
#define FNC_FNMSD 0x7
/*
 * Function codes of the OP_SIMD_NORMAL group, taken from bits 12..5 of the
 * instruction word. sw64_fp_emul() dispatches the arithmetic and compare
 * codes; the FNC_VCPYS* codes are defined here but not dispatched by it.
 */
#define FNC_VADDS 0x80
#define FNC_VADDD 0x81
#define FNC_VSUBS 0x82
#define FNC_VSUBD 0x83
#define FNC_VMULS 0x84
#define FNC_VMULD 0x85
#define FNC_VDIVS 0x86
#define FNC_VDIVD 0x87
#define FNC_VSQRTS 0x88
#define FNC_VSQRTD 0x89
#define FNC_VFCMPEQ 0x8c
#define FNC_VFCMPLE 0x8d
#define FNC_VFCMPLT 0x8e
#define FNC_VFCMPUN 0x8f
#define FNC_VCPYS 0x90
#define FNC_VCPYSE 0x91
#define FNC_VCPYSN 0x92
/*
 * Function codes of the OP_SIMD_MUL_ADD group, taken from bits 15..10 of the
 * instruction word by sw64_fp_emul().
 */
#define FNC_VMAS 0x0
#define FNC_VMAD 0x1
#define FNC_VMSS 0x2
#define FNC_VMSD 0x3
#define FNC_VNMAS 0x4
#define FNC_VNMAD 0x5
#define FNC_VNMSS 0x6
#define FNC_VNMSD 0x7
/*
 * Emulation entry points that sw64_fp_emul() dispatches to. Each takes the
 * user-space address of the instruction to emulate; sw64_fp_emul() returns
 * their result unchanged as its own return value.
 */
long simd_fp_emul_s(unsigned long pc);
long simd_fp_emul_d(unsigned long pc);
long mul_add_fp_emul(unsigned long pc);
long simd_cmp_emul_d(unsigned long pc);
long simd_mul_add_fp_emul_d(unsigned long pc);
long simd_mul_add_fp_emul_s(unsigned long pc);
/*
 * Accessors for one FP register seen as four unsigned long parts (p0..p3):
 * the read variants store the parts through the four pointers, the write
 * variants take the four parts by value.
 * NOTE(review): the _s/_d suffixes presumably select single/double element
 * layout; the definitions are not visible here -- confirm.
 */
void read_fp_reg_s(unsigned long reg, unsigned long *p0,
unsigned long *p1, unsigned long *p2, unsigned long *p3);
void read_fp_reg_d(unsigned long reg, unsigned long *val_p0,
unsigned long *p1, unsigned long *p2, unsigned long *p3);
void write_fp_reg_s(unsigned long reg, unsigned long val_p0,
unsigned long p1, unsigned long p2, unsigned long p3);
void write_fp_reg_d(unsigned long reg, unsigned long val_p0,
unsigned long p1, unsigned long p2, unsigned long p3);
/* NOTE(review): not referenced in the visible part of this file. */
#define LOW_64_WORKING 1
#define HIGH_64_WORKING 2
/*
* End for sw64
*/
/*
 * Primary opcodes (instruction bits 31..26) examined by
 * sw64_fp_emul_imprecise() while it scans backwards through the trap shadow.
 */
#define OPC_HMC 0x00
#define OPC_INTA 0x10
#define OPC_INTL 0x11
#define OPC_INTS 0x12
#define OPC_INTM 0x13
#define OPC_FLTC 0x14
#define OPC_FLTV 0x15
#define OPC_FLTI 0x16
#define OPC_FLTL 0x17
#define OPC_MISC 0x18
#define OPC_JSR 0x1a
/*
 * NOTE(review): the FOP_SRC_* and FOP_FNC_*x names below are not referenced
 * by the code visible in this part of the file.
 */
#define FOP_SRC_S 0
#define FOP_SRC_T 2
#define FOP_SRC_Q 3
#define FOP_FNC_ADDx 0
#define FOP_FNC_CVTQL 0
#define FOP_FNC_SUBx 1
#define FOP_FNC_MULx 2
#define FOP_FNC_DIVx 3
#define FOP_FNC_CMPxUN 4
#define FOP_FNC_CMPxEQ 5
#define FOP_FNC_CMPxLT 6
#define FOP_FNC_CMPxLE 7
#define FOP_FNC_SQRTx 11
#define FOP_FNC_CVTxS 12
#define FOP_FNC_CVTxT 14
#define FOP_FNC_CVTxQ 15
/* this is for sw64 added by fire3*/
/*
 * Scalar function codes (instruction bits 12..5) decoded by sw64_fp_emul().
 * The S/D suffix selects the single- or double-precision path there.
 */
#define FOP_FNC_ADDS 0
#define FOP_FNC_ADDD 1
#define FOP_FNC_SUBS 2
#define FOP_FNC_SUBD 3
#define FOP_FNC_MULS 4
#define FOP_FNC_MULD 5
#define FOP_FNC_DIVS 6
#define FOP_FNC_DIVD 7
#define FOP_FNC_SQRTS 8
#define FOP_FNC_SQRTD 9
/* Compare codes: sw64_fp_emul() recognises the group by (func & ~0xf) == 0x10. */
#define FOP_FNC_CMPEQ 0x10
#define FOP_FNC_CMPLE 0x11
#define FOP_FNC_CMPLT 0x12
#define FOP_FNC_CMPUN 0x13
/* Conversion codes. */
#define FOP_FNC_CVTSD 0x20
#define FOP_FNC_CVTDS 0x21
#define FOP_FNC_CVTLS 0x2D
#define FOP_FNC_CVTLD 0x2F
/*
 * Double-to-integer conversions: sw64_fp_emul() uses the current dynamic
 * rounding mode for CVTDL and forces mode 2, 3, 0 or 1 for the _G, _P, _Z
 * and _N variants respectively.
 */
#define FOP_FNC_CVTDL 0x27
#define FOP_FNC_CVTDL_G 0x22
#define FOP_FNC_CVTDL_P 0x23
#define FOP_FNC_CVTDL_Z 0x24
#define FOP_FNC_CVTDL_N 0x25
#define FOP_FNC_CVTWL 0x28
#define FOP_FNC_CVTLW 0x29
/* fire3 added end */
/*
 * Low 16 bits of an OPC_MISC instruction that end the trap-shadow scan in
 * sw64_fp_emul_imprecise().
 */
#define MISC_TRAPB 0x0000
#define MISC_EXCB 0x0400
/*
 * Scalar FP register accessors defined outside this file. sw64_fp_emul()
 * uses the plain pair for double-precision and integer operands and the _s
 * pair for single-precision operands.
 */
extern unsigned long sw64_read_fp_reg(unsigned long reg);
extern void sw64_write_fp_reg(unsigned long reg, unsigned long val);
extern unsigned long sw64_read_fp_reg_s(unsigned long reg);
extern void sw64_write_fp_reg_s(unsigned long reg, unsigned long val);
#ifdef MODULE
/*
 * Module build only: the emulation routines are installed into two
 * kernel-side function pointers at load time and the previous values are put
 * back at unload time.
 */
MODULE_DESCRIPTION("FP Software completion module");
/* Hook pointers defined outside this file; overridden while the module is loaded. */
extern long (*sw64_fp_emul_imprecise)(struct pt_regs *, unsigned long);
extern long (*sw64_fp_emul)(unsigned long pc);
/* Hook values found at load time, restored by cleanup_module(). */
static long (*save_emul_imprecise)(struct pt_regs *, unsigned long);
static long (*save_emul)(unsigned long pc);
long do_sw_fp_emul_imprecise(struct pt_regs *, unsigned long);
long do_sw_fp_emul(unsigned long);
/* Save the current hooks, then point them at this module's routines. Always returns 0. */
int init_module(void)
{
save_emul_imprecise = sw64_fp_emul_imprecise;
save_emul = sw64_fp_emul;
sw64_fp_emul_imprecise = do_sw_fp_emul_imprecise;
sw64_fp_emul = do_sw_fp_emul;
return 0;
}
/* Put back the hooks that init_module() saved. */
void cleanup_module(void)
{
sw64_fp_emul_imprecise = save_emul_imprecise;
sw64_fp_emul = save_emul;
}
/*
 * From this point on the two hook names expand to the do_sw_* names, so in a
 * module build the function definitions that follow define
 * do_sw_fp_emul_imprecise() and do_sw_fp_emul() rather than clashing with
 * the hook pointers declared above.
 */
#undef sw64_fp_emul_imprecise
#define sw64_fp_emul_imprecise do_sw_fp_emul_imprecise
#undef sw64_fp_emul
#define sw64_fp_emul do_sw_fp_emul
#endif /* MODULE */
/*
* Emulate the floating point instruction at address PC. Returns -1 if the
* instruction to be emulated is illegal (such as with the opDEC trap), else
* the SI_CODE for a SIGFPE signal, else 0 if everything's ok.
*
* Notice that the kernel does not and cannot use FP regs. This is good
* because it means that instead of saving/restoring all fp regs, we simply
* stick the result of the operation into the appropriate register.
*/
long sw64_fp_emul(unsigned long pc)
{
FP_DECL_EX;
FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
unsigned long fa, fb, fc, func, mode, mode_bk, src;
unsigned long res, va, vb, vc, swcr, fpcr;
__u32 insn;
long si_code;
unsigned long opcode;
get_user(insn, (__u32 *)pc);
opcode = (insn >> 26) & 0x3f;
fc = (insn >> 0) & 0x1f; /* destination register */
fb = (insn >> 16) & 0x1f;
fa = (insn >> 21) & 0x1f;
func = (insn >> 5) & 0xff;
fpcr = rdfpcr();
mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3;
DEBUG_INFO("======= Entering Floating mathe emulation =====\n");
DEBUG_INFO("Floating math emulation insn = %#lx, opcode=%d, func=%d\n", insn, opcode, func);
DEBUG_INFO("SW64 hardware fpcr = %#lx\n", fpcr);
swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
DEBUG_INFO("SW64 software swcr = %#lx\n", swcr);
DEBUG_INFO("fa:%#lx,fb:%#lx,fc:%#lx,func:%#lx,mode:%#lx\n", fa, fb, fc, func, mode);
if (opcode == OP_SIMD_NORMAL) { /* float simd math */
if (func == FNC_VADDS || func == FNC_VSUBS || func == FNC_VSQRTS
|| func == FNC_VMULS || func == FNC_VDIVS)
si_code = simd_fp_emul_s(pc);
if (func == FNC_VADDD || func == FNC_VSUBD || func == FNC_VSQRTD
|| func == FNC_VMULD || func == FNC_VDIVD)
si_code = simd_fp_emul_d(pc);
if (func == FNC_VFCMPUN || func == FNC_VFCMPLT || func == FNC_VFCMPLE
|| func == FNC_VFCMPEQ)
si_code = simd_cmp_emul_d(pc);
return si_code;
}
if (opcode == OP_SIMD_MUL_ADD) {/* simd mul and add */
func = (insn >> 10) & 0x3f;
if (func == FNC_VMAS || func == FNC_VMSS || func == FNC_VNMAS
|| func == FNC_VNMSS) {
si_code = simd_mul_add_fp_emul_s(pc);
return si_code;
}
if (func == FNC_VMAD || func == FNC_VMSD || func == FNC_VNMAD
|| func == FNC_VNMSD) {
si_code = simd_mul_add_fp_emul_d(pc);
return si_code;
}
func = (insn >> 5) & 0xff;
}
if (opcode == OP_MUL_ADD) {
si_code = mul_add_fp_emul(pc);
return si_code;
}
switch (func) {
case FOP_FNC_SUBS:
va = sw64_read_fp_reg_s(fa);
vb = sw64_read_fp_reg_s(fb);
FP_UNPACK_SP(SA, &va);
FP_UNPACK_SP(SB, &vb);
FP_SUB_S(SR, SA, SB);
goto pack_s;
case FOP_FNC_SUBD:
va = sw64_read_fp_reg(fa);
vb = sw64_read_fp_reg(fb);
FP_UNPACK_DP(DA, &va);
FP_UNPACK_DP(DB, &vb);
FP_SUB_D(DR, DA, DB);
goto pack_d;
case FOP_FNC_ADDS:
va = sw64_read_fp_reg_s(fa);
vb = sw64_read_fp_reg_s(fb);
FP_UNPACK_SP(SA, &va);
FP_UNPACK_SP(SB, &vb);
FP_ADD_S(SR, SA, SB);
goto pack_s;
case FOP_FNC_ADDD:
va = sw64_read_fp_reg(fa);
vb = sw64_read_fp_reg(fb);
FP_UNPACK_DP(DA, &va);
FP_UNPACK_DP(DB, &vb);
FP_ADD_D(DR, DA, DB);
goto pack_d;
case FOP_FNC_MULS:
va = sw64_read_fp_reg_s(fa);
vb = sw64_read_fp_reg_s(fb);
FP_UNPACK_SP(SA, &va);
FP_UNPACK_SP(SB, &vb);
FP_MUL_S(SR, SA, SB);
goto pack_s;
case FOP_FNC_MULD:
va = sw64_read_fp_reg(fa);
vb = sw64_read_fp_reg(fb);
FP_UNPACK_DP(DA, &va);
FP_UNPACK_DP(DB, &vb);
FP_MUL_D(DR, DA, DB);
goto pack_d;
case FOP_FNC_DIVS:
DEBUG_INFO("FOP_FNC_DIVS\n");
va = sw64_read_fp_reg_s(fa);
vb = sw64_read_fp_reg_s(fb);
FP_UNPACK_SP(SA, &va);
FP_UNPACK_SP(SB, &vb);
FP_DIV_S(SR, SA, SB);
goto pack_s;
case FOP_FNC_DIVD:
DEBUG_INFO("FOP_FNC_DIVD\n");
va = sw64_read_fp_reg(fa);
vb = sw64_read_fp_reg(fb);
FP_UNPACK_DP(DA, &va);
FP_UNPACK_DP(DB, &vb);
FP_DIV_D(DR, DA, DB);
goto pack_d;
case FOP_FNC_SQRTS:
va = sw64_read_fp_reg_s(fa);
vb = sw64_read_fp_reg_s(fb);
FP_UNPACK_SP(SA, &va);
FP_UNPACK_SP(SB, &vb);
FP_SQRT_S(SR, SB);
goto pack_s;
case FOP_FNC_SQRTD:
va = sw64_read_fp_reg(fa);
vb = sw64_read_fp_reg(fb);
FP_UNPACK_DP(DA, &va);
FP_UNPACK_DP(DB, &vb);
FP_SQRT_D(DR, DB);
goto pack_d;
}
va = sw64_read_fp_reg(fa);
vb = sw64_read_fp_reg(fb);
if ((func & ~0xf) == FOP_FNC_CMPEQ) {
va = sw64_read_fp_reg(fa);
vb = sw64_read_fp_reg(fb);
FP_UNPACK_RAW_DP(DA, &va);
FP_UNPACK_RAW_DP(DB, &vb);
if (!DA_e && !_FP_FRAC_ZEROP_1(DA)) {
FP_SET_EXCEPTION(FP_EX_DENORM);
if (FP_DENORM_ZERO)
_FP_FRAC_SET_1(DA, _FP_ZEROFRAC_1);
}
if (!DB_e && !_FP_FRAC_ZEROP_1(DB)) {
FP_SET_EXCEPTION(FP_EX_DENORM);
if (FP_DENORM_ZERO)
_FP_FRAC_SET_1(DB, _FP_ZEROFRAC_1);
}
FP_CMP_D(res, DA, DB, 3);
vc = 0x4000000000000000;
/* CMPTEQ, CMPTUN don't trap on QNaN, while CMPTLT and CMPTLE do */
if (res == 3 && (((func == FOP_FNC_CMPLT) || (func == FOP_FNC_CMPLE))
|| FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB))) {
DEBUG_INFO("CMPLT CMPLE:func:%d, trap on QNaN.", func);
FP_SET_EXCEPTION(FP_EX_INVALID);
}
switch (func) {
case FOP_FNC_CMPUN:
if (res != 3)
vc = 0;
break;
case FOP_FNC_CMPEQ:
if (res)
vc = 0;
break;
case FOP_FNC_CMPLT:
if (res != -1)
vc = 0;
break;
case FOP_FNC_CMPLE:
if ((long)res > 0)
vc = 0;
break;
}
goto done_d;
}
FP_UNPACK_DP(DA, &va);
FP_UNPACK_DP(DB, &vb);
if (func == FOP_FNC_CVTSD) {
vb = sw64_read_fp_reg_s(fb);
FP_UNPACK_SP(SB, &vb);
DR_c = DB_c;
DR_s = DB_s;
DR_e = DB_e + (1024 - 128);
DR_f = SB_f << (52 - 23);
goto pack_d;
}
if (func == FOP_FNC_CVTDS) {
FP_CONV(S, D, 1, 1, SR, DB);
goto pack_s;
}
if (func == FOP_FNC_CVTDL || func == FOP_FNC_CVTDL_G || func == FOP_FNC_CVTDL_P
|| func == FOP_FNC_CVTDL_Z || func == FOP_FNC_CVTDL_N) {
mode_bk = mode;
if (func == FOP_FNC_CVTDL_Z)
mode = 0x0UL;
else if (func == FOP_FNC_CVTDL_N)
mode = 0x1UL;
else if (func == FOP_FNC_CVTDL_G)
mode = 0x2UL;
else if (func == FOP_FNC_CVTDL_P)
mode = 0x3UL;
if (DB_c == FP_CLS_NAN && (_FP_FRAC_HIGH_RAW_D(DB) & _FP_QNANBIT_D)) {
/* AAHB Table B-2 says QNaN should not trigger INV */
vc = 0;
} else
FP_TO_INT_ROUND_D(vc, DB, 64, 2);
mode = mode_bk;
goto done_d;
}
vb = sw64_read_fp_reg(fb);
switch (func) {
case FOP_FNC_CVTLW:
/*
* Notice: We can get here only due to an integer
* overflow. Such overflows are reported as invalid
* ops. We return the result the hw would have
* computed.
*/
vc = ((vb & 0xc0000000) << 32 | /* sign and msb */
(vb & 0x3fffffff) << 29); /* rest of the int */
FP_SET_EXCEPTION(FP_EX_INVALID);
goto done_d;
case FOP_FNC_CVTLS:
FP_FROM_INT_S(SR, ((long)vb), 64, long);
goto pack_s;
case FOP_FNC_CVTLD:
FP_FROM_INT_D(DR, ((long)vb), 64, long);
goto pack_d;
}
goto bad_insn;
pack_s:
FP_PACK_SP(&vc, SR);
if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
vc = 0;
DEBUG_INFO("SW64 Emulation S-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc);
DEBUG_INFO("SW64 Emulation S-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr);
sw64_write_fp_reg_s(fc, vc);
goto done;
pack_d:
FP_PACK_DP(&vc, DR);
if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
vc = 0;
DEBUG_INFO("SW64 Emulation D-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc);
DEBUG_INFO("SW64 Emulation D-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr);
done_d:
sw64_write_fp_reg(fc, vc);
goto done;
/*
* Take the appropriate action for each possible
* floating-point result:
*
* - Set the appropriate bits in the FPCR
* - If the specified exception is enabled in the FPCR,
* return. The caller (entArith) will dispatch
* the appropriate signal to the translated program.
*
* In addition, properly track the exception state in software
* as described in the SW64 Architecture Handbook section 4.7.7.3.
*/
done:
if (_fex) {
/* Record exceptions in software control word. */
swcr |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr |= ieee_swcr_to_fpcr(swcr);
DEBUG_INFO("SW64 before write fpcr = %#lx\n", fpcr);
wrfpcr(fpcr);
/* Do we generate a signal? */
_fex = _fex & swcr & IEEE_TRAP_ENABLE_MASK;
si_code = 0;
if (_fex) {
if (_fex & IEEE_TRAP_ENABLE_DNO)
si_code = FPE_FLTUND;
if (_fex & IEEE_TRAP_ENABLE_INE)
si_code = FPE_FLTRES;
if (_fex & IEEE_TRAP_ENABLE_UNF)
si_code = FPE_FLTUND;
if (_fex & IEEE_TRAP_ENABLE_OVF)
si_code = FPE_FLTOVF;
if (_fex & IEEE_TRAP_ENABLE_DZE)
si_code = FPE_FLTDIV;
if (_fex & IEEE_TRAP_ENABLE_INV)
si_code = FPE_FLTINV;
}
return si_code;
}
/*
* We used to write the destination register here, but DEC FORTRAN
* requires that the result *always* be written... so we do the write
* immediately after the operations above.
*/
return 0;
bad_insn:
printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc);
return -1;
}
long sw64_fp_emul_imprecise(struct pt_regs *regs, unsigned long write_mask)
{
unsigned long trigger_pc = regs->pc - 4;
unsigned long insn, opcode, rc, si_code = 0;
/*
* Turn off the bits corresponding to registers that are the
* target of instructions that set bits in the exception
* summary register. We have some slack doing this because a
* register that is the target of a trapping instruction can
* be written at most once in the trap shadow.
*
* Branches, jumps, TRAPBs, EXCBs and calls to HMcode all
* bound the trap shadow, so we need not look any further than
* up to the first occurrence of such an instruction.
*/
while (write_mask) {
get_user(insn, (__u32 *)(trigger_pc));
opcode = insn >> 26;
rc = insn & 0x1f;
switch (opcode) {
case OPC_HMC:
case OPC_JSR:
case 0x30 ... 0x3f: /* branches */
goto egress;
case OPC_MISC:
switch (insn & 0xffff) {
case MISC_TRAPB:
case MISC_EXCB:
goto egress;
default:
break;
}
break;
case OPC_INTA:
case OPC_INTL:
case OPC_INTS:
case OPC_INTM:
write_mask &= ~(1UL << rc);
break;
case OPC_FLTC:
case OPC_FLTV:
case OPC_FLTI:
case OPC_FLTL:
write_mask &= ~(1UL << (rc + 32));
break;
}
if (!write_mask) {
/* Re-execute insns in the trap-shadow. */
regs->pc = trigger_pc + 4;
si_code = sw64_fp_emul(trigger_pc);
goto egress;
}
trigger_pc -= 4;
}
egress:
return si_code;
}
/*
 * Index of the register part (p0..p3) that the SIMD emulation routines
 * below are currently processing.
 */
#define WORKING_PART_0 0
#define WORKING_PART_1 1
#define WORKING_PART_2 2
#define WORKING_PART_3 3
/*
* This is for sw64
*/
long simd_cmp_emul_d(unsigned long pc)
{
FP_DECL_EX;
FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); FP_DECL_D(DC);
unsigned long fa, fb, fc, func, mode, src;
unsigned long res, va, vb, vc, swcr, fpcr;
__u32 insn;
long si_code;
unsigned long va_p0, va_p1, va_p2, va_p3;
unsigned long vb_p0, vb_p1, vb_p2, vb_p3;
unsigned long vc_p0, vc_p1, vc_p2, vc_p3;
unsigned long fex_p0, fex_p1, fex_p2, fex_p3;
int working_part;
get_user(insn, (__u32 *)pc);
fc = (insn >> 0) & 0x1f; /* destination register */
fb = (insn >> 16) & 0x1f;
fa = (insn >> 21) & 0x1f;
func = (insn >> 5) & 0xff;
fpcr = rdfpcr();
mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3;
DEBUG_INFO("======== Entering SIMD floating-CMP math emulation =======\n");
DEBUG_INFO("hardware fpcr = %#lx\n", fpcr);
swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
DEBUG_INFO("software swcr = %#lx\n", swcr);
DEBUG_INFO("fa:%#lx,fb:%#lx,fc:%#lx,func:%#lx,mode:%#lx\n", fa, fb, fc, func, mode);
read_fp_reg_d(fa, &va_p0, &va_p1, &va_p2, &va_p3);
read_fp_reg_d(fb, &vb_p0, &vb_p1, &vb_p2, &vb_p3);
read_fp_reg_d(fc, &vc_p0, &vc_p1, &vc_p2, &vc_p3);
DEBUG_INFO("va_p0:%#lx, va_p1:%#lx, va_p2:%#lx, va_p3:%#lx\n", va_p0, va_p1, va_p2, va_p3);
DEBUG_INFO("vb_p0:%#lx, vb_p1:%#lx, vb_p2:%#lx, vb_p3:%#lx\n", vb_p0, vb_p1, vb_p2, vb_p3);
DEBUG_INFO("vc_p0:%#lx, vc_p1:%#lx, vc_p2:%#lx, vc_p3:%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
working_part = WORKING_PART_0;
simd_working:
_fex = 0;
switch (working_part) {
case WORKING_PART_0:
DEBUG_INFO("WORKING_PART_0\n");
va = va_p0;
vb = vb_p0;
vc = vc_p0;
break;
case WORKING_PART_1:
DEBUG_INFO("WORKING_PART_1\n");
va = va_p1;
vb = vb_p1;
vc = vc_p1;
break;
case WORKING_PART_2:
DEBUG_INFO("WORKING_PART_2\n");
va = va_p2;
vb = vb_p2;
vc = vc_p2;
break;
case WORKING_PART_3:
DEBUG_INFO("WORKING_PART_3\n");
va = va_p3;
vb = vb_p3;
vc = vc_p3;
break;
}
DEBUG_INFO("Before unpack va:%#lx, vb:%#lx\n", va, vb);
FP_UNPACK_RAW_DP(DA, &va);
FP_UNPACK_RAW_DP(DB, &vb);
DEBUG_INFO("DA_e:%d, _FP_FRAC_ZEROP_1(DA):%d\n", DA_e, _FP_FRAC_ZEROP_1(DA));
DEBUG_INFO("DB_e:%d, _FP_FRAC_ZEROP_1(DB):%d\n", DA_e, _FP_FRAC_ZEROP_1(DA));
DEBUG_INFO("DA iszero:%d, DB iszero:%d\n", ((!DA_e && _FP_FRAC_ZEROP_1(DA)) ? 1 : 0),
((!DB_e && _FP_FRAC_ZEROP_1(DB))));
if (!DA_e && !_FP_FRAC_ZEROP_1(DA)) {
FP_SET_EXCEPTION(FP_EX_DENORM);
if (FP_DENORM_ZERO)
_FP_FRAC_SET_1(DA, _FP_ZEROFRAC_1);
}
if (!DB_e && !_FP_FRAC_ZEROP_1(DB)) {
FP_SET_EXCEPTION(FP_EX_DENORM);
if (FP_DENORM_ZERO)
_FP_FRAC_SET_1(DB, _FP_ZEROFRAC_1);
}
FP_CMP_D(res, DA, DB, 3);
vc = 0x4000000000000000;
/* CMPTEQ, CMPTUN don't trap on QNaN, while CMPTLT and CMPTLE do */
if (res == 3 && (((func == FOP_FNC_CMPLT) || (func == FOP_FNC_CMPLE))
|| FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB))) {
DEBUG_INFO("CMPLT CMPLE:func:%d, trap on QNaN.", func);
FP_SET_EXCEPTION(FP_EX_INVALID);
}
DEBUG_INFO("res:%d\n", res);
switch (func) {
case FNC_VFCMPUN:
if (res != 3)
vc = 0;
break;
case FNC_VFCMPEQ:
if (res)
vc = 0;
break;
case FNC_VFCMPLT:
if (res != -1)
vc = 0;
break;
case FNC_VFCMPLE:
if ((long)res > 0)
vc = 0;
break;
}
next_working_s:
switch (working_part) {
case WORKING_PART_0:
working_part = WORKING_PART_1;
vc_p0 = vc;
fex_p0 = _fex;
goto simd_working;
case WORKING_PART_1:
working_part = WORKING_PART_2;
vc_p1 = vc;
fex_p1 = _fex;
goto simd_working;
case WORKING_PART_2:
working_part = WORKING_PART_3;
vc_p2 = vc;
fex_p2 = _fex;
goto simd_working;
case WORKING_PART_3:
vc_p3 = vc;
fex_p3 = _fex;
goto done;
}
done:
if (fex_p0 || fex_p1 || fex_p2 || fex_p3) {
unsigned long fpcr_p0, fpcr_p1, fpcr_p2, fpcr_p3;
unsigned long swcr_p0, swcr_p1, swcr_p2, swcr_p3;
fpcr_p0 = fpcr_p1 = fpcr_p2 = fpcr_p3 = 0;
swcr_p0 = swcr_p1 = swcr_p2 = swcr_p3 = swcr;
/* manage fpcr_p0 */
if (fex_p0) {
swcr_p0 |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p0 = fpcr;
fpcr_p0 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p0 |= ieee_swcr_to_fpcr(swcr_p0);
}
if (fex_p1) {
swcr_p1 |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p1 = fpcr;
fpcr_p1 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p1 |= ieee_swcr_to_fpcr(swcr_p1);
}
if (fex_p2) {
swcr_p2 |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p2 = fpcr;
fpcr_p2 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p2 |= ieee_swcr_to_fpcr(swcr_p2);
}
if (fex_p3) {
swcr_p3 |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p3 = fpcr;
fpcr_p3 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p3 |= ieee_swcr_to_fpcr(swcr_p3);
}
fpcr = fpcr_p0 | fpcr_p1 | fpcr_p2 | fpcr_p3;
DEBUG_INFO("fex_p0 = %#lx\n", fex_p0);
DEBUG_INFO("fex_p1 = %#lx\n", fex_p1);
DEBUG_INFO("fex_p2 = %#lx\n", fex_p2);
DEBUG_INFO("fex_p3 = %#lx\n", fex_p3);
DEBUG_INFO("SIMD emulation almost finished.before write fpcr = %#lx\n", fpcr);
wrfpcr(fpcr);
DEBUG_INFO("Before write fp: vc_p0=%#lx, vc_p1=%#lx, vc_p2=%#lx, vc_p3=%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
write_fp_reg_d(fc, vc_p0, vc_p1, vc_p2, vc_p3);
/* Do we generate a signal? */
_fex = (fex_p0 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p1 & swcr & IEEE_TRAP_ENABLE_MASK)
| (fex_p2 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p3 & swcr & IEEE_TRAP_ENABLE_MASK);
si_code = 0;
if (_fex) {
if (_fex & IEEE_TRAP_ENABLE_DNO)
si_code = FPE_FLTUND;
if (_fex & IEEE_TRAP_ENABLE_INE)
si_code = FPE_FLTRES;
if (_fex & IEEE_TRAP_ENABLE_UNF)
si_code = FPE_FLTUND;
if (_fex & IEEE_TRAP_ENABLE_OVF)
si_code = FPE_FLTOVF;
if (_fex & IEEE_TRAP_ENABLE_DZE)
si_code = FPE_FLTDIV;
if (_fex & IEEE_TRAP_ENABLE_INV)
si_code = FPE_FLTINV;
}
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
return si_code;
}
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
return 0;
bad_insn:
printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc);
return -1;
}
long simd_fp_emul_d(unsigned long pc)
{
FP_DECL_EX;
FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); FP_DECL_D(DC);
unsigned long fa, fb, fc, func, mode, src;
unsigned long res, va, vb, vc, swcr, fpcr;
__u32 insn;
long si_code;
unsigned long va_p0, va_p1, va_p2, va_p3;
unsigned long vb_p0, vb_p1, vb_p2, vb_p3;
unsigned long vc_p0, vc_p1, vc_p2, vc_p3;
unsigned long fex_p0, fex_p1, fex_p2, fex_p3;
int working_part;
get_user(insn, (__u32 *)pc);
fc = (insn >> 0) & 0x1f; /* destination register */
fb = (insn >> 16) & 0x1f;
fa = (insn >> 21) & 0x1f;
func = (insn >> 5) & 0xff;
fpcr = rdfpcr();
mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3;
DEBUG_INFO("======== Entering SIMD D-floating math emulation =======\n");
DEBUG_INFO("hardware fpcr = %#lx\n", fpcr);
swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
DEBUG_INFO("software swcr = %#lx\n", swcr);
DEBUG_INFO("fa:%#lx,fb:%#lx,fc:%#lx,func:%#lx,mode:%#lx\n", fa, fb, fc, func, mode);
read_fp_reg_d(fa, &va_p0, &va_p1, &va_p2, &va_p3);
read_fp_reg_d(fb, &vb_p0, &vb_p1, &vb_p2, &vb_p3);
read_fp_reg_d(fc, &vc_p0, &vc_p1, &vc_p2, &vc_p3);
DEBUG_INFO("va_p0:%#lx, va_p1:%#lx, va_p2:%#lx, va_p3:%#lx\n", va_p0, va_p1, va_p2, va_p3);
DEBUG_INFO("vb_p0:%#lx, vb_p1:%#lx, vb_p2:%#lx, vb_p3:%#lx\n", vb_p0, vb_p1, vb_p2, vb_p3);
DEBUG_INFO("vc_p0:%#lx, vc_p1:%#lx, vc_p2:%#lx, vc_p3:%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
working_part = WORKING_PART_0;
simd_working:
_fex = 0;
switch (working_part) {
case WORKING_PART_0:
DEBUG_INFO("WORKING_PART_0\n");
va = va_p0;
vb = vb_p0;
vc = vc_p0;
if ((fpcr & FPCR_STATUS_MASK0) == 0) {
SW64_FP_NORMAL_D(DA, &va);
SW64_FP_NORMAL_D(DB, &vb);
if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL))
goto next_working_s;
else
DEBUG_INFO("LOW: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c);
} else {
SW64_FP_NAN_D(DA, &va);
SW64_FP_NAN_D(DB, &vb);
if (((DA_c == SW64_FP_NAN) || (DB_c == SW64_FP_NAN)))
goto next_working_s;
}
break;
case WORKING_PART_1:
DEBUG_INFO("WORKING_PART_1\n");
va = va_p1;
vb = vb_p1;
vc = vc_p1;
if ((fpcr & FPCR_STATUS_MASK1) == 0) {
SW64_FP_NORMAL_D(DA, &va);
SW64_FP_NORMAL_D(DB, &vb);
if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL))
goto next_working_s;
else
DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c);
} else {
SW64_FP_NAN_D(DA, &va);
SW64_FP_NAN_D(DB, &vb);
if (((DA_c == SW64_FP_NAN) || (DB_c == SW64_FP_NAN)))
goto next_working_s;
}
break;
case WORKING_PART_2:
DEBUG_INFO("WORKING_PART_2\n");
va = va_p2;
vb = vb_p2;
vc = vc_p2;
if ((fpcr & FPCR_STATUS_MASK2) == 0) {
SW64_FP_NORMAL_D(DA, &va);
SW64_FP_NORMAL_D(DB, &vb);
if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL))
goto next_working_s;
else
DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c);
} else {
SW64_FP_NAN_D(DA, &va);
SW64_FP_NAN_D(DB, &vb);
if (((DA_c == SW64_FP_NAN) || (DB_c == SW64_FP_NAN)))
goto next_working_s;
}
break;
case WORKING_PART_3:
DEBUG_INFO("WORKING_PART_3\n");
va = va_p3;
vb = vb_p3;
vc = vc_p3;
if ((fpcr & FPCR_STATUS_MASK3) == 0) {
SW64_FP_NORMAL_D(DA, &va);
SW64_FP_NORMAL_D(DB, &vb);
if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL))
goto next_working_s;
else
DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c);
} else {
SW64_FP_NAN_D(DA, &va);
SW64_FP_NAN_D(DB, &vb);
if (((DA_c == SW64_FP_NAN) || (DB_c == SW64_FP_NAN)))
goto next_working_s;
}
break;
}
FP_UNPACK_DP(DA, &va);
FP_UNPACK_DP(DB, &vb);
switch (func) {
case FNC_VSUBD:
DEBUG_INFO("FNC_VSUBD\n");
FP_SUB_D(DR, DA, DB);
goto pack_d;
case FNC_VMULD:
DEBUG_INFO("FNC_VMULD\n");
FP_MUL_D(DR, DA, DB);
goto pack_d;
case FNC_VADDD:
DEBUG_INFO("FNC_VADDD\n");
FP_ADD_D(DR, DA, DB);
goto pack_d;
case FNC_VDIVD:
DEBUG_INFO("FNC_VDIVD\n");
FP_DIV_D(DR, DA, DB);
goto pack_d;
case FNC_VSQRTD:
DEBUG_INFO("FNC_VSQRTD\n");
FP_SQRT_D(DR, DB);
goto pack_d;
}
pack_d:
FP_PACK_DP(&vc, DR);
if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) {
DEBUG_INFO("pack_d, vc=0 !!!!\n");
vc = 0;
}
DEBUG_INFO("SW64 SIMD Emulation D-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc);
DEBUG_INFO("SW64 SIMD Emulation D-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr);
next_working_s:
switch (working_part) {
case WORKING_PART_0:
working_part = WORKING_PART_1;
vc_p0 = vc;
fex_p0 = _fex;
goto simd_working;
case WORKING_PART_1:
working_part = WORKING_PART_2;
vc_p1 = vc;
fex_p1 = _fex;
goto simd_working;
case WORKING_PART_2:
working_part = WORKING_PART_3;
vc_p2 = vc;
fex_p2 = _fex;
goto simd_working;
case WORKING_PART_3:
vc_p3 = vc;
fex_p3 = _fex;
goto done;
}
done:
if (fex_p0 || fex_p1 || fex_p2 || fex_p3) {
unsigned long fpcr_p0, fpcr_p1, fpcr_p2, fpcr_p3;
unsigned long swcr_p0, swcr_p1, swcr_p2, swcr_p3;
fpcr_p0 = fpcr_p1 = fpcr_p2 = fpcr_p3 = 0;
swcr_p0 = swcr_p1 = swcr_p2 = swcr_p3 = swcr;
/* manage fpcr_p0 */
if (fex_p0) {
swcr_p0 |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p0 = fpcr;
fpcr_p0 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p0 |= ieee_swcr_to_fpcr(swcr_p0);
}
if (fex_p1) {
swcr_p1 |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p1 = fpcr;
fpcr_p1 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p1 |= ieee_swcr_to_fpcr(swcr_p1);
}
if (fex_p2) {
swcr_p2 |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p2 = fpcr;
fpcr_p2 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p2 |= ieee_swcr_to_fpcr(swcr_p2);
}
if (fex_p3) {
swcr_p3 |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p3 = fpcr;
fpcr_p3 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p3 |= ieee_swcr_to_fpcr(swcr_p3);
}
fpcr = fpcr_p0 | fpcr_p1 | fpcr_p2 | fpcr_p3;
DEBUG_INFO("fex_p0 = %#lx\n", fex_p0);
DEBUG_INFO("fex_p1 = %#lx\n", fex_p1);
DEBUG_INFO("fex_p2 = %#lx\n", fex_p2);
DEBUG_INFO("fex_p3 = %#lx\n", fex_p3);
DEBUG_INFO("SIMD emulation almost finished.before write fpcr = %#lx\n", fpcr);
wrfpcr(fpcr);
DEBUG_INFO("Before write fp: vp_p0=%#lx, vc_p1=%#lx, vc_p2=%#lx, vc_p3=%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
write_fp_reg_d(fc, vc_p0, vc_p1, vc_p2, vc_p3);
/* Do we generate a signal? */
_fex = (fex_p0 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p1 & swcr & IEEE_TRAP_ENABLE_MASK)
| (fex_p2 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p3 & swcr & IEEE_TRAP_ENABLE_MASK);
si_code = 0;
if (_fex) {
if (_fex & IEEE_TRAP_ENABLE_DNO)
si_code = FPE_FLTUND;
if (_fex & IEEE_TRAP_ENABLE_INE)
si_code = FPE_FLTRES;
if (_fex & IEEE_TRAP_ENABLE_UNF)
si_code = FPE_FLTUND;
if (_fex & IEEE_TRAP_ENABLE_OVF)
si_code = FPE_FLTOVF;
if (_fex & IEEE_TRAP_ENABLE_DZE)
si_code = FPE_FLTDIV;
if (_fex & IEEE_TRAP_ENABLE_INV)
si_code = FPE_FLTINV;
}
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
return si_code;
}
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
return 0;
bad_insn:
printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc);
return -1;
}
long simd_fp_emul_s(unsigned long pc)
{
FP_DECL_EX;
FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
unsigned long fa, fb, fc, func, mode, src;
unsigned long res, va, vb, vc, swcr, fpcr;
__u32 insn;
long si_code;
unsigned long va_p0, va_p1, va_p2, va_p3;
unsigned long vb_p0, vb_p1, vb_p2, vb_p3;
unsigned long vc_p0, vc_p1, vc_p2, vc_p3;
unsigned long fex_p0, fex_p1, fex_p2, fex_p3;
int working_part;
get_user(insn, (__u32 *)pc);
fc = (insn >> 0) & 0x1f; /* destination register */
fb = (insn >> 16) & 0x1f;
fa = (insn >> 21) & 0x1f;
func = (insn >> 5) & 0xff;
fpcr = rdfpcr();
mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3;
DEBUG_INFO("======== Entering SIMD S-floating math emulation =======\n");
DEBUG_INFO("hardware fpcr = %#lx\n", fpcr);
swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
DEBUG_INFO("software swcr = %#lx\n", swcr);
DEBUG_INFO("fa:%#lx,fb:%#lx,fc:%#lx,func:%#lx,mode:%#lx\n", fa, fb, fc, func, mode);
read_fp_reg_s(fa, &va_p0, &va_p1, &va_p2, &va_p3);
read_fp_reg_s(fb, &vb_p0, &vb_p1, &vb_p2, &vb_p3);
read_fp_reg_s(fc, &vc_p0, &vc_p1, &vc_p2, &vc_p3);
DEBUG_INFO("va_p0:%#lx, va_p1:%#lx, va_p2:%#lx, va_p3:%#lx\n", va_p0, va_p1, va_p2, va_p3);
DEBUG_INFO("vb_p0:%#lx, vb_p1:%#lx, vb_p2:%#lx, vb_p3:%#lx\n", vb_p0, vb_p1, vb_p2, vb_p3);
DEBUG_INFO("vc_p0:%#lx, vc_p1:%#lx, vc_p2:%#lx, vc_p3:%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
working_part = WORKING_PART_0;
simd_working:
_fex = 0;
switch (working_part) {
case WORKING_PART_0:
DEBUG_INFO("WORKING_PART_0\n");
va = va_p0;
vb = vb_p0;
vc = vc_p0;
if ((fpcr & FPCR_STATUS_MASK0) == 0) {
SW64_FP_NORMAL_S(SA, &va);
SW64_FP_NORMAL_S(SB, &vb);
if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL))
goto next_working_s;
else
DEBUG_INFO("PART0: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c);
} else {
SW64_FP_NAN_S(SA, &va);
SW64_FP_NAN_S(SB, &vb);
if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN))
goto next_working_s;
}
break;
case WORKING_PART_1:
DEBUG_INFO("WORKING_PART_1\n");
va = va_p1;
vb = vb_p1;
vc = vc_p1;
if ((fpcr & FPCR_STATUS_MASK1) == 0) {
SW64_FP_NORMAL_S(SA, &va);
SW64_FP_NORMAL_S(SB, &vb);
if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL))
goto next_working_s;
else
DEBUG_INFO("PART1: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c);
} else {
SW64_FP_NAN_S(SA, &va);
SW64_FP_NAN_S(SB, &vb);
if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN))
goto next_working_s;
}
break;
case WORKING_PART_2:
DEBUG_INFO("WORKING_PART_2\n");
va = va_p2;
vb = vb_p2;
vc = vc_p2;
if ((fpcr & FPCR_STATUS_MASK2) == 0) {
SW64_FP_NORMAL_S(SA, &va);
SW64_FP_NORMAL_S(SB, &vb);
if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL))
goto next_working_s;
else
DEBUG_INFO("PART2: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c);
} else {
SW64_FP_NAN_S(SA, &va);
SW64_FP_NAN_S(SB, &vb);
if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN))
goto next_working_s;
}
break;
case WORKING_PART_3:
DEBUG_INFO("WORKING_PART_3\n");
va = va_p3;
vb = vb_p3;
vc = vc_p3;
if ((fpcr & FPCR_STATUS_MASK3) == 0) {
SW64_FP_NORMAL_S(SA, &va);
SW64_FP_NORMAL_S(SB, &vb);
if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL))
goto next_working_s;
else
DEBUG_INFO("PART3: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c);
} else {
SW64_FP_NAN_S(SA, &va);
SW64_FP_NAN_S(SB, &vb);
if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN))
goto next_working_s;
}
break;
}
FP_UNPACK_SP(SA, &va);
FP_UNPACK_SP(SB, &vb);
switch (func) {
case FNC_VSUBS:
DEBUG_INFO("FNC_VSUBS\n");
FP_SUB_S(SR, SA, SB);
goto pack_s;
case FNC_VMULS:
DEBUG_INFO("FNC_VMULS\n");
FP_MUL_S(SR, SA, SB);
goto pack_s;
case FNC_VADDS:
DEBUG_INFO("FNC_VADDS\n");
FP_ADD_S(SR, SA, SB);
goto pack_s;
case FNC_VDIVS:
DEBUG_INFO("FNC_VDIVS\n");
FP_DIV_S(SR, SA, SB);
goto pack_s;
case FNC_VSQRTS:
DEBUG_INFO("FNC_VSQRTS\n");
FP_SQRT_S(SR, SB);
goto pack_s;
}
pack_s:
FP_PACK_SP(&vc, SR);
if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) {
DEBUG_INFO("pack_s, vc=0 !!!!\n");
vc = 0;
}
DEBUG_INFO("SW64 SIMD Emulation S-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc);
DEBUG_INFO("SW64 SIMD Emulation S-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr);
next_working_s:
switch (working_part) {
case WORKING_PART_0:
working_part = WORKING_PART_1;
vc_p0 = vc;
fex_p0 = _fex;
goto simd_working;
case WORKING_PART_1:
working_part = WORKING_PART_2;
vc_p1 = vc;
fex_p1 = _fex;
goto simd_working;
case WORKING_PART_2:
working_part = WORKING_PART_3;
vc_p2 = vc;
fex_p2 = _fex;
goto simd_working;
case WORKING_PART_3:
vc_p3 = vc;
fex_p3 = _fex;
goto done;
}
done:
if (fex_p0 || fex_p1 || fex_p2 || fex_p3) {
unsigned long fpcr_p0, fpcr_p1, fpcr_p2, fpcr_p3;
unsigned long swcr_p0, swcr_p1, swcr_p2, swcr_p3;
fpcr_p0 = fpcr_p1 = fpcr_p2 = fpcr_p3 = 0;
swcr_p0 = swcr_p1 = swcr_p2 = swcr_p3 = swcr;
/* manage fpcr_p0 */
if (fex_p0) {
swcr_p0 |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p0 = fpcr;
fpcr_p0 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p0 |= ieee_swcr_to_fpcr(swcr_p0);
DEBUG_INFO("fex_p0: fpcr_p0:%#lx\n", fpcr_p0);
}
if (fex_p1) {
swcr_p1 |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p1 = fpcr;
fpcr_p1 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p1 |= ieee_swcr_to_fpcr(swcr_p1);
DEBUG_INFO("fex_p1: fpcr_p1:%#lx\n", fpcr_p1);
}
if (fex_p2) {
swcr_p2 |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p2 = fpcr;
fpcr_p2 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p2 |= ieee_swcr_to_fpcr(swcr_p2);
DEBUG_INFO("fex_p2: fpcr_p2:%#lx\n", fpcr_p2);
}
if (fex_p3) {
swcr_p3 |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p3 = fpcr;
fpcr_p3 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p3 |= ieee_swcr_to_fpcr(swcr_p3);
DEBUG_INFO("fex_p3: fpcr_p3:%#lx\n", fpcr_p3);
}
fpcr = fpcr_p0 | fpcr_p1 | fpcr_p2 | fpcr_p3;
DEBUG_INFO("fex_p0 = %#lx\n", fex_p0);
DEBUG_INFO("fex_p1 = %#lx\n", fex_p1);
DEBUG_INFO("fex_p2 = %#lx\n", fex_p2);
DEBUG_INFO("fex_p3 = %#lx\n", fex_p3);
DEBUG_INFO("SIMD emulation almost finished.before write fpcr = %#lx\n", fpcr);
wrfpcr(fpcr);
DEBUG_INFO("Before write fp: vc_p0=%#lx, vc_p1=%#lx, vc_p2=%#lx, vc_p3=%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
write_fp_reg_s(fc, vc_p0, vc_p1, vc_p2, vc_p3);
/* Do we generate a signal? */
_fex = (fex_p0 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p1 & swcr & IEEE_TRAP_ENABLE_MASK)
| (fex_p2 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p3 & swcr & IEEE_TRAP_ENABLE_MASK);
si_code = 0;
if (_fex) {
if (_fex & IEEE_TRAP_ENABLE_DNO)
si_code = FPE_FLTUND;
if (_fex & IEEE_TRAP_ENABLE_INE)
si_code = FPE_FLTRES;
if (_fex & IEEE_TRAP_ENABLE_UNF)
si_code = FPE_FLTUND;
if (_fex & IEEE_TRAP_ENABLE_OVF)
si_code = FPE_FLTOVF;
if (_fex & IEEE_TRAP_ENABLE_DZE)
si_code = FPE_FLTDIV;
if (_fex & IEEE_TRAP_ENABLE_INV)
si_code = FPE_FLTINV;
}
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
return si_code;
}
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
return 0;
bad_insn:
printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc);
return -1;
}
/* Flip bit 63 of @va, i.e. the sign bit of a 64-bit floating-point image. */
static inline unsigned long negative_value(unsigned long va)
{
	const unsigned long sign_bit = 0x8000000000000000UL;

	return va ^ sign_bit;
}
/* Flip bit 31 of @va, i.e. the sign bit of a 32-bit floating-point image. */
static inline unsigned long s_negative_value(unsigned long va)
{
	const unsigned long sign_bit = 0x80000000UL;

	return va ^ sign_bit;
}
/*
* sw64 mul-add floating emulation
*/
long mul_add_fp_emul(unsigned long pc)
{
FP_DECL_EX;
FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SC); FP_DECL_S(S_TMP); FP_DECL_S(SR);
FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DC); FP_DECL_D(D_TMP); FP_DECL_D(DR);
FP_DECL_S(S_ZERO);
FP_DECL_D(D_ZERO);
FP_DECL_S(S_TMP2);
FP_DECL_D(D_TMP2);
unsigned long fa, fb, fc, fd, func, mode, src;
unsigned long res, va, vb, vc, vd, vtmp, vtmp2, swcr, fpcr;
__u32 insn;
long si_code;
unsigned long vzero = 0;
get_user(insn, (__u32 *)pc);
fd = (insn >> 0) & 0x1f; /* destination register */
fc = (insn >> 5) & 0x1f;
fb = (insn >> 16) & 0x1f;
fa = (insn >> 21) & 0x1f;
func = (insn >> 10) & 0x3f;
fpcr = rdfpcr();
mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3;
DEBUG_INFO("===== Entering SW64 MUL-ADD Emulation =====\n");
DEBUG_INFO("hardware fpcr = %#lx\n", fpcr);
swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
DEBUG_INFO("software swcr = %#lx\n", swcr);
if (func == FNC_FMAS || func == FNC_FMSS || func == FNC_FNMAS || func == FNC_FNMSS) {
va = sw64_read_fp_reg_s(fa);
vb = sw64_read_fp_reg_s(fb);
vc = sw64_read_fp_reg_s(fc);
FP_UNPACK_SP(SA, &va);
FP_UNPACK_SP(SB, &vb);
FP_UNPACK_SP(SC, &vc);
FP_UNPACK_SP(S_ZERO, &vzero);
}
if (func == FNC_FMAD || func == FNC_FMSD || func == FNC_FNMAD || func == FNC_FNMSD) {
va = sw64_read_fp_reg(fa);
vb = sw64_read_fp_reg(fb);
vc = sw64_read_fp_reg(fc);
FP_UNPACK_DP(DA, &va);
FP_UNPACK_DP(DB, &vb);
FP_UNPACK_DP(DC, &vc);
FP_UNPACK_DP(D_ZERO, &vzero);
}
DEBUG_INFO("va = %#lx, vb = %#lx, vc = %#lx\n", va, vb, vc);
switch (func) {
case FNC_FMAS:
FP_MUL_S(S_TMP, SA, SB);
FP_ADD_S(SR, S_TMP, SC);
goto pack_s;
case FNC_FMSS:
FP_MUL_S(S_TMP, SA, SB);
FP_SUB_S(SR, S_TMP, SC);
goto pack_s;
case FNC_FNMAS: /* (-va*vb) + vc */
va = s_negative_value(va);
FP_UNPACK_SP(SA, &va);
FP_MUL_S(S_TMP, SA, SB);
FP_ADD_S(SR, S_TMP, SC);
goto pack_s;
case FNC_FNMSS: /* (-va*vb) - vc */
va = s_negative_value(va);
FP_UNPACK_SP(SA, &va);
FP_MUL_S(S_TMP, SA, SB);
FP_SUB_S(SR, S_TMP, SC);
goto pack_s;
case FNC_FMAD:
FP_MUL_D(D_TMP, DA, DB);
FP_ADD_D(DR, D_TMP, DC);
goto pack_d;
case FNC_FMSD:
FP_MUL_D(D_TMP, DA, DB);
FP_SUB_D(DR, D_TMP, DC);
goto pack_d;
case FNC_FNMAD:
va = negative_value(va);
FP_UNPACK_DP(DA, &va);
FP_MUL_D(D_TMP, DA, DB);
FP_ADD_D(DR, D_TMP, DC);
goto pack_d;
case FNC_FNMSD:
va = negative_value(va);
FP_UNPACK_DP(DA, &va);
FP_MUL_D(D_TMP, DA, DB);
FP_SUB_D(DR, D_TMP, DC);
goto pack_d;
default:
goto bad_insn;
}
pack_s:
FP_PACK_SP(&vd, SR);
if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
vd = 0;
sw64_write_fp_reg_s(fd, vd);
goto done;
pack_d:
FP_PACK_DP(&vd, DR);
if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
vd = 0;
sw64_write_fp_reg(fd, vd);
done:
DEBUG_INFO("vd = %#lx\n", vd);
if (_fex) {
/* Record exceptions in software control word. */
swcr |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr |= ieee_swcr_to_fpcr(swcr);
wrfpcr(fpcr); /** wrfpcr will destroy vector register! */
if (func == FNC_FMAS || func == FNC_FMSS || func == FNC_FNMAS || func == FNC_FNMSS)
sw64_write_fp_reg_s(fd, vd);
if (func == FNC_FMAD || func == FNC_FMSD || func == FNC_FNMAD || func == FNC_FNMSD)
sw64_write_fp_reg(fd, vd);
/* Do we generate a signal? */
_fex = _fex & swcr & IEEE_TRAP_ENABLE_MASK;
si_code = 0;
if (_fex) {
if (_fex & IEEE_TRAP_ENABLE_DNO)
si_code = FPE_FLTUND;
if (_fex & IEEE_TRAP_ENABLE_INE)
si_code = FPE_FLTRES;
if (_fex & IEEE_TRAP_ENABLE_UNF)
si_code = FPE_FLTUND;
if (_fex & IEEE_TRAP_ENABLE_OVF)
si_code = FPE_FLTOVF;
if (_fex & IEEE_TRAP_ENABLE_DZE)
si_code = FPE_FLTDIV;
if (_fex & IEEE_TRAP_ENABLE_INV)
si_code = FPE_FLTINV;
}
return si_code;
}
/*
* We used to write the destination register here, but DEC FORTRAN
* requires that the result *always* be written... so we do the write
* immediately after the operations above.
*/
return 0;
bad_insn:
printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc);
return -1;
}
long simd_mul_add_fp_emul_s(unsigned long pc)
{
FP_DECL_EX;
FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SC); FP_DECL_S(S_TMP); FP_DECL_S(SR);
FP_DECL_S(S_ZERO);
FP_DECL_S(S_TMP2);
unsigned long fa, fb, fc, fd, func, mode, src;
unsigned long res, va, vb, vc, vd, vtmp, vtmp2, swcr, fpcr;
__u32 insn;
long si_code;
unsigned long vzero = 0;
get_user(insn, (__u32 *)pc);
fd = (insn >> 0) & 0x1f; /* destination register */
fc = (insn >> 5) & 0x1f;
fb = (insn >> 16) & 0x1f;
fa = (insn >> 21) & 0x1f;
func = (insn >> 10) & 0x3f;
fpcr = rdfpcr();
mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3;
unsigned long va_p0, va_p1, va_p2, va_p3;
unsigned long vb_p0, vb_p1, vb_p2, vb_p3;
unsigned long vc_p0, vc_p1, vc_p2, vc_p3;
unsigned long vd_p0, vd_p1, vd_p2, vd_p3;
unsigned long fex_p0, fex_p1, fex_p2, fex_p3;
int working_part;
DEBUG_INFO("======== Entering SIMD S-floating mul-add emulation =======\n");
swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
DEBUG_INFO("software swcr = %#lx\n", swcr);
DEBUG_INFO("hardware fpcr = %#lx\n", fpcr);
read_fp_reg_s(fa, &va_p0, &va_p1, &va_p2, &va_p3);
read_fp_reg_s(fb, &vb_p0, &vb_p1, &vb_p2, &vb_p3);
read_fp_reg_s(fc, &vc_p0, &vc_p1, &vc_p2, &vc_p3);
read_fp_reg_s(fd, &vd_p0, &vd_p1, &vd_p2, &vd_p3);
DEBUG_INFO("va_p0:%#lx, va_p1:%#lx, va_p2:%#lx, va_p3:%#lx\n", va_p0, va_p1, va_p2, va_p3);
DEBUG_INFO("vb_p0:%#lx, vb_p1:%#lx, vb_p2:%#lx, vb_p3:%#lx\n", vb_p0, vb_p1, vb_p2, vb_p3);
DEBUG_INFO("vc_p0:%#lx, vc_p1:%#lx, vc_p2:%#lx, vc_p3:%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
DEBUG_INFO("vd_p0:%#lx, vd_p1:%#lx, vd_p2:%#lx, vd_p3:%#lx\n", vd_p0, vd_p1, vd_p2, vd_p3);
working_part = WORKING_PART_0;
simd_working:
_fex = 0;
switch (working_part) {
case WORKING_PART_0:
DEBUG_INFO("WORKING_PART_0\n");
va = va_p0;
vb = vb_p0;
vc = vc_p0;
DEBUG_INFO("FPCR_STATUS_MASK0 : %#lx, fpcr :%#lx\n", FPCR_STATUS_MASK0, fpcr);
if ((fpcr & FPCR_STATUS_MASK0) == 0) {
SW64_FP_NORMAL_S(SA, &va);
SW64_FP_NORMAL_S(SB, &vb);
SW64_FP_NORMAL_S(SC, &vc);
if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL) && (SC_c == SW64_FP_NORMAL))
goto next_working_s;
else
DEBUG_INFO("LOW: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c);
} else {
SW64_FP_NAN_S(SA, &va);
SW64_FP_NAN_S(SB, &vb);
if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN))
goto next_working_s;
}
break;
case WORKING_PART_1:
DEBUG_INFO("WORKING_PART_1\n");
va = va_p1;
vb = vb_p1;
vc = vc_p1;
DEBUG_INFO("FPCR_STATUS_MASK1 : %#lx, fpcr :%#lx\n", FPCR_STATUS_MASK0, fpcr);
if ((fpcr & FPCR_STATUS_MASK1) == 0) {
SW64_FP_NORMAL_S(SA, &va);
SW64_FP_NORMAL_S(SB, &vb);
SW64_FP_NORMAL_S(SC, &vc);
if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL) && (SC_c == SW64_FP_NORMAL))
goto next_working_s;
else
DEBUG_INFO("HIGH: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c);
} else {
SW64_FP_NAN_S(SA, &va);
SW64_FP_NAN_S(SB, &vb);
if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN))
goto next_working_s;
}
break;
case WORKING_PART_2:
DEBUG_INFO("WORKING_PART_2\n");
va = va_p2;
vb = vb_p2;
vc = vc_p2;
if ((fpcr & FPCR_STATUS_MASK2) == 0) {
SW64_FP_NORMAL_S(SA, &va);
SW64_FP_NORMAL_S(SB, &vb);
SW64_FP_NORMAL_S(SC, &vc);
if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL) && (SC_c == SW64_FP_NORMAL))
goto next_working_s;
else
DEBUG_INFO("HIGH: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c);
} else {
SW64_FP_NAN_S(SA, &va);
SW64_FP_NAN_S(SB, &vb);
if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN))
goto next_working_s;
}
break;
case WORKING_PART_3:
DEBUG_INFO("WORKING_PART_3\n");
va = va_p3;
vb = vb_p3;
vc = vc_p3;
if ((fpcr & FPCR_STATUS_MASK3) == 0) {
SW64_FP_NORMAL_S(SA, &va);
SW64_FP_NORMAL_S(SB, &vb);
SW64_FP_NORMAL_S(SC, &vc);
if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL) && (SC_c == SW64_FP_NORMAL))
goto next_working_s;
else
DEBUG_INFO("HIGH: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c);
} else {
SW64_FP_NAN_S(SA, &va);
SW64_FP_NAN_S(SB, &vb);
if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN))
goto next_working_s;
}
break;
}
FP_UNPACK_SP(SA, &va);
FP_UNPACK_SP(SB, &vb);
FP_UNPACK_SP(SC, &vc);
FP_UNPACK_SP(S_ZERO, &vzero);
switch (func) {
case FNC_FMAS:
FP_MUL_S(S_TMP, SA, SB);
FP_ADD_S(SR, S_TMP, SC);
goto pack_s;
case FNC_FMSS:
FP_MUL_S(S_TMP, SA, SB);
FP_SUB_S(SR, S_TMP, SC);
goto pack_s;
case FNC_FNMAS: /* (-va*vb) + vc */
va = s_negative_value(va);
FP_UNPACK_SP(SA, &va);
FP_MUL_S(S_TMP, SA, SB);
FP_ADD_S(SR, S_TMP, SC);
goto pack_s;
case FNC_FNMSS: /* (-va*vb) - vc */
va = s_negative_value(va);
FP_UNPACK_SP(SA, &va);
FP_MUL_S(S_TMP, SA, SB);
FP_SUB_S(SR, S_TMP, SC);
goto pack_s;
default:
goto bad_insn;
}
pack_s:
FP_PACK_SP(&vd, SR);
if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
vd = 0;
DEBUG_INFO("SW64 SIMD Emulation S-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc);
DEBUG_INFO("SW64 SIMD Emulation S-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr);
next_working_s:
switch (working_part) {
case WORKING_PART_0:
working_part = WORKING_PART_1;
vd_p0 = vd;
fex_p0 = _fex;
goto simd_working;
case WORKING_PART_1:
working_part = WORKING_PART_2;
vd_p1 = vd;
fex_p1 = _fex;
goto simd_working;
case WORKING_PART_2:
working_part = WORKING_PART_3;
vd_p2 = vd;
fex_p2 = _fex;
goto simd_working;
case WORKING_PART_3:
vd_p3 = vd;
fex_p3 = _fex;
goto done;
}
done:
if (fex_p0 || fex_p1 || fex_p2 || fex_p3) {
unsigned long fpcr_p0, fpcr_p1, fpcr_p2, fpcr_p3;
unsigned long swcr_p0, swcr_p1, swcr_p2, swcr_p3;
fpcr_p0 = fpcr_p1 = fpcr_p2 = fpcr_p3 = 0;
swcr_p0 = swcr_p1 = swcr_p2 = swcr_p3 = swcr;
/* manage fpcr_p0 */
if (fex_p0) {
swcr_p0 |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p0 = fpcr;
fpcr_p0 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p0 |= ieee_swcr_to_fpcr(swcr_p0);
}
if (fex_p1) {
swcr_p1 |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p1 = fpcr;
fpcr_p1 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p1 |= ieee_swcr_to_fpcr(swcr_p1);
}
if (fex_p2) {
swcr_p2 |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p2 = fpcr;
fpcr_p2 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p2 |= ieee_swcr_to_fpcr(swcr_p2);
}
if (fex_p3) {
swcr_p3 |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p3 = fpcr;
fpcr_p3 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p3 |= ieee_swcr_to_fpcr(swcr_p3);
}
fpcr = fpcr_p0 | fpcr_p1 | fpcr_p2 | fpcr_p3;
DEBUG_INFO("fex_p0 = %#lx\n", fex_p0);
DEBUG_INFO("fex_p1 = %#lx\n", fex_p1);
DEBUG_INFO("fex_p2 = %#lx\n", fex_p2);
DEBUG_INFO("fex_p3 = %#lx\n", fex_p3);
DEBUG_INFO("SIMD emulation almost finished.before write fpcr = %#lx\n", fpcr);
wrfpcr(fpcr);
DEBUG_INFO("Before write fp: vp_p0=%#lx, vc_p1=%#lx, vc_p2=%#lx, vc_p3=%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
write_fp_reg_s(fd, vd_p0, vd_p1, vd_p2, vd_p3); /* write to fd */
/* Do we generate a signal? */
_fex = (fex_p0 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p1 & swcr & IEEE_TRAP_ENABLE_MASK)
| (fex_p2 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p3 & swcr & IEEE_TRAP_ENABLE_MASK);
si_code = 0;
if (_fex) {
if (_fex & IEEE_TRAP_ENABLE_DNO)
si_code = FPE_FLTUND;
if (_fex & IEEE_TRAP_ENABLE_INE)
si_code = FPE_FLTRES;
if (_fex & IEEE_TRAP_ENABLE_UNF)
si_code = FPE_FLTUND;
if (_fex & IEEE_TRAP_ENABLE_OVF)
si_code = FPE_FLTOVF;
if (_fex & IEEE_TRAP_ENABLE_DZE)
si_code = FPE_FLTDIV;
if (_fex & IEEE_TRAP_ENABLE_INV)
si_code = FPE_FLTINV;
}
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
return si_code;
}
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
return 0;
bad_insn:
printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc);
return -1;
}
long simd_mul_add_fp_emul_d(unsigned long pc)
{
FP_DECL_EX;
FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DC); FP_DECL_D(D_TMP); FP_DECL_D(DR);
FP_DECL_D(D_ZERO);
FP_DECL_D(D_TMP2);
unsigned long fa, fb, fc, fd, func, mode, src;
unsigned long res, va, vb, vc, vd, vtmp, vtmp2, swcr, fpcr;
__u32 insn;
long si_code;
unsigned long vzero = 0;
get_user(insn, (__u32 *)pc);
fd = (insn >> 0) & 0x1f; /* destination register */
fc = (insn >> 5) & 0x1f;
fb = (insn >> 16) & 0x1f;
fa = (insn >> 21) & 0x1f;
func = (insn >> 10) & 0x3f;
fpcr = rdfpcr();
mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3;
unsigned long va_p0, va_p1, va_p2, va_p3;
unsigned long vb_p0, vb_p1, vb_p2, vb_p3;
unsigned long vc_p0, vc_p1, vc_p2, vc_p3;
unsigned long vd_p0, vd_p1, vd_p2, vd_p3;
unsigned long fex_p0, fex_p1, fex_p2, fex_p3;
int working_part;
DEBUG_INFO("======== Entering SIMD D-floating mul-add emulation =======\n");
DEBUG_INFO("hardware fpcr = %#lx\n", fpcr);
swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
DEBUG_INFO("software swcr = %#lx\n", swcr);
read_fp_reg_d(fa, &va_p0, &va_p1, &va_p2, &va_p3);
read_fp_reg_d(fb, &vb_p0, &vb_p1, &vb_p2, &vb_p3);
read_fp_reg_d(fc, &vc_p0, &vc_p1, &vc_p2, &vc_p3);
read_fp_reg_d(fd, &vd_p0, &vd_p1, &vd_p2, &vd_p3);
DEBUG_INFO("va_p0:%#lx, va_p1:%#lx, va_p2:%#lx, va_p3:%#lx\n", va_p0, va_p1, va_p2, va_p3);
DEBUG_INFO("vb_p0:%#lx, vb_p1:%#lx, vb_p2:%#lx, vb_p3:%#lx\n", vb_p0, vb_p1, vb_p2, vb_p3);
DEBUG_INFO("vc_p0:%#lx, vc_p1:%#lx, vc_p2:%#lx, vc_p3:%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
DEBUG_INFO("vd_p0:%#lx, vd_p1:%#lx, vd_p2:%#lx, vd_p3:%#lx\n", vd_p0, vd_p1, vd_p2, vd_p3);
working_part = WORKING_PART_0;
simd_working:
_fex = 0;
switch (working_part) {
case WORKING_PART_0:
DEBUG_INFO("WORKING_PART_0\n");
va = va_p0;
vb = vb_p0;
vc = vc_p0;
vd = vd_p0;
if ((fpcr & FPCR_STATUS_MASK0) == 0) {
SW64_FP_NORMAL_D(DA, &va);
SW64_FP_NORMAL_D(DB, &vb);
SW64_FP_NORMAL_D(DC, &vc);
if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL) && (DC_c == SW64_FP_NORMAL))
goto next_working_s;
else
DEBUG_INFO("LOW: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c);
} else {
SW64_FP_NAN_D(DA, &va);
SW64_FP_NAN_D(DB, &vb);
SW64_FP_NAN_D(DC, &vc);
if ((DA_c == SW64_FP_NAN) && (DB_c == SW64_FP_NAN) && (DC_c == SW64_FP_NAN))
goto next_working_s;
}
break;
case WORKING_PART_1:
DEBUG_INFO("WORKING_PART_1\n");
va = va_p1;
vb = vb_p1;
vc = vc_p1;
vd = vd_p1;
if ((fpcr & FPCR_STATUS_MASK1) == 0) {
SW64_FP_NORMAL_D(DA, &va);
SW64_FP_NORMAL_D(DB, &vb);
SW64_FP_NORMAL_D(DC, &vc);
if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL) && (DC_c == SW64_FP_NORMAL))
goto next_working_s;
else
DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c);
} else {
SW64_FP_NAN_D(DA, &va);
SW64_FP_NAN_D(DB, &vb);
SW64_FP_NAN_D(DC, &vc);
if ((DA_c == SW64_FP_NAN) && (DB_c == SW64_FP_NAN) && (DC_c == SW64_FP_NAN))
goto next_working_s;
}
break;
case WORKING_PART_2:
DEBUG_INFO("WORKING_PART_2\n");
va = va_p2;
vb = vb_p2;
vc = vc_p2;
vd = vd_p2;
if ((fpcr & FPCR_STATUS_MASK2) == 0) {
SW64_FP_NORMAL_D(DA, &va);
SW64_FP_NORMAL_D(DB, &vb);
SW64_FP_NORMAL_D(DC, &vc);
if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL) && (DC_c == SW64_FP_NORMAL))
goto next_working_s;
else
DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c);
} else {
SW64_FP_NAN_D(DA, &va);
SW64_FP_NAN_D(DB, &vb);
SW64_FP_NAN_D(DC, &vc);
if ((DA_c == SW64_FP_NAN) && (DB_c == SW64_FP_NAN) && (DC_c == SW64_FP_NAN))
goto next_working_s;
}
break;
case WORKING_PART_3:
DEBUG_INFO("WORKING_PART_3\n");
va = va_p3;
vb = vb_p3;
vc = vc_p3;
vd = vd_p3;
if ((fpcr & FPCR_STATUS_MASK3) == 0) {
SW64_FP_NORMAL_D(DA, &va);
SW64_FP_NORMAL_D(DB, &vb);
SW64_FP_NORMAL_D(DC, &vc);
if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL) && (DC_c == SW64_FP_NORMAL))
goto next_working_s;
else
DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c);
} else {
SW64_FP_NAN_D(DA, &va);
SW64_FP_NAN_D(DB, &vb);
SW64_FP_NAN_D(DC, &vc);
if ((DA_c == SW64_FP_NAN) && (DB_c == SW64_FP_NAN) && (DC_c == SW64_FP_NAN))
goto next_working_s;
}
break;
}
FP_UNPACK_DP(DA, &va);
FP_UNPACK_DP(DB, &vb);
FP_UNPACK_DP(DC, &vc);
FP_UNPACK_DP(D_ZERO, &vzero);
switch (func) {
case FNC_FMAD:
FP_MUL_D(D_TMP, DA, DB);
FP_ADD_D(DR, D_TMP, DC);
goto pack_d;
case FNC_FMSD:
FP_MUL_D(D_TMP, DA, DB);
FP_SUB_D(DR, D_TMP, DC);
goto pack_d;
case FNC_FNMAD:
va = negative_value(va);
FP_UNPACK_DP(DA, &va);
FP_MUL_D(D_TMP, DA, DB);
FP_ADD_D(DR, D_TMP, DC);
goto pack_d;
case FNC_FNMSD:
va = negative_value(va);
FP_UNPACK_DP(DA, &va);
FP_MUL_D(D_TMP, DA, DB);
FP_SUB_D(DR, D_TMP, DC);
goto pack_d;
default:
goto bad_insn;
}
pack_d:
FP_PACK_DP(&vd, DR);
if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
vd = 0;
DEBUG_INFO("SW64 SIMD Emulation D-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc);
DEBUG_INFO("SW64 SIMD Emulation D-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr);
next_working_s:
switch (working_part) {
case WORKING_PART_0:
working_part = WORKING_PART_1;
vd_p0 = vd;
fex_p0 = _fex;
goto simd_working;
case WORKING_PART_1:
working_part = WORKING_PART_2;
vd_p1 = vd;
fex_p1 = _fex;
goto simd_working;
case WORKING_PART_2:
working_part = WORKING_PART_3;
vd_p2 = vd;
fex_p2 = _fex;
goto simd_working;
case WORKING_PART_3:
vd_p3 = vd;
fex_p3 = _fex;
goto done;
}
done:
if (fex_p0 || fex_p1 || fex_p2 || fex_p3) {
unsigned long fpcr_p0, fpcr_p1, fpcr_p2, fpcr_p3;
unsigned long swcr_p0, swcr_p1, swcr_p2, swcr_p3;
fpcr_p0 = fpcr_p1 = fpcr_p2 = fpcr_p3 = 0;
swcr_p0 = swcr_p1 = swcr_p2 = swcr_p3 = swcr;
/* manage fpcr_p0 */
if (fex_p0) {
swcr_p0 |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p0 = fpcr;
fpcr_p0 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p0 |= ieee_swcr_to_fpcr(swcr_p0);
}
if (fex_p1) {
swcr_p1 |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p1 = fpcr;
fpcr_p1 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p1 |= ieee_swcr_to_fpcr(swcr_p1);
}
if (fex_p2) {
swcr_p2 |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p2 = fpcr;
fpcr_p2 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p2 |= ieee_swcr_to_fpcr(swcr_p2);
}
if (fex_p3) {
swcr_p3 |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
current_thread_info()->ieee_state
|= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
/* Update hardware control register. */
fpcr_p3 = fpcr;
fpcr_p3 &= (~FPCR_MASK | FPCR_DYN_MASK);
fpcr_p3 |= ieee_swcr_to_fpcr(swcr_p3);
}
fpcr = fpcr_p0 | fpcr_p1 | fpcr_p2 | fpcr_p3;
DEBUG_INFO("fex_p0 = %#lx\n", fex_p0);
DEBUG_INFO("fex_p1 = %#lx\n", fex_p1);
DEBUG_INFO("fex_p2 = %#lx\n", fex_p2);
DEBUG_INFO("fex_p3 = %#lx\n", fex_p3);
DEBUG_INFO("SIMD emulation almost finished.before write fpcr = %#lx\n", fpcr);
wrfpcr(fpcr);
DEBUG_INFO("Before write fp: vp_p0=%#lx, vc_p1=%#lx, vc_p2=%#lx, vc_p3=%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
write_fp_reg_d(fd, vd_p0, vd_p1, vd_p2, vd_p3); /* write to fd */
/* Do we generate a signal? */
_fex = (fex_p0 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p1 & swcr & IEEE_TRAP_ENABLE_MASK)
| (fex_p2 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p3 & swcr & IEEE_TRAP_ENABLE_MASK);
si_code = 0;
if (_fex) {
if (_fex & IEEE_TRAP_ENABLE_DNO)
si_code = FPE_FLTUND;
if (_fex & IEEE_TRAP_ENABLE_INE)
si_code = FPE_FLTRES;
if (_fex & IEEE_TRAP_ENABLE_UNF)
si_code = FPE_FLTUND;
if (_fex & IEEE_TRAP_ENABLE_OVF)
si_code = FPE_FLTOVF;
if (_fex & IEEE_TRAP_ENABLE_DZE)
si_code = FPE_FLTDIV;
if (_fex & IEEE_TRAP_ENABLE_INV)
si_code = FPE_FLTINV;
}
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
return si_code;
}
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
return 0;
bad_insn:
printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc);
return -1;
}
/*
 * Read SIMD register @reg as four 32-bit lanes.
 *
 * sw64_read_simd_fp_m_s() fills two 64-bit words; lanes 0 and 1 are the low
 * and high halves of the first word, lanes 2 and 3 those of the second.
 * Each lane is returned zero-extended through the matching @val_pN pointer.
 */
void read_fp_reg_s(unsigned long reg, unsigned long *val_p0,
		unsigned long *val_p1, unsigned long *val_p2, unsigned long *val_p3)
{
	const unsigned long lane_mask = 0xffffffffUL;
	unsigned long packed[2];

	sw64_read_simd_fp_m_s(reg, packed);

	*val_p0 = packed[0] & lane_mask;
	*val_p1 = (packed[0] >> 32) & lane_mask;
	*val_p2 = packed[1] & lane_mask;
	*val_p3 = (packed[1] >> 32) & lane_mask;
}
/*
 * Read SIMD register @reg as four 64-bit lanes.
 *
 * sw64_read_simd_fp_m_d() fills four 64-bit words; word N is returned
 * through @val_pN.
 */
void read_fp_reg_d(unsigned long reg, unsigned long *val_p0,
		unsigned long *val_p1, unsigned long *val_p2, unsigned long *val_p3)
{
	unsigned long lanes[4];

	sw64_read_simd_fp_m_d(reg, lanes);

	*val_p0 = lanes[0];
	*val_p1 = lanes[1];
	*val_p2 = lanes[2];
	*val_p3 = lanes[3];
}
/*
 * Write four 32-bit lanes to SIMD register @reg.
 *
 * Only the low 32 bits of each @val_pN are used.  Lanes 0/1 are packed into
 * the low/high halves of the first 64-bit word and lanes 2/3 into those of
 * the second before both words are handed to sw64_write_simd_fp_reg_s().
 */
void write_fp_reg_s(unsigned long reg, unsigned long val_p0,
		unsigned long val_p1, unsigned long val_p2, unsigned long val_p3)
{
	const unsigned long lane_mask = 0xffffffffUL;
	unsigned long word_lo, word_hi;

	word_lo = ((val_p1 & lane_mask) << 32) | (val_p0 & lane_mask);
	word_hi = ((val_p3 & lane_mask) << 32) | (val_p2 & lane_mask);

	sw64_write_simd_fp_reg_s(reg, word_lo, word_hi);
}
/*
 * Write four 64-bit lanes to SIMD register @reg by passing them unchanged,
 * in lane order, to sw64_write_simd_fp_reg_d().
 */
void write_fp_reg_d(unsigned long reg,
		    unsigned long val_p0, unsigned long val_p1,
		    unsigned long val_p2, unsigned long val_p3)
{
	sw64_write_simd_fp_reg_d(reg, val_p0, val_p1, val_p2, val_p3);
}