Denis Drakhnya
52e67df3a1
Until now we had a separate stage to write the results of operations to registers. Now operations write their result immediately to the destination register. Registers are saved to a temporary location if following operations in a bundle must read them. The change eliminates additional branches for conditional operations in the write stage and reduces data movement. Signed-off-by: Denis Drakhnya <numas13@gmail.com>
609 lines
21 KiB
C
609 lines
21 KiB
C
#include "qemu/osdep.h"
|
|
#include "qemu/log.h"
|
|
#include "cpu.h"
|
|
#include "helper-tcg.h"
|
|
#include "exec/exec-all.h"
|
|
#include "qemu/host-utils.h"
|
|
#include "exec/helper-proto.h"
|
|
|
|
static FloatRoundMode rm[] = {
|
|
float_round_nearest_even,
|
|
float_round_down,
|
|
float_round_up,
|
|
float_round_to_zero,
|
|
};
|
|
|
|
static inline int fpu_flags(int flags)
|
|
{
|
|
return (flags & float_flag_invalid ? FP_IE : 0) |
|
|
(flags & float_flag_divbyzero ? FP_ZE : 0) |
|
|
(flags & float_flag_overflow ? FP_OE : 0) |
|
|
(flags & float_flag_underflow ? FP_UE : 0) |
|
|
(flags & float_flag_inexact ? FP_PE : 0) |
|
|
(flags & float_flag_input_denormal ? FP_DE : 0);
|
|
}
|
|
|
|
/*
 * Finish an operation on the fx status: fold the flags raised since the
 * matching fx_save_exception_flags() into fpsr.ef, re-raise the flags that
 * were set aside (old_flags), and set fpsr.es / fpsr.b when any flag in
 * fpsr.ef is not masked by fpcr.em.
 */
static inline void fx_merge_exception_flags(CPUE2KState *env, int old_flags)
{
    float_status *st = &env->fx_status;
    int raised = get_float_exception_flags(st);

    float_raise(old_flags, st);
    env->fpsr.ef |= fpu_flags(raised);

    if ((env->fpsr.ef & ~env->fpcr.em & FP_EM) != 0) {
        env->fpsr.es = 1;
        env->fpsr.b = 1;
    }
}
|
|
|
|
/*
 * Finish an operation on the fp status: fold the flags raised since the
 * matching fp_save_exception_flags() into pfpfr.ef and re-raise the flags
 * that were set aside (old_flags).
 */
static inline void fp_merge_exception_flags(CPUE2KState *env, int old_flags)
{
    float_status *st = &env->fp_status;
    int raised = get_float_exception_flags(st);

    float_raise(old_flags, st);
    env->pfpfr.ef |= fpu_flags(raised);
}
|
|
|
|
/*
 * Clear the accumulated exception flags of *s and return the value they
 * had before, so the caller can restore them later.
 */
static inline int save_exception_flags(float_status *s)
{
    const int saved = get_float_exception_flags(s);

    set_float_exception_flags(0, s);
    return saved;
}
|
|
|
|
/* Shorthands applying save_exception_flags() to env's fx / fp status. */
#define fx_save_exception_flags(e) \
    save_exception_flags(&(e)->fx_status)
#define fp_save_exception_flags(e) \
    save_exception_flags(&(e)->fp_status)
|
|
|
|
/*
 * Reprogram env->fx_status from fpcr: the rounding mode comes from fpcr.rc
 * and the floatx80 rounding precision from fpcr.pc.  A reserved or unknown
 * precision value is logged and aborts.
 */
void e2k_update_fx_status(CPUE2KState *env)
{
    float_status *st = &env->fx_status;
    int prec;

    set_float_rounding_mode(rm[env->fpcr.rc], st);

    switch (env->fpcr.pc) {
    case FPCR_PC_XP:
        prec = floatx80_precision_x;
        break;
    case FPCR_PC_DP:
        prec = floatx80_precision_d;
        break;
    case FPCR_PC_SP:
        prec = floatx80_precision_s;
        break;
    case FPCR_PC_RESERVED:
    default:
        qemu_log_mask(LOG_UNIMP, "unknown precision mode 0x%x\n", env->fpcr.pc);
        abort();
    }

    set_floatx80_rounding_precision(prec, st);
}
|
|
|
|
/* Reprogram the rounding mode of env->fp_status from pfpfr.rc. */
void e2k_update_fp_status(CPUE2KState *env)
{
    const FloatRoundMode mode = rm[env->pfpfr.rc];

    set_float_rounding_mode(mode, &env->fp_status);
}
|
|
|
|
/* Bits of the flags operand examined by the toif_* helpers. */
#define TOIF_RC_CURRENT        0x4 /* keep the rounding mode already in effect */
#define TOIF_RC_IGNORE_INEXACT 0x8 /* drop the inexact flag after the operation */
|
|
|
|
/*
 * Select the rounding mode encoded in the low bits of flags, unless
 * TOIF_RC_CURRENT is set, in which case *s is left untouched.
 */
static inline void toif_set_round_mode(uint32_t flags, float_status *s)
{
    if (flags & TOIF_RC_CURRENT) {
        return;
    }

    set_float_rounding_mode(rm[flags & FP_RC_CHOP], s);
}
|
|
|
|
/* Remove float_flag_inexact from *s when TOIF_RC_IGNORE_INEXACT is set. */
static inline void toif_clear_inexact(uint32_t flags, float_status *s)
{
    if (!(flags & TOIF_RC_IGNORE_INEXACT)) {
        return;
    }

    set_float_exception_flags(
        get_float_exception_flags(s) & ~float_flag_inexact, s);
}
|
|
|
|
#ifdef TARGET_E2K_PRECISE_FSQRTID
/*
 * Placeholder for a precise fsqrtid implementation.  Defining
 * TARGET_E2K_PRECISE_FSQRTID deliberately stops the build until the body
 * is written.  The return type is uint64_t, matching the other helpers
 * in this file.
 */
uint64_t HELPER(fsqrtid)(CPUE2KState *env, uint64_t x)
{
#error implement
    return x;
}
#endif
|
|
|
|
#define IMPL_FSCALE(name, ty, exp_len, exp_off, mul, cvt) \
|
|
ty HELPER(name)(CPUE2KState *env, ty src1, uint32_t src2) \
|
|
{ \
|
|
int32_t p = (int32_t) src2; \
|
|
ty max = (1 << exp_len) - 1; \
|
|
int32_t bias = max >> 1; \
|
|
ty exp; \
|
|
ty m = 0; \
|
|
if (p <= -bias) { \
|
|
exp = 0; \
|
|
p += bias + exp_off - 1; \
|
|
if (p >= 0 && p < exp_off) { \
|
|
m = 1ULL << p; \
|
|
} else { \
|
|
m = p == -1; \
|
|
} \
|
|
} else if (p > bias) { \
|
|
exp = max; \
|
|
} else { \
|
|
exp = bias + p; \
|
|
} \
|
|
ty s2 = (exp << exp_off) | m; \
|
|
return mul(env, src1, s2); \
|
|
}
|
|
|
|
IMPL_FSCALE(fscaled, uint64_t, 11, 52, helper_fmuld, uint64_to_float64)
|
|
IMPL_FSCALE(fscales, uint32_t, 8, 23, helper_fmuls, uint32_to_float32)
|
|
|
|
/*
 * Scale the floatx80 value *a by a factor built from the signed integer b
 * and store the product in *r (the multiply is done by helper_fxmulxx).
 *
 * The factor is assembled directly as a floatx80 bit pattern:
 *   - b within the normal exponent range: exponent = bias + b, with only
 *     bit 63 of the significand set;
 *   - b above the bias: exponent field all ones, bit 63 set;
 *   - b at or below -bias: exponent field 0 and at most one low
 *     significand bit set.
 *
 * Bit 63 is produced with a 64-bit constant (1ULL).  `1UL << 63` is
 * undefined behaviour on hosts where unsigned long is 32 bits wide.
 */
void HELPER(fxscalesx)(floatx80 *r, CPUE2KState *env, floatx80 *a, uint32_t b)
{
    floatx80 v;
    int32_t p = (int32_t) b;
    uint16_t max = (1 << 15) - 1;
    int16_t bias = max >> 1;

    if (p <= -bias) {
        v.high = 0;
        p += bias + 62;
        if (p >= 0 && p < 63) {
            v.low = 1ULL << p;
        } else {
            v.low = p == -1;
        }
    } else if (p > bias) {
        v.low = 1ULL << 63;
        v.high = max;
    } else {
        v.low = 1ULL << 63;
        v.high = bias + p;
    }

    helper_fxmulxx(r, env, a, &v);
}
|
|
|
|
/* Operand kind -> name stem used when pasting softfloat function names. */
#define type_name_i32   int32
#define type_name_i64   int64
#define type_name_f32   float32
#define type_name_f64   float64
#define type_name_f80   floatx80
#define type_name(S)    glue(type_name_, S)
|
|
|
|
/* Operand kind -> C type of the helper parameter (f80 is passed by pointer). */
#define arg_type_i32    uint32_t
#define arg_type_i64    uint64_t
#define arg_type_f32    uint32_t
#define arg_type_f64    uint64_t
#define arg_type_f80    floatx80 *
#define arg_type(S)     glue(arg_type_, S)
|
|
|
|
/*
 * Result kind -> extra leading helper parameter.  Only f80 results need
 * one: they are written through the `ret` pointer instead of returned.
 */
#define ret_arg_i32
#define ret_arg_i64
#define ret_arg_f32
#define ret_arg_f64
#define ret_arg_f80     floatx80 *ret,
#define ret_arg(S)      glue(ret_arg_, S)
|
|
|
|
/* Result kind -> C return type of the helper (void for f80). */
#define ret_type_i32    uint32_t
#define ret_type_i64    uint64_t
#define ret_type_f32    uint32_t
#define ret_type_f64    uint64_t
#define ret_type_f80    void
#define ret_type(S)     glue(ret_type_, S)
|
|
|
|
/*
 * Turn a raw helper argument of kind S into the value the operation works
 * on: integers pass through, f32/f64 bits are wrapped by make_floatNN(),
 * and f80 pointers are dereferenced.
 */
#define make_i32(v)     (v)
#define make_i64(v)     (v)
#define make_f32(v)     make_float32(v)
#define make_f64(v)     make_float64(v)
#define make_f80(v)     (*(v))
#define make(S, v)      glue(make_, S)(v)
|
|
|
|
/* Extract the raw bits of an f32/f64 value via floatNN_val(). */
#define float_val_f32(v)    float32_val(v)
#define float_val_f64(v)    float64_val(v)
#define float_val(S, v)     glue(float_val_, S)(v)
|
|
|
|
/* Operand kind -> C type of the local variable holding the value. */
#define type_i32    uint32_t
#define type_i64    uint64_t
#define type_f32    float32
#define type_f64    float64
#define type_f80    floatx80
#define type(S)     glue(type_, S)
|
|
|
|
/*
 * convert(F, T, v, s) pastes together "<F>_to_<T>" and calls it with
 * (v, s).  The same-kind conversions below are defined as the identity.
 */
#define int32_to_int32(v, s)            (v)
#define int64_to_int64(v, s)            (v)
#define float32_to_float32(v, s)        (v)
#define float64_to_float64(v, s)        (v)
#define floatx80_to_floatx80(v, s)      (v)
#define convert(F, T, v, s) \
    glue3(type_name(F), _to_, type_name(T))(v, s)
|
|
|
|
/*
 * Deliver the result of a helper of kind S: integers are returned as-is,
 * f32/f64 are returned as raw bits, and f80 is stored through `ret`.
 */
#define ret_i32(v)      return (v)
#define ret_i64(v)      return (v)
#define ret_f32(v)      return float32_val(v)
#define ret_f64(v)      return float64_val(v)
#define ret_f80(v)      *ret = v
#define ret(S, v)       glue(ret_, S)(v)
|
|
|
|
/* Trivial operations usable as `op` arguments of the generator macros. */
#define fpu_mov(x, s)   (x)         /* pass the operand through, ignore status */
#define ident(x)        (x)         /* keep a predicate result as it is */
#define not(x)          (!(x))      /* logically negate a predicate result */
|
|
|
|
/*
 * Define a one-operand helper `name` on the FPU (fx or fp) status.
 * The S2-kind operand is converted to kind T2, `op` produces a TR-kind
 * value from it, and that is converted to the R-kind result.  Exception
 * flags are set aside before and merged after the operation.
 */
#define IMPL_ALOPF2_FPU_BASIC(FPU, name, S2, R, T2, TR, op) \
    ret_type(R) HELPER(name)(ret_arg(R) CPUE2KState *env, arg_type(S2) s2) \
    { \
        float_status *fst = &env->glue(FPU, _status); \
        int saved_flags = glue(FPU, _save_exception_flags)(env); \
        type(T2) arg2 = convert(S2, T2, make(S2, s2), fst); \
        type(TR) val = op(arg2, fst); \
        type(R) res = convert(TR, R, val, fst); \
        \
        glue(FPU, _merge_exception_flags)(env, saved_flags); \
        ret(R, res); \
    }
|
|
|
|
/* One-operand helper computed directly in the operand and result kinds. */
#define IMPL_ALOPF2_FPU(FPU, name, S2, R, op)   \
    IMPL_ALOPF2_FPU_BASIC(FPU, name,            \
                          S2, R, /* helper operand / result kinds */ \
                          S2, R, /* kinds seen by op */ \
                          op)
|
|
|
|
/*
 * One-operand helper on the fp status, computed directly in the operand
 * kind S2 and the result kind R.
 *
 * IMPL_ALOPF2_FPU_BASIC takes (FPU, name, S2, R, T2, TR, op), so S2 and R
 * must be forwarded in that order.  They used to be swapped, which only
 * went unnoticed because every current user passes the same kind for both.
 */
#define IMPL_ALOPF2_FP(name, S2, R, op) \
    IMPL_ALOPF2_FPU_BASIC(fp, name, S2, R, S2, R, op)
|
|
|
|
/*
 * Define a one-operand helper `name` whose R-kind result is produced by
 * calling `op` on the S2-kind operand.  Exception flags are set aside
 * before and merged after the call.
 */
#define IMPL_ALOPF2_FPU_CVT_OP(FPU, name, S2, R, op) \
    ret_type(R) HELPER(name)(ret_arg(R) CPUE2KState *env, arg_type(S2) s2) \
    { \
        float_status *fst = &env->glue(FPU, _status); \
        int saved_flags = glue(FPU, _save_exception_flags)(env); \
        type(R) res = op(make(S2, s2), fst); \
        \
        glue(FPU, _merge_exception_flags)(env, saved_flags); \
        ret(R, res); \
    }
|
|
|
|
/*
 * Define a one-operand helper `name` that converts the S2-kind operand to
 * kind R with convert().  Exception flags are set aside before and merged
 * after the conversion.
 */
#define IMPL_ALOPF2_FPU_CVT(FPU, name, S2, R) \
    ret_type(R) HELPER(name)(ret_arg(R) CPUE2KState *env, arg_type(S2) s2) \
    { \
        float_status *fst = &env->glue(FPU, _status); \
        int saved_flags = glue(FPU, _save_exception_flags)(env); \
        type(R) res = convert(S2, R, make(S2, s2), fst); \
        \
        glue(FPU, _merge_exception_flags)(env, saved_flags); \
        ret(R, res); \
    }
|
|
|
|
/*
 * Define a two-operand helper `name` on the FPU (fx or fp) status.
 * Operands of kinds S1/S2 are converted to kinds T1/T2, `op` produces a
 * TR-kind value from them, and that is converted to the R-kind result.
 * Exception flags are set aside before and merged after the operation.
 */
#define IMPL_ALOPF1_FPU(FPU, name, S1, S2, R, T1, T2, TR, op) \
    ret_type(R) HELPER(name)(ret_arg(R) CPUE2KState *env, \
                             arg_type(S1) s1, arg_type(S2) s2) \
    { \
        float_status *fst = &env->glue(FPU, _status); \
        int saved_flags = glue(FPU, _save_exception_flags)(env); \
        type(T1) arg1 = convert(S1, T1, make(S1, s1), fst); \
        type(T2) arg2 = convert(S2, T2, make(S2, s2), fst); \
        type(TR) val = op(arg1, arg2, fst); \
        type(R) res = convert(TR, R, val, fst); \
        \
        glue(FPU, _merge_exception_flags)(env, saved_flags); \
        ret(R, res); \
    }
|
|
|
|
/* Two-operand helper on the fx status; the operation itself runs in f80. */
#define IMPL_ALOPF1_FX(name, S1, S2, R, op)         \
    IMPL_ALOPF1_FPU(fx, name, S1, S2, R,            \
                    f80, f80, f80, op)

/* Two-operand helper on the fp status; operates in the operand kinds. */
#define IMPL_ALOPF1_FP(name, S1, S2, R, op)         \
    IMPL_ALOPF1_FPU(fp, name, S1, S2, R,            \
                    S1, S2, R, op)
|
|
|
|
/*
 * Define a two-operand comparison helper `name`.  The predicate is
 * op2(op1(a, b, status)); the TR-kind intermediate is -1 when it holds
 * and 0 otherwise, and is then converted to the R-kind result.
 */
#define IMPL_ALOPF1_FPU_CMP_BASIC(FPU, name, S1, S2, R, T1, T2, TR, op1, op2) \
    ret_type(R) HELPER(name)(ret_arg(R) CPUE2KState *env, \
                             arg_type(S1) s1, arg_type(S2) s2) \
    { \
        float_status *fst = &env->glue(FPU, _status); \
        int saved_flags = glue(FPU, _save_exception_flags)(env); \
        type(T1) arg1 = convert(S1, T1, make(S1, s1), fst); \
        type(T2) arg2 = convert(S2, T2, make(S2, s2), fst); \
        type(TR) mask = 0; \
        type(R) res; \
        \
        if (op2(op1(arg1, arg2, fst))) { \
            mask = -1; \
        } \
        res = convert(TR, R, mask, fst); \
        glue(FPU, _merge_exception_flags)(env, saved_flags); \
        ret(R, res); \
    }
|
|
|
|
/* Comparison helper: both operands are compared in kind T, mask is kind R. */
#define IMPL_ALOPF1_FPU_CMP(FPU, name, S1, S2, R, T, op1, op2)  \
    IMPL_ALOPF1_FPU_CMP_BASIC(FPU, name, S1, S2, R,             \
                              T, T, R, op1, op2)
|
|
|
|
/*
 * Define a two-operand helper `name` on the fp status whose `op` also
 * receives the result kind as its first argument (see FPU_MIN/FPU_MAX).
 * The result is returned as raw bits via float_val().
 */
#define IMPL_ALOPF1_FP_S(name, S1, S2, R, op) \
    ret_type(R) HELPER(name)(CPUE2KState *env, arg_type(S1) s1, \
                             arg_type(S2) s2) \
    { \
        float_status *fst = &env->fp_status; \
        int saved_flags = fp_save_exception_flags(env); \
        type(R) res = op(R, make(S1, s1), make(S2, s2), fst); \
        \
        fp_merge_exception_flags(env, saved_flags); \
        return float_val(R, res); \
    }
|
|
|
|
/*
 * Define a three-operand helper `name` on the FPU (fx or fp) status.
 * Operands of kinds S1..S3 are converted to kinds T1..T3, `op` produces a
 * TR-kind value from them, and that is converted to the R-kind result.
 * Exception flags are set aside before and merged after the operation.
 */
#define IMPL_ALOPF21_FPU(FPU, name, S1, S2, S3, R, T1, T2, T3, TR, op) \
    ret_type(R) HELPER(name)(ret_arg(R) CPUE2KState *env, \
                             arg_type(S1) s1, arg_type(S2) s2, \
                             arg_type(S3) s3) \
    { \
        float_status *fst = &env->glue(FPU, _status); \
        int saved_flags = glue(FPU, _save_exception_flags)(env); \
        type(T1) arg1 = convert(S1, T1, make(S1, s1), fst); \
        type(T2) arg2 = convert(S2, T2, make(S2, s2), fst); \
        type(T3) arg3 = convert(S3, T3, make(S3, s3), fst); \
        type(TR) val = op(arg1, arg2, arg3, fst); \
        type(R) res = convert(TR, R, val, fst); \
        \
        glue(FPU, _merge_exception_flags)(env, saved_flags); \
        ret(R, res); \
    }
|
|
|
|
/* Three-operand helper on the fp status with every kind equal to T. */
#define IMPL_ALOPF21_FP(name, T, op)            \
    IMPL_ALOPF21_FPU(fp, name,                  \
                     T, T, T, T, /* operand and result kinds */ \
                     T, T, T, T, /* kinds seen by op */ \
                     op)
|
|
|
|
#define IMPL_ALOPF1_FX_MANY(name, op) \
|
|
IMPL_ALOPF1_FX(glue3(fx, name, ss), f80, f32, f32, op) \
|
|
IMPL_ALOPF1_FX(glue3(fx, name, dd), f80, f64, f64, op) \
|
|
IMPL_ALOPF1_FX(glue3(fx, name, sx), f80, f32, f80, op) \
|
|
IMPL_ALOPF1_FX(glue3(fx, name, dx), f80, f64, f80, op) \
|
|
IMPL_ALOPF1_FX(glue3(fx, name, xx), f80, f80, f80, op) \
|
|
IMPL_ALOPF1_FX(glue3(fx, name, xd), f80, f80, f64, op) \
|
|
IMPL_ALOPF1_FX(glue3(fx, name, xs), f80, f80, f32, op)
|
|
|
|
IMPL_ALOPF1_FX_MANY(add, floatx80_add)
|
|
IMPL_ALOPF1_FX_MANY(sub, floatx80_sub)
|
|
IMPL_ALOPF1_FX_MANY(mul, floatx80_mul)
|
|
IMPL_ALOPF1_FX_MANY(div, floatx80_div)
|
|
|
|
#define floatx80_rsub(x, y, s) floatx80_sub(y, x, s)
|
|
|
|
IMPL_ALOPF1_FX(fxrsubss, f80, f32, f32, floatx80_rsub)
|
|
IMPL_ALOPF1_FX(fxrsubdd, f80, f64, f64, floatx80_rsub)
|
|
IMPL_ALOPF1_FX(fxrsubsx, f80, f32, f80, floatx80_rsub)
|
|
IMPL_ALOPF1_FX(fxrsubdx, f80, f64, f80, floatx80_rsub)
|
|
|
|
IMPL_ALOPF1_FX(fxdivtss, f32, f80, f32, floatx80_div)
|
|
IMPL_ALOPF1_FX(fxdivtdd, f64, f80, f64, floatx80_div)
|
|
IMPL_ALOPF1_FX(fxdivtsx, f32, f80, f80, floatx80_div)
|
|
IMPL_ALOPF1_FX(fxdivtdx, f64, f80, f80, floatx80_div)
|
|
|
|
static floatx80 fxsqrtt(floatx80 a, floatx80 b, float_status *s)
|
|
{
|
|
// TODO: fxsqrtt
|
|
return floatx80_sqrt(a, s);
|
|
}
|
|
|
|
IMPL_ALOPF1_FX(fxsqrttsx, f32, f80, f80, fxsqrtt)
|
|
IMPL_ALOPF1_FX(fxsqrttdx, f64, f80, f80, fxsqrtt)
|
|
IMPL_ALOPF1_FX(fxsqrttxx, f80, f80, f80, fxsqrtt)
|
|
|
|
/*
 * Compare x and y with <p>_compare_quiet() and yield a flags word:
 *   greater -> 0x00, less -> 0x01, equal -> 0x40,
 *   unordered (or either operand a NaN) -> 0x45.
 * NaN operands are detected up front, so the compare is skipped for them.
 */
#define CMPODF_BASIC(p, x, y, s) ({ \
    uint32_t odf_bits_ = 0x45; \
    if (!(glue(p, _is_any_nan)(x) || glue(p, _is_any_nan)(y))) { \
        FloatRelation odf_rel_ = glue(p, _compare_quiet)(x, y, s); \
        switch (odf_rel_) { \
        case float_relation_greater: \
            odf_bits_ = 0x00; \
            break; \
        case float_relation_less: \
            odf_bits_ = 0x01; \
            break; \
        case float_relation_equal: \
            odf_bits_ = 0x40; \
            break; \
        case float_relation_unordered: \
            odf_bits_ = 0x45; \
            break; \
        } \
    } \
    odf_bits_; \
})
|
|
|
|
/* Flag-producing compares for each floating-point format. */
#define FXCMPODF(x, y, s)   CMPODF_BASIC(floatx80, x, y, s)
#define FCMPODDF(x, y, s)   CMPODF_BASIC(float64, x, y, s)
#define FCMPODSF(x, y, s)   CMPODF_BASIC(float32, x, y, s)

/* TODO: cmpudf - for now the "ud" forms simply alias the "od" forms. */
#define FXCMPUDF    FXCMPODF
#define FCMPUDSF    FCMPODSF
#define FCMPUDDF    FCMPODDF
|
|
|
|
#define IMPL_CMPF_FX(name, S1, S2, op) \
|
|
IMPL_ALOPF1_FPU(fx, name, S1, S2, i32, f80, f80, i32, op)
|
|
|
|
IMPL_CMPF_FX(fxcmpodsf, f80, f32, FXCMPODF)
|
|
IMPL_CMPF_FX(fxcmpoddf, f80, f64, FXCMPODF)
|
|
IMPL_CMPF_FX(fxcmpodxf, f80, f80, FXCMPODF)
|
|
|
|
IMPL_CMPF_FX(fxcmpudsf, f80, f32, FXCMPUDF)
|
|
IMPL_CMPF_FX(fxcmpuddf, f80, f64, FXCMPUDF)
|
|
IMPL_CMPF_FX(fxcmpudxf, f80, f80, FXCMPUDF)
|
|
|
|
#define IMPL_CMPF_FP(name, T, R, op) \
|
|
IMPL_ALOPF1_FPU(fp, name, T, T, R, T, T, R, op)
|
|
|
|
IMPL_CMPF_FP(fcmpodsf, f32, i32, FCMPODSF)
|
|
IMPL_CMPF_FP(fcmpoddf, f64, i32, FCMPODDF)
|
|
|
|
IMPL_CMPF_FP(fcmpudsf, f32, i32, FCMPUDSF)
|
|
IMPL_CMPF_FP(fcmpuddf, f64, i32, FCMPUDDF)
|
|
|
|
#define cmp_op(T, op) glue3(type_name(T), _, op)
|
|
|
|
#define IMPL_ALOPF1_FPU_CMP_ALL(F, P, S, T, R, A, B) \
|
|
IMPL_ALOPF1_FPU_CMP(F, glue3(P, cmpeq, S), A, B, R, T, cmp_op(T, eq), ident) \
|
|
IMPL_ALOPF1_FPU_CMP(F, glue3(P, cmpneq, S), A, B, R, T, cmp_op(T, eq), not) \
|
|
IMPL_ALOPF1_FPU_CMP(F, glue3(P, cmple, S), A, B, R, T, cmp_op(T, le), ident) \
|
|
IMPL_ALOPF1_FPU_CMP(F, glue3(P, cmpnle, S), A, B, R, T, cmp_op(T, le), not) \
|
|
IMPL_ALOPF1_FPU_CMP(F, glue3(P, cmplt, S), A, B, R, T, cmp_op(T, lt), ident) \
|
|
IMPL_ALOPF1_FPU_CMP(F, glue3(P, cmpnlt, S), A, B, R, T, cmp_op(T, lt), not) \
|
|
IMPL_ALOPF1_FPU_CMP(F, glue3(P, cmpuod, S), A, B, R, T, cmp_op(T, unordered), ident) \
|
|
IMPL_ALOPF1_FPU_CMP(F, glue3(P, cmpod, S), A, B, R, T, cmp_op(T, unordered), not)
|
|
|
|
IMPL_ALOPF1_FPU_CMP_ALL(fp, f, s, f32, i32, f32, f32)
|
|
IMPL_ALOPF1_FPU_CMP_ALL(fp, f, d, f64, i64, f64, f64)
|
|
IMPL_ALOPF1_FPU_CMP_ALL(fx, fx, s, f80, i64, f80, f32)
|
|
IMPL_ALOPF1_FPU_CMP_ALL(fx, fx, d, f80, i64, f80, f64)
|
|
IMPL_ALOPF1_FPU_CMP_ALL(fx, fx, x, f80, i64, f80, f80)
|
|
|
|
IMPL_ALOPF2_FPU_CVT(fp, fstofd, f32, f64)
|
|
IMPL_ALOPF2_FPU_CVT(fx, fstofx, f32, f80)
|
|
IMPL_ALOPF2_FPU_CVT(fp, fdtofs, f64, f32)
|
|
IMPL_ALOPF2_FPU_CVT(fx, fdtofx, f64, f80)
|
|
IMPL_ALOPF2_FPU_CVT(fx, fxtofs, f80, f32)
|
|
IMPL_ALOPF2_FPU_CVT(fx, fxtofd, f80, f64)
|
|
|
|
IMPL_ALOPF2_FPU_CVT(fp, istofs, i32, f32)
|
|
IMPL_ALOPF2_FPU_CVT(fp, istofd, i32, f64)
|
|
IMPL_ALOPF2_FPU_CVT(fx, istofx, i32, f80)
|
|
|
|
IMPL_ALOPF2_FPU_CVT(fp, idtofs, i64, f32)
|
|
IMPL_ALOPF2_FPU_CVT(fp, idtofd, i64, f64)
|
|
IMPL_ALOPF2_FPU_CVT(fx, idtofx, i64, f80)
|
|
|
|
/*
|
|
* I assume e2k behaves like x86.
|
|
*
|
|
* e2k/x86 mandates that we return the indefinite integer value for the result
|
|
* of any float-to-integer conversion that raises the 'invalid' exception.
|
|
* Wrap the softfloat functions to get this behaviour.
|
|
*/
|
|
#define WRAP_F2I(RET, func, T, defval) \
|
|
static RET glue(e2k_, func)(T x, float_status *s) \
|
|
{ \
|
|
int oldflags, newflags; \
|
|
RET r; \
|
|
\
|
|
oldflags = get_float_exception_flags(s); \
|
|
set_float_exception_flags(0, s); \
|
|
r = func(x, s); \
|
|
newflags = get_float_exception_flags(s); \
|
|
if (newflags & float_flag_invalid) { \
|
|
r = defval; \
|
|
} \
|
|
set_float_exception_flags(newflags | oldflags, s); \
|
|
return r; \
|
|
}
|
|
|
|
WRAP_F2I(int32_t, float32_to_int32, float32, INT32_MIN)
|
|
WRAP_F2I(int32_t, float32_to_int32_round_to_zero, float32, INT32_MIN)
|
|
WRAP_F2I(int64_t, float32_to_int64, float32, INT64_MIN)
|
|
WRAP_F2I(int64_t, float32_to_int64_round_to_zero, float32, INT64_MIN)
|
|
WRAP_F2I(int32_t, float64_to_int32, float64, INT32_MIN)
|
|
WRAP_F2I(int32_t, float64_to_int32_round_to_zero, float64, INT32_MIN)
|
|
WRAP_F2I(int64_t, float64_to_int64, float64, INT64_MIN)
|
|
WRAP_F2I(int64_t, float64_to_int64_round_to_zero, float64, INT64_MIN)
|
|
WRAP_F2I(int32_t, floatx80_to_int32, floatx80, INT32_MIN)
|
|
WRAP_F2I(int32_t, floatx80_to_int32_round_to_zero, floatx80, INT32_MIN)
|
|
WRAP_F2I(int64_t, floatx80_to_int64, floatx80, INT64_MIN)
|
|
WRAP_F2I(int64_t, floatx80_to_int64_round_to_zero, floatx80, INT64_MIN)
|
|
|
|
IMPL_ALOPF2_FPU_CVT_OP(fp, fstois, f32, i32, e2k_float32_to_int32)
|
|
IMPL_ALOPF2_FPU_CVT_OP(fp, fdtois, f64, i32, e2k_float64_to_int32)
|
|
IMPL_ALOPF2_FPU_CVT_OP(fx, fxtois, f80, i32, e2k_floatx80_to_int32)
|
|
IMPL_ALOPF2_FPU_CVT_OP(fp, fstoid, f32, i64, e2k_float32_to_int64)
|
|
IMPL_ALOPF2_FPU_CVT_OP(fp, fdtoid, f64, i64, e2k_float64_to_int64)
|
|
IMPL_ALOPF2_FPU_CVT_OP(fx, fxtoid, f80, i64, e2k_floatx80_to_int64)
|
|
|
|
IMPL_ALOPF2_FPU_CVT_OP(fp, fstoistr, f32, i32, e2k_float32_to_int32_round_to_zero)
|
|
IMPL_ALOPF2_FPU_CVT_OP(fp, fstoidtr, f32, i64, e2k_float32_to_int64_round_to_zero)
|
|
IMPL_ALOPF2_FPU_CVT_OP(fp, fdtoistr, f64, i32, e2k_float64_to_int32_round_to_zero)
|
|
IMPL_ALOPF2_FPU_CVT_OP(fp, fdtoidtr, f64, i64, e2k_float64_to_int64_round_to_zero)
|
|
IMPL_ALOPF2_FPU_CVT_OP(fx, fxtoistr, f80, i32, e2k_floatx80_to_int32_round_to_zero)
|
|
IMPL_ALOPF2_FPU_CVT_OP(fx, fxtoidtr, f80, i64, e2k_floatx80_to_int64_round_to_zero)
|
|
|
|
#define IMPL_FTOIF(name, A, B) \
|
|
static type(B) name(type(A) flags, type(B) f, float_status *s) \
|
|
{ \
|
|
FloatRoundMode oldrm = get_float_rounding_mode(s); \
|
|
type(B) ret; \
|
|
\
|
|
toif_set_round_mode(flags, s); \
|
|
ret = glue(type_name(B), _round_to_int)(f, s); \
|
|
set_float_rounding_mode(oldrm, s); \
|
|
toif_clear_inexact(flags, s); \
|
|
return ret; \
|
|
}
|
|
|
|
IMPL_FTOIF(fstoifs, i32, f32)
|
|
IMPL_FTOIF(fdtoifd, i64, f64)
|
|
|
|
IMPL_ALOPF1_FP(fstoifs, i32, f32, f32, fstoifs)
|
|
IMPL_ALOPF1_FP(fdtoifd, i64, f64, f64, fdtoifd)
|
|
|
|
/*
 * TODO: frcps: test if valid, test exception flags
 *
 * Both are computed with full-precision float32 division:
 *   frcps(a)   = 1 / a
 *   frsqrts(a) = 1 / sqrt(a)
 */
#define frcps(a, s) \
    float32_div(float32_one, a, s)
#define frsqrts(a, s) \
    float32_div(float32_one, float32_sqrt(a, s), s)
|
|
|
|
/*
 * Stand-in for the fsqrttd operation: currently returns the square root
 * of the first operand and ignores the second.
 * TODO: implement the real fsqrttd semantics.
 */
static float64 fsqrttd(float64 a, float64 b, float_status *s)
{
    const float64 root = float64_sqrt(a, s);

    return root;
}
|
|
|
|
/*
 * Assumption: e2k behaves like x86 here.
 *
 * The comparison used matters for the special cases: Intel specifies that
 * min and max return the second argument for (-0, 0), (NaN, anything) and
 * (anything, NaN).  Using a single "less than" test, with b as the
 * fallback, gives exactly that.
 */
#define FPU_MIN(T, a, b, s) (glue(type_name(T), _lt)(a, b, s) ? (a) : (b))
#define FPU_MAX(T, a, b, s) (glue(type_name(T), _lt)(b, a, s) ? (a) : (b))
|
|
|
|
#define IMPL_ALOPF1_FP_MANY(S, T) \
|
|
IMPL_ALOPF1_FP(glue(fadd, S), T, T, T, glue(type_name(T), _add)) \
|
|
IMPL_ALOPF1_FP(glue(fsub, S), T, T, T, glue(type_name(T), _sub)) \
|
|
IMPL_ALOPF1_FP(glue(fmul, S), T, T, T, glue(type_name(T), _mul)) \
|
|
IMPL_ALOPF1_FP(glue(fdiv, S), T, T, T, glue(type_name(T), _div)) \
|
|
IMPL_ALOPF1_FP_S(glue(fmin, S), T, T, T, FPU_MIN) \
|
|
IMPL_ALOPF1_FP_S(glue(fmax, S), T, T, T, FPU_MAX)
|
|
|
|
IMPL_ALOPF1_FP_MANY(s, f32)
|
|
IMPL_ALOPF1_FP_MANY(d, f64)
|
|
|
|
IMPL_ALOPF2_FP(fsqrts, f32, f32, float32_sqrt)
|
|
IMPL_ALOPF2_FP(frcps, f32, f32, frcps)
|
|
IMPL_ALOPF2_FP(frsqrts, f32, f32, frsqrts)
|
|
IMPL_ALOPF1_FP(fsqrttd, f64, f64, f64, fsqrttd)
|
|
|
|
/* Fused multiply-add for kind T with softfloat muladd flags f. */
#define FMA(T, x, y, z, f, s) \
    glue(type_name(T), _muladd)(x, y, z, f, s)

/* Plain forms: no muladd flags. */
#define FMAS(x, y, z, s)    FMA(f32, x, y, z, 0, s)
#define FMAD(x, y, z, s)    FMA(f64, x, y, z, 0, s)

/* "ms" forms: the addend c is negated. */
#define FMSS(x, y, z, s)    FMA(f32, x, y, z, float_muladd_negate_c, s)
#define FMSD(x, y, z, s)    FMA(f64, x, y, z, float_muladd_negate_c, s)

/* "nma" forms: the product is negated. */
#define FNMAS(x, y, z, s) \
    FMA(f32, x, y, z, float_muladd_negate_product, s)
#define FNMAD(x, y, z, s) \
    FMA(f64, x, y, z, float_muladd_negate_product, s)

/* "nms" forms: both the product and the addend are negated. */
#define FNMSS(x, y, z, s) \
    FMA(f32, x, y, z, float_muladd_negate_product | float_muladd_negate_c, s)
#define FNMSD(x, y, z, s) \
    FMA(f64, x, y, z, float_muladd_negate_product | float_muladd_negate_c, s)
|
|
|
|
IMPL_ALOPF21_FP(fmas, f32, FMAS)
|
|
IMPL_ALOPF21_FP(fmss, f32, FMSS)
|
|
IMPL_ALOPF21_FP(fnmas, f32, FNMAS)
|
|
IMPL_ALOPF21_FP(fnmss, f32, FNMSS)
|
|
IMPL_ALOPF21_FP(fmad, f64, FMAD)
|
|
IMPL_ALOPF21_FP(fmsd, f64, FMSD)
|
|
IMPL_ALOPF21_FP(fnmad, f64, FNMAD)
|
|
IMPL_ALOPF21_FP(fnmsd, f64, FNMSD)
|
|
|
|
/* Element count for each E2KReg member used by the qp helpers. */
#define qp_len_uw   4
#define qp_len_ud   2
#define qp_len(T)   glue(qp_len_, T)
|
|
|
|
/*
 * Define a three-source helper `name` that walks the T member of the
 * E2KReg operands element by element.  For each index i the elements of
 * s1, s2 and s3 are loaded as F-kind values a, b and c, and `code` is
 * run; it is expected to use r, env, i, a, b and c.  fp exception flags
 * are set aside before the loop and merged once after it.
 */
#define IMPL_OP3_QP_ENV(name, T, F, code) \
    void HELPER(name)(E2KReg *r, CPUE2KState *env, E2KReg *s1, \
                      E2KReg *s2, E2KReg *s3) \
    { \
        const int saved_flags = fp_save_exception_flags(env); \
        int i; \
        \
        for (i = 0; i < qp_len(T); ++i) { \
            type(F) a = make(F, s1->T[i]); \
            type(F) b = make(F, s2->T[i]); \
            type(F) c = make(F, s3->T[i]); \
            \
            { code; } \
        } \
        \
        fp_merge_exception_flags(env, saved_flags); \
    }
|
|
|
|
#define IMPL_OP3_QP_ENV_OP(name, T, F, op) \
|
|
IMPL_OP3_QP_ENV(name, T, F, { \
|
|
r->T[i] = float_val(F, op(a, b, c, &env->fp_status)); \
|
|
})
|
|
|
|
IMPL_OP3_QP_ENV_OP(qpfmas, uw, f32, FMAS)
|
|
IMPL_OP3_QP_ENV_OP(qpfmss, uw, f32, FMSS)
|
|
IMPL_OP3_QP_ENV_OP(qpfnmas, uw, f32, FNMAS)
|
|
IMPL_OP3_QP_ENV_OP(qpfnmss, uw, f32, FNMSS)
|
|
IMPL_OP3_QP_ENV_OP(qpfmad, ud, f64, FMAD)
|
|
IMPL_OP3_QP_ENV_OP(qpfmsd, ud, f64, FMSD)
|
|
IMPL_OP3_QP_ENV_OP(qpfnmad, ud, f64, FNMAD)
|
|
IMPL_OP3_QP_ENV_OP(qpfnmsd, ud, f64, FNMSD)
|
|
|
|
#define IMPL_OP3_QP_ENV_OP_ALT(name, T, F, op1, op2) \
|
|
IMPL_OP3_QP_ENV(name, T, F, { \
|
|
r->T[i] = float_val(F, i & 1 ? op2(a, b, c, &env->fp_status) : \
|
|
op1(a, b, c, &env->fp_status)); \
|
|
})
|
|
|
|
IMPL_OP3_QP_ENV_OP_ALT(qpfmass, uw, f32, FMAS, FMSS)
|
|
IMPL_OP3_QP_ENV_OP_ALT(qpfmsas, uw, f32, FMSS, FMAS)
|
|
IMPL_OP3_QP_ENV_OP_ALT(qpfmasd, ud, f64, FMAD, FMSD)
|
|
IMPL_OP3_QP_ENV_OP_ALT(qpfmsad, ud, f64, FMSD, FMAD)
|