2002-06-07 Chris Demetriou <cgd@broadcom.com>

Ed Satterthwaite <ehs@broadcom.com> * cp1.c (inner_mac, fp_mac, inner_rsqrt, fp_inv_sqrt) (fp_rsqrt, fp_madd, fp_msub, fp_nmadd, fp_nmsub): New functions. * sim-main.h (fp_rsqrt, fp_madd, fp_msub, fp_nmadd) (fp_nmsub): New prototypes. (RSquareRoot, MultiplyAdd, MultiplySub, NegMultiplyAdd) (NegMultiplySub): New defines. * mips.igen (RSQRT.fmt): Use RSquareRoot(). (MADD.D, MADD.S): Replace with... (MADD.fmt): New instruction. (MSUB.D, MSUB.S): Replace with... (MSUB.fmt): New instruction. (NMADD.D, NMADD.S): Replace with... (NMADD.fmt): New instruction. (NMSUB.D, NMSUB.S): Replace with... (NMSUB.fmt): New instruction.
2002-06-08 03:05:23 +00:00 · 2002-06-08 03:05:23 +00:00 · f3c08b7e16
commit f3c08b7e16
parent 986b7daaef
4 changed files with 315 additions and 74 deletions
--- a/sim/mips/ChangeLog
+++ b/sim/mips/ChangeLog
@ -1,3 +1,22 @@
+2002-06-07  Chris Demetriou  <cgd@broadcom.com>
+            Ed Satterthwaite  <ehs@broadcom.com>
+
+	* cp1.c (inner_mac, fp_mac, inner_rsqrt, fp_inv_sqrt)
+	(fp_rsqrt, fp_madd, fp_msub, fp_nmadd, fp_nmsub): New functions.
+	* sim-main.h (fp_rsqrt, fp_madd, fp_msub, fp_nmadd)
+	(fp_nmsub): New prototypes.
+	(RSquareRoot, MultiplyAdd, MultiplySub, NegMultiplyAdd)
+	(NegMultiplySub): New defines.
+	* mips.igen (RSQRT.fmt): Use RSquareRoot().
+	(MADD.D, MADD.S): Replace with...
+	(MADD.fmt): New instruction.
+	(MSUB.D, MSUB.S): Replace with...
+	(MSUB.fmt): New instruction.
+	(NMADD.D, NMADD.S): Replace with...
+	(NMADD.fmt): New instruction.
+	(NMSUB.D, NMSUB.S): Replace with...
+	(NMSUB.fmt): New instruction.
+
 2002-06-07  Chris Demetriou  <cgd@broadcom.com>
            Ed Satterthwaite  <ehs@broadcom.com>

--- a/sim/mips/cp1.c
+++ b/sim/mips/cp1.c
@ -672,6 +672,206 @@ fp_binary(sim_cpu *cpu,
  return result;
 }

+/* Common MAC code (.s or .d); returns status so the caller can set FCSR.  */
+static sim_fpu_status
+inner_mac(int (*sim_fpu_op)(sim_fpu *, const sim_fpu *, const sim_fpu *),
+	  unsigned64 op1,
+	  unsigned64 op2,
+	  unsigned64 op3,
+	  int scale,  /* added to the exponent of op1*op2 when non-zero */
+	  int negate,  /* non-zero: negate the result of sim_fpu_op */
+	  FP_formats fmt,
+	  sim_fpu_round round,
+	  sim_fpu_denorm denorm,
+	  unsigned64 *result)  /* out: raw bits of the final value */
+{
+  sim_fpu wop1;
+  sim_fpu wop2;
+  sim_fpu ans;
+  sim_fpu_status status = 0;  /* union of the status of every step */
+  sim_fpu_status op_status;
+  unsigned64 temp = 0;
+
+  switch (fmt)
+    {
+    case fmt_single:
+      {
+	unsigned32 res;
+	sim_fpu_32to (&wop1, op1);
+	sim_fpu_32to (&wop2, op2);
+	status |= sim_fpu_mul (&ans, &wop1, &wop2);
+	if (scale != 0 && sim_fpu_is_number (&ans))  /* number or denorm */
+	  ans.normal_exp += scale;
+	status |= sim_fpu_round_32 (&ans, round, denorm);  /* round the product */
+	wop1 = ans;  /* rounded product is the left operand of sim_fpu_op */
+        op_status = 0;
+	sim_fpu_32to (&wop2, op3);  /* op3 is the right operand */
+	op_status |= (*sim_fpu_op) (&ans, &wop1, &wop2);
+	op_status |= sim_fpu_round_32 (&ans, round, denorm);
+	status |= op_status;
+	if (negate)
+	  {
+	    wop1 = ans;
+	    op_status = sim_fpu_neg (&ans, &wop1);
+	    op_status |= sim_fpu_round_32 (&ans, round, denorm);
+	    status |= op_status;
+	  }
+	sim_fpu_to32 (&res, &ans);
+	temp = res;  /* 32-bit pattern widened into the 64-bit return slot */
+	break;
+      }
+    case fmt_double:
+      {
+	unsigned64 res;
+	sim_fpu_64to (&wop1, op1);
+	sim_fpu_64to (&wop2, op2);
+	status |= sim_fpu_mul (&ans, &wop1, &wop2);
+	if (scale != 0 && sim_fpu_is_number (&ans))  /* number or denorm */
+	  ans.normal_exp += scale;
+	status |= sim_fpu_round_64 (&ans, round, denorm);  /* round the product */
+	wop1 = ans;  /* rounded product is the left operand of sim_fpu_op */
+        op_status = 0;
+	sim_fpu_64to (&wop2, op3);  /* op3 is the right operand */
+	op_status |= (*sim_fpu_op) (&ans, &wop1, &wop2);
+	op_status |= sim_fpu_round_64 (&ans, round, denorm);
+	status |= op_status;
+	if (negate)
+	  {
+	    wop1 = ans;
+	    op_status = sim_fpu_neg (&ans, &wop1);
+	    op_status |= sim_fpu_round_64 (&ans, round, denorm);
+	    status |= op_status;
+	  }
+	sim_fpu_to64 (&res, &ans);
+	temp = res;
+	break;
+      }
+    default:
+      fprintf (stderr, "Bad switch\n");  /* only .s and .d are handled */
+      abort ();
+    }
+  *result = temp;
+  return status;
+}
+
+/* Common implementation of madd, nmadd, msub, nmsub that does
+   intermediate rounding per spec.  Also used for recip2 and rsqrt2,
+   which are transformed into equivalent nmsub operations.  The scale
+   argument is an adjustment to the exponent of the intermediate
+   product op1*op2.  It is currently non-zero for rsqrt2 (-1), which
+   requires an effective division by 2. */
+static unsigned64
+fp_mac(sim_cpu *cpu,
+       address_word cia,
+       int (*sim_fpu_op)(sim_fpu *, const sim_fpu *, const sim_fpu *),
+       unsigned64 op1,
+       unsigned64 op2,
+       unsigned64 op3,
+       int scale,
+       int negate,  /* non-zero: inner_mac negates the final result */
+       FP_formats fmt)
+{
+  sim_fpu_round round = rounding_mode (GETRM());
+  sim_fpu_denorm denorm = denorm_mode (cpu);
+  sim_fpu_status status = 0;
+  unsigned64 result = 0;
+
+  /* The format type has already been checked: */
+  switch (fmt)
+    {
+    case fmt_single:
+    case fmt_double:
+      status = inner_mac(sim_fpu_op, op1, op2, op3, scale,
+			 negate, fmt, round, denorm, &result);
+      break;
+    default:
+      sim_io_eprintf (SD, "Bad switch\n");
+      abort ();
+    }
+
+  update_fcsr (cpu, cia, status);  /* inner_mac deferred this to us */
+  return result;
+}
+
+/* Common rsqrt code (.s or .d): sqrt, round, invert, round; returns status.  */
+static sim_fpu_status
+inner_rsqrt(unsigned64 op1,
+	    FP_formats fmt,
+	    sim_fpu_round round,
+	    sim_fpu_denorm denorm,
+	    unsigned64 *result)  /* out: raw bits of 1/sqrt(op1) */
+{
+  sim_fpu wop1;
+  sim_fpu ans;
+  sim_fpu_status status = 0;  /* union of the status of every step */
+  sim_fpu_status op_status;
+  unsigned64 temp = 0;
+
+  switch (fmt)
+    {
+    case fmt_single:
+      {
+	unsigned32 res;
+	sim_fpu_32to (&wop1, op1);
+	status |= sim_fpu_sqrt (&ans, &wop1);
+	status |= sim_fpu_round_32 (&ans, round, denorm);  /* as in the .d case */
+	wop1 = ans;  /* rounded sqrt is the operand of the inverse */
+	op_status = sim_fpu_inv (&ans, &wop1);
+	op_status |= sim_fpu_round_32 (&ans, round, denorm);
+	sim_fpu_to32 (&res, &ans);
+	temp = res;  /* 32-bit pattern widened into the 64-bit return slot */
+	status |= op_status;
+	break;
+      }
+    case fmt_double:
+      {
+	unsigned64 res;
+	sim_fpu_64to (&wop1, op1);
+	status |= sim_fpu_sqrt (&ans, &wop1);
+	status |= sim_fpu_round_64 (&ans, round, denorm);
+	wop1 = ans;  /* rounded sqrt is the operand of the inverse */
+	op_status = sim_fpu_inv (&ans, &wop1);
+	op_status |= sim_fpu_round_64 (&ans, round, denorm);
+	sim_fpu_to64 (&res, &ans);
+	temp = res;
+	status |= op_status;
+	break;
+      }
+    default:
+      fprintf (stderr, "Bad switch\n");  /* only .s and .d are handled */
+      abort ();
+    }
+  *result = temp;
+  return status;
+}
+
+static unsigned64
+fp_inv_sqrt(sim_cpu *cpu,
+	    address_word cia,
+	    unsigned64 op1,
+	    FP_formats fmt)
+{
+  sim_fpu_round round = rounding_mode (GETRM());
+  sim_fpu_denorm denorm = denorm_mode (cpu);  /* type matches inner_rsqrt's parameter */
+  sim_fpu_status status = 0;
+  unsigned64 result = 0;
+
+  /* FMT was checked by the caller; compute 1/sqrt(op1), then update FCSR.  */
+  switch (fmt)
+    {
+    case fmt_single:
+    case fmt_double:
+      status = inner_rsqrt (op1, fmt, round, denorm, &result);
+      break;
+    default:
+      sim_io_eprintf (SD, "Bad switch\n");
+      abort ();
+    }
+
+  update_fcsr (cpu, cia, status);  /* inner_rsqrt only returns the status */
+  return result;
+}
+

 unsigned64
 fp_abs(sim_cpu *cpu,
@ -749,6 +949,59 @@ fp_sqrt(sim_cpu *cpu,
  return fp_unary(cpu, cia, &sim_fpu_sqrt, op, fmt);
 }

+unsigned64
+fp_rsqrt(sim_cpu *cpu,
+         address_word cia,
+         unsigned64 op,
+         FP_formats fmt)
+{
+  return fp_inv_sqrt(cpu, cia, op, fmt);  /* public entry point for the static helper */
+}
+
+unsigned64
+fp_madd(sim_cpu *cpu,
+        address_word cia,
+        unsigned64 op1,
+        unsigned64 op2,
+        unsigned64 op3,
+        FP_formats fmt)
+{
+  return fp_mac(cpu, cia, &sim_fpu_add, op1, op2, op3, 0, 0, fmt);  /* add op3; no scale, no negate */
+}
+
+unsigned64
+fp_msub(sim_cpu *cpu,
+        address_word cia,
+        unsigned64 op1,
+        unsigned64 op2,
+        unsigned64 op3,
+        FP_formats fmt)
+{
+  return fp_mac(cpu, cia, &sim_fpu_sub, op1, op2, op3, 0, 0, fmt);  /* subtract op3; no scale, no negate */
+}
+
+unsigned64
+fp_nmadd(sim_cpu *cpu,
+         address_word cia,
+         unsigned64 op1,
+         unsigned64 op2,
+         unsigned64 op3,
+         FP_formats fmt)
+{
+  return fp_mac(cpu, cia, &sim_fpu_add, op1, op2, op3, 0, 1, fmt);  /* add op3, then negate; no scale */
+}
+
+unsigned64
+fp_nmsub(sim_cpu *cpu,
+         address_word cia,
+         unsigned64 op1,
+         unsigned64 op2,
+         unsigned64 op3,
+         FP_formats fmt)
+{
+  return fp_mac(cpu, cia, &sim_fpu_sub, op1, op2, op3, 0, 1, fmt);  /* subtract op3, then negate; no scale */
+}
+

 /* Conversion operations.  */

--- a/sim/mips/mips.igen
+++ b/sim/mips/mips.igen
@ -4198,34 +4198,19 @@



-//
-// FIXME: Not correct for mips*
-//
-010011,5.FR,5.FT,5.FS,5.FD,100,001:COP1X:32,f::MADD.D
-"madd.d f<FD>, f<FR>, f<FS>, f<FT>"
+010011,5.FR,5.FT,5.FS,5.FD,100,3.FMT:COP1X:64,f::MADD.fmt
+"madd.%s<FMT> f<FD>, f<FR>, f<FS>, f<FT>"
 *mipsIV:
 *mipsV:
 *mips64:
 *vr5000:
 {
+  int fmt = FMT;
  check_fpu (SD_);
-  {
-    StoreFPR(FD,fmt_double,Add(Multiply(ValueFPR(FS,fmt_double),ValueFPR(FT,fmt_double),fmt_double),ValueFPR(FR,fmt_double),fmt_double));
-  }
-}
-
-
-010011,5.FR,5.FT,5.FS,5.FD,100,000:COP1X:32,f::MADD.S
-"madd.s f<FD>, f<FR>, f<FS>, f<FT>"
-*mipsIV:
-*mipsV:
-*mips64:
-*vr5000:
-{
-  check_fpu (SD_);
-  {
-    StoreFPR(FD,fmt_single,Add(Multiply(ValueFPR(FS,fmt_single),ValueFPR(FT,fmt_single),fmt_single),ValueFPR(FR,fmt_single),fmt_single));
-  }
+  check_u64 (SD_, instruction_0);
+  check_fmt_p (SD_, fmt, instruction_0); 
+  StoreFPR (FD, fmt, MultiplyAdd (ValueFPR (FS, fmt), ValueFPR (FT, fmt),
+				  ValueFPR (FR, fmt), fmt));
 }


@ -4354,29 +4339,19 @@
 }


-// MSUB.fmt
-010011,5.FR,5.FT,5.FS,5.FD,101,001:COP1X:32,f::MSUB.D
-"msub.d f<FD>, f<FR>, f<FS>, f<FT>"
+010011,5.FR,5.FT,5.FS,5.FD,101,3.FMT:COP1X:64,f::MSUB.fmt
+"msub.%s<FMT> f<FD>, f<FR>, f<FS>, f<FT>"
 *mipsIV:
 *mipsV:
 *mips64:
 *vr5000:
 {
+  int fmt = FMT;
  check_fpu (SD_);
-  StoreFPR(FD,fmt_double,Sub(Multiply(ValueFPR(FS,fmt_double),ValueFPR(FT,fmt_double),fmt_double),ValueFPR(FR,fmt_double),fmt_double));
-}
-
-
-// MSUB.fmt
-010011,5.FR,5.FT,5.FS,5.FD,101000:COP1X:32,f::MSUB.S
-"msub.s f<FD>, f<FR>, f<FS>, f<FT>"
-*mipsIV:
-*mipsV:
-*mips64:
-*vr5000:
-{
-  check_fpu (SD_);
-  StoreFPR(FD,fmt_single,Sub(Multiply(ValueFPR(FS,fmt_single),ValueFPR(FT,fmt_single),fmt_single),ValueFPR(FR,fmt_single),fmt_single));
+  check_u64 (SD_, instruction_0);
+  check_fmt_p (SD_, fmt, instruction_0);
+  StoreFPR (FD, fmt, MultiplySub (ValueFPR (FS, fmt), ValueFPR (FT, fmt),
+				  ValueFPR (FR, fmt), fmt));
 }


@ -4449,55 +4424,35 @@
 }


-// NMADD.fmt
-010011,5.FR,5.FT,5.FS,5.FD,110001:COP1X:32,f::NMADD.D
-"nmadd.d f<FD>, f<FR>, f<FS>, f<FT>"
+010011,5.FR,5.FT,5.FS,5.FD,110,3.FMT:COP1X:64,f::NMADD.fmt
+"nmadd.%s<FMT> f<FD>, f<FR>, f<FS>, f<FT>"
 *mipsIV:
 *mipsV:
 *mips64:
 *vr5000:
 {
+  int fmt = FMT;
  check_fpu (SD_);
-  StoreFPR(FD,fmt_double,Negate(Add(Multiply(ValueFPR(FS,fmt_double),ValueFPR(FT,fmt_double),fmt_double),ValueFPR(FR,fmt_double),fmt_double),fmt_double));
+  check_u64 (SD_, instruction_0);
+  check_fmt_p (SD_, fmt, instruction_0);
+  StoreFPR (FD, fmt, NegMultiplyAdd (ValueFPR (FS, fmt), ValueFPR (FT, fmt),
+				     ValueFPR (FR, fmt), fmt));
 }


-// NMADD.fmt
-010011,5.FR,5.FT,5.FS,5.FD,110000:COP1X:32,f::NMADD.S
-"nmadd.s f<FD>, f<FR>, f<FS>, f<FT>"
+010011,5.FR,5.FT,5.FS,5.FD,111,3.FMT:COP1X:64,f::NMSUB.fmt
+"nmsub.%s<FMT> f<FD>, f<FR>, f<FS>, f<FT>"
 *mipsIV:
 *mipsV:
 *mips64:
 *vr5000:
 {
+  int fmt = FMT;
  check_fpu (SD_);
-  StoreFPR(FD,fmt_single,Negate(Add(Multiply(ValueFPR(FS,fmt_single),ValueFPR(FT,fmt_single),fmt_single),ValueFPR(FR,fmt_single),fmt_single),fmt_single));
-}
-
-
-// NMSUB.fmt
-010011,5.FR,5.FT,5.FS,5.FD,111001:COP1X:32,f::NMSUB.D
-"nmsub.d f<FD>, f<FR>, f<FS>, f<FT>"
-*mipsIV:
-*mipsV:
-*mips64:
-*vr5000:
-{
-  check_fpu (SD_);
-  StoreFPR(FD,fmt_double,Negate(Sub(Multiply(ValueFPR(FS,fmt_double),ValueFPR(FT,fmt_double),fmt_double),ValueFPR(FR,fmt_double),fmt_double),fmt_double));
-}
-
-
-// NMSUB.fmt
-010011,5.FR,5.FT,5.FS,5.FD,111000:COP1X:32,f::NMSUB.S
-"nmsub.s f<FD>, f<FR>, f<FS>, f<FT>"
-*mipsIV:
-*mipsV:
-*mips64:
-*vr5000:
-{
-  check_fpu (SD_);
-  StoreFPR(FD,fmt_single,Negate(Sub(Multiply(ValueFPR(FS,fmt_single),ValueFPR(FT,fmt_single),fmt_single),ValueFPR(FR,fmt_single),fmt_single),fmt_single));
+  check_u64 (SD_, instruction_0);
+  check_fmt_p (SD_, fmt, instruction_0);
+  StoreFPR (FD, fmt, NegMultiplySub (ValueFPR (FS, fmt), ValueFPR (FT, fmt),
+				     ValueFPR (FR, fmt), fmt));
 }


@ -4579,7 +4534,7 @@
  int fmt = FMT;
  check_fpu (SD_);
  check_fmt (SD_, fmt, instruction_0);
-  StoreFPR(FD,fmt,Recip(SquareRoot(ValueFPR(FS,fmt),fmt),fmt));
+  StoreFPR (FD, fmt, RSquareRoot (ValueFPR (FS, fmt), fmt));
 }


--- a/sim/mips/sim-main.h
+++ b/sim/mips/sim-main.h
@ -704,6 +704,20 @@ unsigned64 fp_recip (SIM_STATE, unsigned64 op, FP_formats fmt);
 #define Recip(op,fmt) fp_recip(SIM_ARGS, op, fmt)
 unsigned64 fp_sqrt (SIM_STATE, unsigned64 op, FP_formats fmt);
 #define SquareRoot(op,fmt) fp_sqrt(SIM_ARGS, op, fmt)
+unsigned64 fp_rsqrt (SIM_STATE, unsigned64 op, FP_formats fmt);
+#define RSquareRoot(op,fmt) fp_rsqrt(SIM_ARGS, op, fmt)
+unsigned64 fp_madd (SIM_STATE, unsigned64 op1, unsigned64 op2,
+		    unsigned64 op3, FP_formats fmt);
+#define MultiplyAdd(op1,op2,op3,fmt) fp_madd(SIM_ARGS, op1, op2, op3, fmt)
+unsigned64 fp_msub (SIM_STATE, unsigned64 op1, unsigned64 op2,
+		    unsigned64 op3, FP_formats fmt);
+#define MultiplySub(op1,op2,op3,fmt) fp_msub(SIM_ARGS, op1, op2, op3, fmt)
+unsigned64 fp_nmadd (SIM_STATE, unsigned64 op1, unsigned64 op2,
+		     unsigned64 op3, FP_formats fmt);
+#define NegMultiplyAdd(op1,op2,op3,fmt) fp_nmadd(SIM_ARGS, op1, op2, op3, fmt)
+unsigned64 fp_nmsub (SIM_STATE, unsigned64 op1, unsigned64 op2,
+		     unsigned64 op3, FP_formats fmt);
+#define NegMultiplySub(op1,op2,op3,fmt) fp_nmsub(SIM_ARGS, op1, op2, op3, fmt)
 unsigned64 convert (SIM_STATE, int rm, unsigned64 op, FP_formats from, FP_formats to);
 #define Convert(rm,op,from,to) convert (SIM_ARGS, rm, op, from, to)