[ARM] Optimize copysign/copysignf for soft-float using BFI

gcc/ * config/arm/arm.md (copysignsf3): New pattern. (copysigndf3): Likewise. gcc/testsuite/ * gcc.target/arm/copysign_softfloat_1.c: New copysign/copysignf testcase for soft-float. From-SVN: r218159
2014-11-28 14:07:04 +00:00 · 2014-11-28 14:07:04 +00:00 · 3403c4c78e
commit 3403c4c78e
parent 476ff78736
4 changed files with 112 additions and 0 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,8 @@
+2014-11-28  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/arm/arm.md (copysignsf3): New pattern.
+	(copysigndf3): Likewise.
+
 2014-11-28  Andrey Turetskiy  <andrey.turetskiy@intel.com>
 	    Ilya Verbin  <ilya.verbin@intel.com>

--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@ -11192,6 +11192,47 @@
  [(set_attr "predicable" "yes")]
 )

+;; Soft-float copysignf for Thumb-2.  Operand 0 receives bits [30:0] of
+;; operand 1 together with bit 31 (the sign bit) of operand 2; the merge
+;; is done with one bit-field insert emitted through insv_t2.
+(define_expand "copysignsf3"
+  [(match_operand:SF 0 "register_operand")
+   (match_operand:SF 1 "register_operand")
+   (match_operand:SF 2 "register_operand")]
+  "TARGET_SOFT_FLOAT && arm_arch_thumb2"
+  "{
+     /* Build the result in a fresh pseudo instead of directly in
+	operands[0].  operands[0] may be the same register as operands[1];
+	copying operands[2] into it first would then destroy the magnitude
+	bits before the bit-field insert gets to read them.  */
+     rtx scratch = gen_reg_rtx (SImode);
+
+     /* Start from operand 2 so that bit 31 already holds the wanted sign.  */
+     emit_move_insn (scratch,
+		     simplify_gen_subreg (SImode, operands[2], SFmode, 0));
+
+     /* Replace the low 31 bits (width 31, lsb 0) with those of operand 1.  */
+     emit_insn (gen_insv_t2 (scratch, GEN_INT (31), GEN_INT (0),
+		simplify_gen_subreg (SImode, operands[1], SFmode, 0)));
+
+     emit_move_insn (operands[0],
+		     simplify_gen_subreg (SFmode, scratch, SImode, 0));
+     DONE;
+  }"
+)
+
+;; Soft-float copysign for Thumb-2.  The low word of operand 0 is a copy
+;; of the low word of operand 1.  The high word of operand 0 is the high
+;; word of operand 1 with its bit 31 replaced by bit 31 of the high word
+;; of operand 2.
+(define_expand "copysigndf3"
+  [(match_operand:DF 0 "register_operand")
+   (match_operand:DF 1 "register_operand")
+   (match_operand:DF 2 "register_operand")]
+  "TARGET_SOFT_FLOAT && arm_arch_thumb2"
+  "{
+     rtx op0_low = gen_lowpart (SImode, operands[0]);
+     rtx op0_high = gen_highpart (SImode, operands[0]);
+     rtx op1_low = gen_lowpart (SImode, operands[1]);
+     rtx op1_high = gen_highpart (SImode, operands[1]);
+     rtx op2_high = gen_highpart (SImode, operands[2]);
+
+     /* Both intermediate values live in fresh pseudos, so no input is
+	overwritten before it has been read even if operand 0 shares a
+	register with operand 1 or operand 2.  */
+     rtx scratch1 = gen_reg_rtx (SImode);
+     rtx scratch2 = gen_reg_rtx (SImode);
+
+     /* scratch2 = high word of operand 1 (exponent, upper mantissa, and a
+	sign bit that is replaced below).  */
+     emit_move_insn (scratch2, op1_high);
+
+     /* scratch1 = sign bit of operand 2, shifted down to bit 0.  The shift
+	fully defines scratch1, so no prior initialisation is needed.  */
+     emit_insn (gen_rtx_SET (SImode, scratch1,
+			     gen_rtx_LSHIFTRT (SImode, op2_high,
+					       GEN_INT (31))));
+
+     /* Insert that single bit (width 1, lsb 31) into scratch2.  */
+     emit_insn (gen_insv_t2 (scratch2, GEN_INT (1), GEN_INT (31), scratch1));
+
+     emit_move_insn (op0_low, op1_low);
+     emit_move_insn (op0_high, scratch2);
+
+     DONE;
+  }"
+)
+
 ;; Vector bits common to IWMMXT and Neon
 (include "vec-common.md")
 ;; Load the Intel Wireless Multimedia Extension patterns
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@ -1,3 +1,8 @@
+2014-11-28  Jiong Wang  <jiong.wang@arm.com>
+
+	* gcc.target/arm/copysign_softfloat_1.c: New copysign/copysignf
+	testcase for soft-float.
+
 2014-11-28  H.J. Lu  <hongjiu.lu@intel.com>

 	* gcc.target/i386/pr63661.c: Replace -mtune=native with
--- a/gcc/testsuite/gcc.target/arm/copysign_softfloat_1.c
+++ b/gcc/testsuite/gcc.target/arm/copysign_softfloat_1.c
@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-skip-if "skip override" { *-*-* } { "-mfloat-abi=softfp" "-mfloat-abi=hard" } { "" } } */
+/* { dg-options "-O2 -mfloat-abi=soft --save-temps" } */
+extern void abort (void);
+
+/* Number of elements in each of the test tables below.  */
+#define N 16
+
+/* First arguments to __builtin_copysignf in main.  */
+float a_f[N] = {-0.1f, -3.2f, -6.3f, -9.4f,
+		-12.5f, -15.6f, -18.7f, -21.8f,
+		24.9f, 27.1f, 30.2f, 33.3f,
+		36.4f, 39.5f, 42.6f, 45.7f};
+
+/* Second arguments to __builtin_copysignf in main.  */
+float b_f[N] = {-1.2f, 3.4f, -5.6f, 7.8f,
+		-9.0f, 1.0f, -2.0f, 3.0f,
+		-4.0f, -5.0f, 6.0f, 7.0f,
+		-8.0f, -9.0f, 10.0f, 11.0f};
+
+/* Values main compares each __builtin_copysignf (a_f[i], b_f[i])
+   result against.  */
+float c_f[N] = {-0.1f, 3.2f, -6.3f, 9.4f,
+		-12.5f, 15.6f, -18.7f, 21.8f,
+		-24.9f, -27.1f, 30.2f, 33.3f,
+		-36.4f, -39.5f, 42.6f, 45.7f};
+
+/* First arguments to __builtin_copysign in main.  */
+double a_d[N] = {-0.1, -3.2, -6.3, -9.4,
+		 -12.5, -15.6, -18.7, -21.8,
+		 24.9, 27.1, 30.2, 33.3,
+		 36.4, 39.5, 42.6, 45.7};
+
+/* Second arguments to __builtin_copysign in main.  */
+double b_d[N] = {-1.2, 3.4, -5.6, 7.8,
+		 -9.0, 1.0, -2.0, 3.0,
+		 -4.0, -5.0, 6.0, 7.0,
+		 -8.0, -9.0, 10.0, 11.0};
+
+/* Values main compares each __builtin_copysign (a_d[i], b_d[i])
+   result against.  */
+double c_d[N] = {-0.1, 3.2, -6.3, 9.4,
+		 -12.5, 15.6, -18.7, 21.8,
+		 -24.9, -27.1, 30.2, 33.3,
+		 -36.4, -39.5, 42.6, 45.7};
+
+/* Compare __builtin_copysignf and __builtin_copysign, element by element,
+   against the c_f and c_d tables.  Calls abort on the first mismatch and
+   returns 0 when every element matches.  */
+int
+main (int argc, char **argv)
+{
+  int index;
+
+/* { dg-final { scan-assembler-times "bfi" 2 } } */
+/* { dg-final { scan-assembler-times "lsr" 1 } } */
+  /* Single-precision checks.  */
+  for (index = 0; index < N; index++)
+    {
+      if (__builtin_copysignf (a_f[index], b_f[index]) != c_f[index])
+	abort ();
+    }
+
+  /* Double-precision checks.  */
+  for (index = 0; index < N; index++)
+    {
+      if (__builtin_copysign (a_d[index], b_d[index]) != c_d[index])
+	abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */