[arm] PR target/83687: Fix invalid combination of VSUB + VABS into VABD

In this wrong-code bug we combine a VSUB.I8 and a VABS.S8 into a VABD.S8 instruction . This combination is not valid for integer operands because in the VABD instruction the semantics are that the difference is computed in notionally infinite precision and the absolute difference is computed on that, whereas for a VSUB.I8 + VABS.S8 sequence the VSUB operation will perform any wrapping that's needed for the 8-bit signed type before the VABS gets its hands on it. This leads to the wrong-code in the PR where the expected sequence from the intrinsics: VSUB + VABS of two vectors {-100, -100, -100...}, {100, 100, 100...} gives a result of {56, 56, 56...} (-100 - 100) but GCC optimises it into a single VABD of {-100, -100, -100...}, {100, 100, 100...} which produces a result of {200, 200, 200...} The transformation is still valid for floating-point operands, which is why it was added in the first place I believe (r178817) but this patch disables it for integer operands. The HFmode variants though only exist for TARGET_NEON_FP16INST, so this patch adds the appropriate guards to the new mode iterator Bootstrapped and tested on arm-none-linux-gnueabihf. PR target/83687 * config/arm/iterators.md (VF): New mode iterator. * config/arm/neon.md (neon_vabd<mode>_2): Use the above. Remove integer-related logic from pattern. (neon_vabd<mode>_3): Likewise. * gcc.target/arm/neon-combine-sub-abs-into-vabd.c: Delete integer tests. * gcc.target/arm/pr83687.c: New test. From-SVN: r256696
2018-01-15 11:56:03 +00:00 · 2018-01-15 11:56:03 +00:00 · d0b6b5a7b4
parent c89f2d2468
commit d0b6b5a7b4
6 changed files with 61 additions and 45 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,11 @@
 2018-01-15  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 	PR target/83687
 	* config/arm/iterators.md (VF): New mode iterator.
 	* config/arm/neon.md (neon_vabd<mode>_2): Use the above.
 	Remove integer-related logic from pattern.
 	(neon_vabd<mode>_3): Likewise.
 2018-01-15  Jakub Jelinek  <jakub@redhat.com>
 	PR middle-end/82694
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@ -119,6 +119,10 @@
 ;; All supported vector modes (except singleton DImode).
 (define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V4HF V8HF V2SF V4SF V2DI])
 ;; All supported floating-point vector modes (except V2DF).
 (define_mode_iterator VF [(V4HF "TARGET_NEON_FP16INST")
 			   (V8HF "TARGET_NEON_FP16INST") V2SF V4SF])
 ;; All supported vector modes (except those with 64-bit integer elements).
 (define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF])
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@ -6706,28 +6706,22 @@ if (BYTES_BIG_ENDIAN)
 })
 (define_insn "neon_vabd<mode>_2"
- [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ [(set (match_operand:VF 0 "s_register_operand" "=w")
-       (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+       (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
-                           (match_operand:VDQ 2 "s_register_operand" "w"))))]
+			 (match_operand:VF 2 "s_register_operand" "w"))))]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "TARGET_NEON && flag_unsafe_math_optimizations"
 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set (attr "type")
+ [(set_attr "type" "neon_fp_abd_s<q>")]
       (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
                     (const_string "neon_fp_abd_s<q>")
                     (const_string "neon_abd<q>")))]
 )
 (define_insn "neon_vabd<mode>_3"
- [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ [(set (match_operand:VF 0 "s_register_operand" "=w")
-       (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
+       (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
-                             (match_operand:VDQ 2 "s_register_operand" "w")]
+			    (match_operand:VF 2 "s_register_operand" "w")]
-                 UNSPEC_VSUB)))]
+		UNSPEC_VSUB)))]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "TARGET_NEON && flag_unsafe_math_optimizations"
 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set (attr "type")
+ [(set_attr "type" "neon_fp_abd_s<q>")]
       (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
                     (const_string "neon_fp_abd_s<q>")
                     (const_string "neon_abd<q>")))]
 )
 ;; Copy from core-to-neon regs, then extend, not vice-versa
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@ -1,3 +1,10 @@
 2018-01-15  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 	PR target/83687
 	* gcc.target/arm/neon-combine-sub-abs-into-vabd.c: Delete integer
 	tests.
 	* gcc.target/arm/pr83687.c: New test.
 2018-01-15  Georg-Johann Lay  <avr@gjlay.de>
 	Adjust tests to AVR_TINY.
--- a/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c
+++ b/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c
@ -12,31 +12,3 @@ float32x2_t f_sub_abs_to_vabd_32(float32x2_t val1, float32x2_t val2)
  return res;
 }
 /* { dg-final { scan-assembler "vabd\.f32" } }*/
 #include <arm_neon.h>
 int8x8_t sub_abs_to_vabd_8(int8x8_t val1, int8x8_t val2)
 {
  int8x8_t sres = vsub_s8(val1, val2);
  int8x8_t res = vabs_s8 (sres);
  return res;
 }
 /* { dg-final { scan-assembler "vabd\.s8" } }*/
 int16x4_t sub_abs_to_vabd_16(int16x4_t val1, int16x4_t val2)
 {
  int16x4_t sres = vsub_s16(val1, val2);
  int16x4_t res = vabs_s16 (sres);
  return res;
 }
 /* { dg-final { scan-assembler "vabd\.s16" } }*/
 int32x2_t sub_abs_to_vabd_32(int32x2_t val1, int32x2_t val2)
 {
  int32x2_t sres = vsub_s32(val1, val2);
  int32x2_t res = vabs_s32 (sres);
   return res;
 }
 /* { dg-final { scan-assembler "vabd\.s32" } }*/
--- a/gcc/testsuite/gcc.target/arm/pr83687.c
+++ b/gcc/testsuite/gcc.target/arm/pr83687.c
@ -0,0 +1,31 @@
 /* { dg-do run } */
 /* { dg-require-effective-target arm_neon_hw } */
 /* { dg-options "-O2" } */
 /* { dg-add-options arm_neon } */
 #include <arm_neon.h>
 __attribute__ ((noinline)) int8_t
 testFunction1 (int8_t a, int8_t b)
 {
  volatile int8x16_t sub = vsubq_s8 (vdupq_n_s8 (a), vdupq_n_s8 (b));
  int8x16_t abs = vabsq_s8 (sub);
  return vgetq_lane_s8 (abs, 0);
 }
 __attribute__ ((noinline)) int8_t
 testFunction2 (int8_t a, int8_t b)
 {
  int8x16_t sub = vsubq_s8 (vdupq_n_s8 (a), vdupq_n_s8 (b));
  int8x16_t abs = vabsq_s8 (sub);
  return vgetq_lane_s8 (abs, 0);
 }
 int
 main (void)
 {
  if (testFunction1 (-100, 100) != testFunction2 (-100, 100))
    __builtin_abort ();
  return 0;
 }