re PR target/68620 (ICE on gcc.target/arm/attr-neon-fp16.c)
2016-01-26 Christophe Lyon <christophe.lyon@linaro.org> gcc/ PR target/68620 * config/arm/arm.c (neon_valid_immediate): Handle FP16 vectors. * config/arm/arm_neon.h (__ARM_NUM_LANES, __arm_lane, arm_lanq): New helper macros. (vget_lane_f16): Handle big-endian. (vgetq_lane_f16): Likewise. (vset_lane_f16): Likewise. (vsetq_lane_f16): Likewise. * config/arm/iterators.md (VQXMOV): Add V8HF. (VDQ): Add V4HF and V8HF. (V_reg): Handle V4HF and V8HF. (Is_float_mode): Likewise. * config/arm/neon.md (movv4hf, movv8hf, neon_vdup_nv4hf, neon_vdup_nv8hf): New patterns. (vec_set<mode>_internal, vec_extract<mode>, neon_vld1_dup<mode>): Use VD_LANE iterator. (neon_vld1_dup<mode>): Use VQ2 iterator. testsuite/ PR target/68620 * gcc.target/arm/pr68620.c: New test. From-SVN: r232832
This commit is contained in:
parent
0bccf11b4d
commit
9242223583
|
@ -1,3 +1,23 @@
|
|||
2016-01-26 Christophe Lyon <christophe.lyon@linaro.org>
|
||||
|
||||
PR target/68620
|
||||
* config/arm/arm.c (neon_valid_immediate): Handle FP16 vectors.
|
||||
* config/arm/arm_neon.h (__ARM_NUM_LANES, __arm_lane, arm_lanq):
|
||||
New helper macros.
|
||||
(vget_lane_f16): Handle big-endian.
|
||||
(vgetq_lane_f16): Likewise.
|
||||
(vset_lane_f16): Likewise.
|
||||
(vsetq_lane_f16): Likewise.
|
||||
* config/arm/iterators.md (VQXMOV): Add V8HF.
|
||||
(VDQ): Add V4HF and V8HF.
|
||||
(V_reg): Handle V4HF and V8HF.
|
||||
(Is_float_mode): Likewise.
|
||||
* config/arm/neon.md (movv4hf, movv8hf, neon_vdup_nv4hf,
|
||||
neon_vdup_nv8hf): New patterns.
|
||||
(vec_set<mode>_internal, vec_extract<mode>, neon_vld1_dup<mode>):
|
||||
Use VD_LANE iterator.
|
||||
(neon_vld1_dup<mode>): Use VQ2 iterator.
|
||||
|
||||
2016-01-26 Nathan Sidwell <nathan@acm.org>
|
||||
|
||||
* omp-low.h (oacc_fn_attrib_kernels_p): Declare.
|
||||
|
|
|
@ -12381,6 +12381,10 @@ neon_valid_immediate (rtx op, machine_mode mode, int inverse,
|
|||
if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
|
||||
return -1;
|
||||
|
||||
/* FP16 vectors cannot be represented. */
|
||||
if (GET_MODE_INNER (mode) == HFmode)
|
||||
return -1;
|
||||
|
||||
r0 = CONST_DOUBLE_REAL_VALUE (el0);
|
||||
|
||||
for (i = 1; i < n_elts; i++)
|
||||
|
|
|
@ -5302,14 +5302,26 @@ vget_lane_s32 (int32x2_t __a, const int __b)
|
|||
were marked always-inline so there were no call sites, the declaration
|
||||
would nonetheless raise an error. Hence, we must use a macro instead. */
|
||||
|
||||
#define vget_lane_f16(__v, __idx) \
|
||||
__extension__ \
|
||||
({ \
|
||||
float16x4_t __vec = (__v); \
|
||||
__builtin_arm_lane_check (4, __idx); \
|
||||
float16_t __res = __vec[__idx]; \
|
||||
__res; \
|
||||
})
|
||||
/* For big-endian, GCC's vector indices are reversed within each 64
|
||||
bits compared to the architectural lane indices used by Neon
|
||||
intrinsics. */
|
||||
#ifdef __ARM_BIG_ENDIAN
|
||||
#define __ARM_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0]))
|
||||
#define __arm_lane(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec) - 1))
|
||||
#define __arm_laneq(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec)/2 - 1))
|
||||
#else
|
||||
#define __arm_lane(__vec, __idx) __idx
|
||||
#define __arm_laneq(__vec, __idx) __idx
|
||||
#endif
|
||||
|
||||
#define vget_lane_f16(__v, __idx) \
|
||||
__extension__ \
|
||||
({ \
|
||||
float16x4_t __vec = (__v); \
|
||||
__builtin_arm_lane_check (4, __idx); \
|
||||
float16_t __res = __vec[__arm_lane(__vec, __idx)]; \
|
||||
__res; \
|
||||
})
|
||||
#endif
|
||||
|
||||
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
|
||||
|
@ -5379,14 +5391,14 @@ vgetq_lane_s32 (int32x4_t __a, const int __b)
|
|||
}
|
||||
|
||||
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
|
||||
#define vgetq_lane_f16(__v, __idx) \
|
||||
__extension__ \
|
||||
({ \
|
||||
float16x8_t __vec = (__v); \
|
||||
__builtin_arm_lane_check (8, __idx); \
|
||||
float16_t __res = __vec[__idx]; \
|
||||
__res; \
|
||||
})
|
||||
#define vgetq_lane_f16(__v, __idx) \
|
||||
__extension__ \
|
||||
({ \
|
||||
float16x8_t __vec = (__v); \
|
||||
__builtin_arm_lane_check (8, __idx); \
|
||||
float16_t __res = __vec[__arm_laneq(__vec, __idx)]; \
|
||||
__res; \
|
||||
})
|
||||
#endif
|
||||
|
||||
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
|
||||
|
@ -5458,13 +5470,13 @@ vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
|
|||
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
|
||||
#define vset_lane_f16(__e, __v, __idx) \
|
||||
__extension__ \
|
||||
({ \
|
||||
float16_t __elem = (__e); \
|
||||
float16x4_t __vec = (__v); \
|
||||
__builtin_arm_lane_check (4, __idx); \
|
||||
__vec[__idx] = __elem; \
|
||||
__vec; \
|
||||
})
|
||||
({ \
|
||||
float16_t __elem = (__e); \
|
||||
float16x4_t __vec = (__v); \
|
||||
__builtin_arm_lane_check (4, __idx); \
|
||||
__vec[__arm_lane (__vec, __idx)] = __elem; \
|
||||
__vec; \
|
||||
})
|
||||
#endif
|
||||
|
||||
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
|
||||
|
@ -5536,13 +5548,13 @@ vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c)
|
|||
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
|
||||
#define vsetq_lane_f16(__e, __v, __idx) \
|
||||
__extension__ \
|
||||
({ \
|
||||
float16_t __elem = (__e); \
|
||||
float16x8_t __vec = (__v); \
|
||||
__builtin_arm_lane_check (8, __idx); \
|
||||
__vec[__idx] = __elem; \
|
||||
__vec; \
|
||||
})
|
||||
({ \
|
||||
float16_t __elem = (__e); \
|
||||
float16x8_t __vec = (__v); \
|
||||
__builtin_arm_lane_check (8, __idx); \
|
||||
__vec[__arm_laneq (__vec, __idx)] = __elem; \
|
||||
__vec; \
|
||||
})
|
||||
#endif
|
||||
|
||||
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
|
||||
|
|
|
@ -99,7 +99,7 @@
|
|||
(define_mode_iterator VQI [V16QI V8HI V4SI])
|
||||
|
||||
;; Quad-width vector modes, with TImode added, for moves.
|
||||
(define_mode_iterator VQXMOV [V16QI V8HI V4SI V4SF V2DI TI])
|
||||
(define_mode_iterator VQXMOV [V16QI V8HI V8HF V4SI V4SF V2DI TI])
|
||||
|
||||
;; Opaque structure types wider than TImode.
|
||||
(define_mode_iterator VSTRUCT [EI OI CI XI])
|
||||
|
@ -114,7 +114,7 @@
|
|||
(define_mode_iterator VN [V8HI V4SI V2DI])
|
||||
|
||||
;; All supported vector modes (except singleton DImode).
|
||||
(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DI])
|
||||
(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V4HF V8HF V2SF V4SF V2DI])
|
||||
|
||||
;; All supported vector modes (except those with 64-bit integer elements).
|
||||
(define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF])
|
||||
|
@ -428,6 +428,7 @@
|
|||
;; Register width from element mode
|
||||
(define_mode_attr V_reg [(V8QI "P") (V16QI "q")
|
||||
(V4HI "P") (V8HI "q")
|
||||
(V4HF "P") (V8HF "q")
|
||||
(V2SI "P") (V4SI "q")
|
||||
(V2SF "P") (V4SF "q")
|
||||
(DI "P") (V2DI "q")
|
||||
|
@ -576,6 +577,7 @@
|
|||
(define_mode_attr Is_float_mode [(V8QI "false") (V16QI "false")
|
||||
(V4HI "false") (V8HI "false")
|
||||
(V2SI "false") (V4SI "false")
|
||||
(V4HF "true") (V8HF "true")
|
||||
(V2SF "true") (V4SF "true")
|
||||
(DI "false") (V2DI "false")])
|
||||
|
||||
|
|
|
@ -137,6 +137,36 @@
|
|||
}
|
||||
})
|
||||
|
||||
(define_expand "movv4hf"
|
||||
[(set (match_operand:V4HF 0 "s_register_operand")
|
||||
(match_operand:V4HF 1 "s_register_operand"))]
|
||||
"TARGET_NEON && TARGET_FP16"
|
||||
{
|
||||
/* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
|
||||
causing an ICE on big-endian because it cannot extract subregs in
|
||||
this case. */
|
||||
if (can_create_pseudo_p ())
|
||||
{
|
||||
if (!REG_P (operands[0]))
|
||||
operands[1] = force_reg (V4HFmode, operands[1]);
|
||||
}
|
||||
})
|
||||
|
||||
(define_expand "movv8hf"
|
||||
[(set (match_operand:V8HF 0 "")
|
||||
(match_operand:V8HF 1 ""))]
|
||||
"TARGET_NEON && TARGET_FP16"
|
||||
{
|
||||
/* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
|
||||
causing an ICE on big-endian because it cannot extract subregs in
|
||||
this case. */
|
||||
if (can_create_pseudo_p ())
|
||||
{
|
||||
if (!REG_P (operands[0]))
|
||||
operands[1] = force_reg (V8HFmode, operands[1]);
|
||||
}
|
||||
})
|
||||
|
||||
(define_insn "*neon_mov<mode>"
|
||||
[(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
|
||||
(match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
|
||||
|
@ -299,11 +329,11 @@
|
|||
[(set_attr "type" "neon_load1_1reg<q>")])
|
||||
|
||||
(define_insn "vec_set<mode>_internal"
|
||||
[(set (match_operand:VD 0 "s_register_operand" "=w,w")
|
||||
(vec_merge:VD
|
||||
(vec_duplicate:VD
|
||||
[(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
|
||||
(vec_merge:VD_LANE
|
||||
(vec_duplicate:VD_LANE
|
||||
(match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
|
||||
(match_operand:VD 3 "s_register_operand" "0,0")
|
||||
(match_operand:VD_LANE 3 "s_register_operand" "0,0")
|
||||
(match_operand:SI 2 "immediate_operand" "i,i")))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
|
@ -385,7 +415,7 @@
|
|||
(define_insn "vec_extract<mode>"
|
||||
[(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
|
||||
(vec_select:<V_elem>
|
||||
(match_operand:VD 1 "s_register_operand" "w,w")
|
||||
(match_operand:VD_LANE 1 "s_register_operand" "w,w")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
|
@ -2829,6 +2859,22 @@ if (BYTES_BIG_ENDIAN)
|
|||
[(set_attr "type" "neon_from_gp<q>")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vdup_nv4hf"
|
||||
[(set (match_operand:V4HF 0 "s_register_operand" "=w")
|
||||
(vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
|
||||
"TARGET_NEON"
|
||||
"vdup.16\t%P0, %1"
|
||||
[(set_attr "type" "neon_from_gp")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vdup_nv8hf"
|
||||
[(set (match_operand:V8HF 0 "s_register_operand" "=w")
|
||||
(vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
|
||||
"TARGET_NEON"
|
||||
"vdup.16\t%q0, %1"
|
||||
[(set_attr "type" "neon_from_gp_q")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vdup_n<mode>"
|
||||
[(set (match_operand:V32 0 "s_register_operand" "=w,w")
|
||||
(vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
|
||||
|
@ -4361,8 +4407,8 @@ if (BYTES_BIG_ENDIAN)
|
|||
)
|
||||
|
||||
(define_insn "neon_vld1_dup<mode>"
|
||||
[(set (match_operand:VD 0 "s_register_operand" "=w")
|
||||
(vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
|
||||
[(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
|
||||
(vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
|
||||
"TARGET_NEON"
|
||||
"vld1.<V_sz_elem>\t{%P0[]}, %A1"
|
||||
[(set_attr "type" "neon_load1_all_lanes<q>")]
|
||||
|
@ -4378,8 +4424,8 @@ if (BYTES_BIG_ENDIAN)
|
|||
)
|
||||
|
||||
(define_insn "neon_vld1_dup<mode>"
|
||||
[(set (match_operand:VQ 0 "s_register_operand" "=w")
|
||||
(vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
|
||||
[(set (match_operand:VQ2 0 "s_register_operand" "=w")
|
||||
(vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2016-01-26 Christophe Lyon <christophe.lyon@linaro.org>
|
||||
|
||||
PR target/68620
|
||||
* gcc.target/arm/pr68620.c: New test.
|
||||
|
||||
2016-01-26 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* gcc.target/i386/pr68986-2.c: Remove -m32.
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_fp_ok } */
|
||||
/* { dg-options "-mfp16-format=ieee" } */
|
||||
/* { dg-add-options arm_fp } */
|
||||
|
||||
#include "arm_neon.h"
|
||||
|
||||
float16x4_t __attribute__((target("fpu=neon-fp16")))
|
||||
foo (float32x4_t arg)
|
||||
{
|
||||
return vcvt_f16_f32 (arg);
|
||||
}
|
Loading…
Reference in New Issue