mips: Improved vectorization support for Loongson and mips3d-ps.
* config/mips/loongson.md (UNSPEC_LOONGSON_PINSR_0, UNSPEC_LOONGSON_PINSR_1, UNSPEC_LOONGSON_PINSR_2, UNSPEC_LOONGSON_PINSR_3): Replace with... (UNSPEC_LOONGSON_PINSRH): ... this. (UNSPEC_LOONGSON_VINIT): New. (UNSPEC_LOONGSON_DSLL, UNSPEC_LOONGSON_DSRL): New. (VWB): New mode iterator. (V_inner): New mode attribute. (loongson_vec_init1_<VHB>): New. (*vec_concatv2si): New. (and<VWHB>3, ior<VWHB>3, xor<VWHB>3, one_cmpl<VWHB>2): New. (*loongson_nor): New. (loongson_pextrh): Un-macro-ify. (loongson_pmaddhw): Likewise. (smaxv4hi3, umaxv8qi3, sminv4hi3, uminv8qi3): Likewise. (loongson_pinsrh_0): Represent with vec_select+vec_concat. (loongson_pinsrh_1, loongson_pinsrh_2, loongson_pinsrh_3): Likewise. (*vec_setv4hi, vec_setv4hi): New. (sdot_prodv4hi): New. (smax<VWB>3, smin<VWB>3): New. (reduc_uplus_v8qi): New. (loongson_pshufh): Remove destination matching input. (ashl<VWH>3, ashr<VWH>3, lshr<VWH>3): Fix type attribute. (vec_interleave_high<VWHB>, vec_interleave_low<VWHB>): Remove. (loongson_punpckhbh, loongson_punpckhhw, loongson_punpckhhw_qi, loongson_punpckhwd, loongson_punpckhwd_qi, loongson_punpckhwd_hi, loongson_punpcklbh, loongson_punpcklhw, loongson_punpcklhw_qi, loongson_punpcklwd, loongson_punpcklwd_qi, loongson_punpcklwd_hi, vec_perm_const<VWHB>, vec_unpacks_lo_<VHB>, vec_unpacks_hi_<VHB>, vec_unpacku_lo_<VHB>, vec_unpacku_hi_<VHB>, vec_shl_<VWHBDI>, vec_shr_<VWHBDI>, reduc_uplus_<VWH>, reduc_splus_<VWHB>, reduc_smax_<VWHB>, reduc_smin_<VWHB>, reduc_umax_<VWHB>, reduc_umin_<VB>): New. * config/mips/mips-ps-3d.md (vec_perm_const_ps): New. (mips_pul_ps, mips_puu_ps, mips_pll_ps, mips_plu_ps): Expand in terms of vec_perm_const_ps. (vec_perm_constv2sf): New. (vec_initv2sf): Use mips_expand_vector_init. (vec_concatv2sf): Rename from vec_initv2sf_internal. (vec_setv2sf): Use vec_perm_const_ps. (reduc_splus_v2sf, reduc_smin_v2sf, reduc_smax_v2sf): New. * config/mips/loongson.h (pshufh_u, pshufh_s): Don't pass dest to the builtin. 
* config/mips/mips-modes.def (V16QI, V8HI, V4SI, V4SF): New modes. * config/mips/mips-protos.h: Update. * config/mips/mips.c (mips_get_arg_info): Match V2SFmode, not all MODE_VECTOR_FLOAT. (mips_return_mode_in_fpr_p): Likewise. (mips_cannot_change_mode_class): Allow 8-byte integral mode changes. (CODE_FOR_loongson_punpckhbh, CODE_FOR_loongson_punpckhhw, CODE_FOR_loongson_punpckhwd, CODE_FOR_loongson_punpcklbh, CODE_FOR_loongson_punpcklhw, CODE_FOR_loongson_punpcklwd): Remove. (mips_builtins): Remove first operand for loongson pshufh builtins. (MAX_VECT_LEN, struct expand_vec_perm_d): New. (mips_expand_vselect, mips_expand_vselect_vconcat, mips_expand_vpc_loongson_even_odd, mips_expand_vpc_loongson_pshufh, mips_expand_vpc_loongson_bcast, mips_expand_vec_perm_const_1, mips_expand_vec_perm_const, mips_vectorize_vec_perm_const_ok, mips_expand_vec_unpack, mips_constant_elt_p, mips_expand_vi_broadcast, mips_expand_vi_constant, mips_expand_vi_loongson_one_pinsrh, mips_expand_vi_general, mips_expand_vec_reduc, mips_expand_vec_minmax, TARGET_VECTORIZE_VEC_PERM_CONST_OK): New. (mips_expand_vector_init): Rewrite. * config/mips/predicates.md (const_2_or_3_operand): New. (const_0_to_3_operand): New. From-SVN: r182662
This commit is contained in:
parent
7a37d6eaff
commit
7dab511cf3
@ -1,3 +1,71 @@
|
||||
2011-12-23 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/mips/loongson.md (UNSPEC_LOONGSON_PINSR_0,
|
||||
UNSPEC_LOONGSON_PINSR_1, UNSPEC_LOONGSON_PINSR_2,
|
||||
UNSPEC_LOONGSON_PINSR_3): Replace with...
|
||||
(UNSPEC_LOONGSON_PINSRH): ... this.
|
||||
(UNSPEC_LOONGSON_VINIT): New.
|
||||
(UNSPEC_LOONGSON_DSLL, UNSPEC_LOONGSON_DSRL): New.
|
||||
(VWB): New mode iterator.
|
||||
(V_inner): New mode attribute.
|
||||
(loongson_vec_init1_<VHB>): New.
|
||||
(*vec_concatv2si): New.
|
||||
(and<VWHB>3, ior<VWHB>3, xor<VWHB>3, one_cmpl<VWHB>2): New.
|
||||
(*loongson_nor): New.
|
||||
(loongson_pextrh): Un-macro-ify.
|
||||
(loongson_pmaddhw): Likewise.
|
||||
(smaxv4hi3, umaxv8qi3, sminv4hi3, uminv8qi3): Likewise.
|
||||
(loongson_pinsrh_0): Represent with vec_select+vec_concat.
|
||||
(loongson_pinsrh_1, loongson_pinsrh_2, loongson_pinsrh_3): Likewise.
|
||||
(*vec_setv4hi, vec_setv4hi): New.
|
||||
(sdot_prodv4hi): New.
|
||||
(smax<VWB>3, smin<VWB>3): New.
|
||||
(reduc_uplus_v8qi): New.
|
||||
(loongson_pshufh): Remove destination matching input.
|
||||
(ashl<VWH>3, ashr<VWH>3, lshr<VWH>3): Fix type attribute.
|
||||
(vec_interleave_high<VWHB>, vec_interleave_low<VWHB>): Remove.
|
||||
(loongson_punpckhbh, loongson_punpckhhw, loongson_punpckhhw_qi,
|
||||
loongson_punpckhwd, loongson_punpckhwd_qi, loongson_punpckhwd_hi,
|
||||
loongson_punpcklbh, loongson_punpcklhw, loongson_punpcklhw_qi,
|
||||
loongson_punpcklwd, loongson_punpcklwd_qi, loongson_punpcklwd_hi,
|
||||
vec_perm_const<VWHB>, vec_unpacks_lo_<VHB>, vec_unpacks_hi_<VHB>,
|
||||
vec_unpacku_lo_<VHB>, vec_unpacku_hi_<VHB>, vec_shl_<VWHBDI>,
|
||||
vec_shr_<VWHBDI>, reduc_uplus_<VWH>, reduc_splus_<VWHB>,
|
||||
reduc_smax_<VWHB>, reduc_smin_<VWHB>, reduc_umax_<VWHB>,
|
||||
reduc_umin_<VB>): New.
|
||||
* config/mips/mips-ps-3d.md (vec_perm_const_ps): New.
|
||||
(mips_pul_ps, mips_puu_ps, mips_pll_ps, mips_plu_ps): Expand in
|
||||
terms of vec_perm_const_ps.
|
||||
(vec_perm_constv2sf): New.
|
||||
(vec_initv2sf): Use mips_expand_vector_init.
|
||||
(vec_concatv2sf): Rename from vec_initv2sf_internal.
|
||||
(vec_setv2sf): Use vec_perm_const_ps.
|
||||
(reduc_splus_v2sf, reduc_smin_v2sf, reduc_smax_v2sf): New.
|
||||
* config/mips/loongson.h (pshufh_u, pshufh_s): Don't pass dest to
|
||||
the builtin.
|
||||
* config/mips/mips-modes.def (V16QI, V8HI, V4SI, V4SF): New modes.
|
||||
* config/mips/mips-protos.h: Update.
|
||||
* config/mips/mips.c (mips_get_arg_info): Match V2SFmode, not all
|
||||
MODE_VECTOR_FLOAT.
|
||||
(mips_return_mode_in_fpr_p): Likewise.
|
||||
(mips_cannot_change_mode_class): Allow 8-byte integral mode changes.
|
||||
(CODE_FOR_loongson_punpckhbh, CODE_FOR_loongson_punpckhhw,
|
||||
CODE_FOR_loongson_punpckhwd, CODE_FOR_loongson_punpcklbh,
|
||||
CODE_FOR_loongson_punpcklhw, CODE_FOR_loongson_punpcklwd): Remove.
|
||||
(mips_builtins): Remove first operand for loongson pshufh builtins.
|
||||
(MAX_VECT_LEN, struct expand_vec_perm_d): New.
|
||||
(mips_expand_vselect, mips_expand_vselect_vconcat,
|
||||
mips_expand_vpc_loongson_even_odd, mips_expand_vpc_loongson_pshufh,
|
||||
mips_expand_vpc_loongson_bcast, mips_expand_vec_perm_const_1,
|
||||
mips_expand_vec_perm_const, mips_vectorize_vec_perm_const_ok,
|
||||
mips_expand_vec_unpack, mips_constant_elt_p, mips_expand_vi_broadcast,
|
||||
mips_expand_vi_constant, mips_expand_vi_loongson_one_pinsrh,
|
||||
mips_expand_vi_general, mips_expand_vec_reduc, mips_expand_vec_minmax,
|
||||
TARGET_VECTORIZE_VEC_PERM_CONST_OK): New.
|
||||
(mips_expand_vector_init): Rewrite.
|
||||
* config/mips/predicates.md (const_2_or_3_operand): New.
|
||||
(const_0_to_3_operand): New.
|
||||
|
||||
2011-12-23 Dmitry Plotnikov <dplotnikov@ispras.ru>
|
||||
|
||||
* config/arm/neon.md (float<mode><V_cvtto>2): New.
|
||||
|
@ -449,13 +449,13 @@ psadbh (uint8x8_t s, uint8x8_t t)
|
||||
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
|
||||
pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order)
|
||||
{
|
||||
return __builtin_loongson_pshufh_u (dest, s, order);
|
||||
return __builtin_loongson_pshufh_u (s, order);
|
||||
}
|
||||
|
||||
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
|
||||
pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order)
|
||||
{
|
||||
return __builtin_loongson_pshufh_s (dest, s, order);
|
||||
return __builtin_loongson_pshufh_s (s, order);
|
||||
}
|
||||
|
||||
/* Shift left logical. */
|
||||
|
@ -24,10 +24,8 @@
|
||||
UNSPEC_LOONGSON_PCMPEQ
|
||||
UNSPEC_LOONGSON_PCMPGT
|
||||
UNSPEC_LOONGSON_PEXTR
|
||||
UNSPEC_LOONGSON_PINSR_0
|
||||
UNSPEC_LOONGSON_PINSR_1
|
||||
UNSPEC_LOONGSON_PINSR_2
|
||||
UNSPEC_LOONGSON_PINSR_3
|
||||
UNSPEC_LOONGSON_PINSRH
|
||||
UNSPEC_LOONGSON_VINIT
|
||||
UNSPEC_LOONGSON_PMADD
|
||||
UNSPEC_LOONGSON_PMOVMSK
|
||||
UNSPEC_LOONGSON_PMULHU
|
||||
@ -41,6 +39,8 @@
|
||||
UNSPEC_LOONGSON_PUNPCKL
|
||||
UNSPEC_LOONGSON_PADDD
|
||||
UNSPEC_LOONGSON_PSUBD
|
||||
UNSPEC_LOONGSON_DSLL
|
||||
UNSPEC_LOONGSON_DSRL
|
||||
])
|
||||
|
||||
;; Mode iterators and attributes.
|
||||
@ -60,6 +60,9 @@
|
||||
;; 64-bit vectors of words and halfwords.
|
||||
(define_mode_iterator VWH [V2SI V4HI])
|
||||
|
||||
;; 64-bit vectors of words and bytes.
|
||||
(define_mode_iterator VWB [V2SI V8QI])
|
||||
|
||||
;; 64-bit vectors of words, halfwords and bytes.
|
||||
(define_mode_iterator VWHB [V2SI V4HI V8QI])
|
||||
|
||||
@ -86,6 +89,9 @@
|
||||
;; but with twice as many elements.
|
||||
(define_mode_attr V_squash_double [(V2SI "V4HI") (V4HI "V8QI")])
|
||||
|
||||
;; Given a vector type T, the inner mode.
|
||||
(define_mode_attr V_inner [(V8QI "QI") (V4HI "HI") (V2SI "SI")])
|
||||
|
||||
;; The Loongson instruction suffixes corresponding to the conversions
|
||||
;; specified by V_squash_double.
|
||||
(define_mode_attr V_squash_double_suffix [(V2SI "wh") (V4HI "hb")])
|
||||
@ -122,6 +128,28 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Helper for vec_init. Initialize element 0 of the output from the input.
|
||||
;; All other elements are undefined.
|
||||
(define_insn "loongson_vec_init1_<mode>"
|
||||
[(set (match_operand:VHB 0 "register_operand" "=f")
|
||||
(unspec:VHB [(truncate:<V_inner>
|
||||
(match_operand:DI 1 "reg_or_0_operand" "Jd"))]
|
||||
UNSPEC_LOONGSON_VINIT))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"dmtc1\t%z1,%0"
|
||||
[(set_attr "move_type" "mtc")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
;; Helper for vec_initv2si.
|
||||
(define_insn "*vec_concatv2si"
|
||||
[(set (match_operand:V2SI 0 "register_operand" "=f")
|
||||
(vec_concat:V2SI
|
||||
(match_operand:SI 1 "register_operand" "f")
|
||||
(match_operand:SI 2 "register_operand" "f")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"punpcklwd\t%0,%1,%2"
|
||||
[(set_attr "type" "fcvt")])
|
||||
|
||||
;; Instruction patterns for SIMD instructions.
|
||||
|
||||
;; Pack with signed saturation.
|
||||
@ -200,6 +228,51 @@
|
||||
"pandn\t%0,%1,%2"
|
||||
[(set_attr "type" "fmul")])
|
||||
|
||||
;; Logical AND.
|
||||
(define_insn "and<mode>3"
|
||||
[(set (match_operand:VWHB 0 "register_operand" "=f")
|
||||
(and:VWHB (match_operand:VWHB 1 "register_operand" "f")
|
||||
(match_operand:VWHB 2 "register_operand" "f")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"and\t%0,%1,%2"
|
||||
[(set_attr "type" "fmul")])
|
||||
|
||||
;; Logical OR.
|
||||
(define_insn "ior<mode>3"
|
||||
[(set (match_operand:VWHB 0 "register_operand" "=f")
|
||||
(ior:VWHB (match_operand:VWHB 1 "register_operand" "f")
|
||||
(match_operand:VWHB 2 "register_operand" "f")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"or\t%0,%1,%2"
|
||||
[(set_attr "type" "fcvt")])
|
||||
|
||||
;; Logical XOR.
|
||||
(define_insn "xor<mode>3"
|
||||
[(set (match_operand:VWHB 0 "register_operand" "=f")
|
||||
(xor:VWHB (match_operand:VWHB 1 "register_operand" "f")
|
||||
(match_operand:VWHB 2 "register_operand" "f")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"xor\t%0,%1,%2"
|
||||
[(set_attr "type" "fmul")])
|
||||
|
||||
;; Logical NOR.  The RTL is written as (and (not a) (not b)), which by
|
||||
;; De Morgan's law is the same value as (not (ior a b)); the whole
|
||||
;; expression is emitted as a single "nor" instruction.
|
||||
(define_insn "*loongson_nor"
|
||||
[(set (match_operand:VWHB 0 "register_operand" "=f")
|
||||
(and:VWHB
|
||||
(not:VWHB (match_operand:VWHB 1 "register_operand" "f"))
|
||||
(not:VWHB (match_operand:VWHB 2 "register_operand" "f"))))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"nor\t%0,%1,%2"
|
||||
[(set_attr "type" "fmul")])
|
||||
|
||||
;; Logical NOT.  Emitted as a "nor" whose two source operands are both
|
||||
;; operand 1, i.e. the bitwise complement of that single input.
|
||||
(define_insn "one_cmpl<mode>2"
|
||||
[(set (match_operand:VWHB 0 "register_operand" "=f")
|
||||
(not:VWHB (match_operand:VWHB 1 "register_operand" "f")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"nor\t%0,%1,%1"
|
||||
[(set_attr "type" "fmul")])
|
||||
|
||||
;; Average.
|
||||
(define_insn "loongson_pavg<V_suffix>"
|
||||
[(set (match_operand:VHB 0 "register_operand" "=f")
|
||||
@ -231,96 +304,166 @@
|
||||
[(set_attr "type" "fadd")])
|
||||
|
||||
;; Extract halfword.
|
||||
(define_insn "loongson_pextr<V_suffix>"
|
||||
[(set (match_operand:VH 0 "register_operand" "=f")
|
||||
(unspec:VH [(match_operand:VH 1 "register_operand" "f")
|
||||
(match_operand:SI 2 "register_operand" "f")]
|
||||
(define_insn "loongson_pextrh"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=f")
|
||||
(unspec:V4HI [(match_operand:V4HI 1 "register_operand" "f")
|
||||
(match_operand:SI 2 "register_operand" "f")]
|
||||
UNSPEC_LOONGSON_PEXTR))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"pextr<V_suffix>\t%0,%1,%2"
|
||||
[(set_attr "type" "fmul")])
|
||||
"pextrh\t%0,%1,%2"
|
||||
[(set_attr "type" "fcvt")])
|
||||
|
||||
;; Insert halfword.
|
||||
(define_insn "loongson_pinsr<V_suffix>_0"
|
||||
[(set (match_operand:VH 0 "register_operand" "=f")
|
||||
(unspec:VH [(match_operand:VH 1 "register_operand" "f")
|
||||
(match_operand:VH 2 "register_operand" "f")]
|
||||
UNSPEC_LOONGSON_PINSR_0))]
|
||||
(define_insn "loongson_pinsrh_0"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=f")
|
||||
(vec_select:V4HI
|
||||
(vec_concat:V8HI
|
||||
(match_operand:V4HI 1 "register_operand" "f")
|
||||
(match_operand:V4HI 2 "register_operand" "f"))
|
||||
(parallel [(const_int 4) (const_int 1)
|
||||
(const_int 2) (const_int 3)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"pinsr<V_suffix>_0\t%0,%1,%2"
|
||||
"pinsrh_0\t%0,%1,%2"
|
||||
[(set_attr "type" "fdiv")])
|
||||
|
||||
(define_insn "loongson_pinsr<V_suffix>_1"
|
||||
[(set (match_operand:VH 0 "register_operand" "=f")
|
||||
(unspec:VH [(match_operand:VH 1 "register_operand" "f")
|
||||
(match_operand:VH 2 "register_operand" "f")]
|
||||
UNSPEC_LOONGSON_PINSR_1))]
|
||||
(define_insn "loongson_pinsrh_1"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=f")
|
||||
(vec_select:V4HI
|
||||
(vec_concat:V8HI
|
||||
(match_operand:V4HI 1 "register_operand" "f")
|
||||
(match_operand:V4HI 2 "register_operand" "f"))
|
||||
(parallel [(const_int 0) (const_int 4)
|
||||
(const_int 2) (const_int 3)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"pinsr<V_suffix>_1\t%0,%1,%2"
|
||||
"pinsrh_1\t%0,%1,%2"
|
||||
[(set_attr "type" "fdiv")])
|
||||
|
||||
(define_insn "loongson_pinsr<V_suffix>_2"
|
||||
[(set (match_operand:VH 0 "register_operand" "=f")
|
||||
(unspec:VH [(match_operand:VH 1 "register_operand" "f")
|
||||
(match_operand:VH 2 "register_operand" "f")]
|
||||
UNSPEC_LOONGSON_PINSR_2))]
|
||||
(define_insn "loongson_pinsrh_2"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=f")
|
||||
(vec_select:V4HI
|
||||
(vec_concat:V8HI
|
||||
(match_operand:V4HI 1 "register_operand" "f")
|
||||
(match_operand:V4HI 2 "register_operand" "f"))
|
||||
(parallel [(const_int 0) (const_int 1)
|
||||
(const_int 4) (const_int 3)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"pinsr<V_suffix>_2\t%0,%1,%2"
|
||||
"pinsrh_2\t%0,%1,%2"
|
||||
[(set_attr "type" "fdiv")])
|
||||
|
||||
(define_insn "loongson_pinsr<V_suffix>_3"
|
||||
[(set (match_operand:VH 0 "register_operand" "=f")
|
||||
(unspec:VH [(match_operand:VH 1 "register_operand" "f")
|
||||
(match_operand:VH 2 "register_operand" "f")]
|
||||
UNSPEC_LOONGSON_PINSR_3))]
|
||||
(define_insn "loongson_pinsrh_3"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=f")
|
||||
(vec_select:V4HI
|
||||
(vec_concat:V8HI
|
||||
(match_operand:V4HI 1 "register_operand" "f")
|
||||
(match_operand:V4HI 2 "register_operand" "f"))
|
||||
(parallel [(const_int 0) (const_int 1)
|
||||
(const_int 2) (const_int 4)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"pinsr<V_suffix>_3\t%0,%1,%2"
|
||||
"pinsrh_3\t%0,%1,%2"
|
||||
[(set_attr "type" "fdiv")])
|
||||
|
||||
;; Insert halfword with the element index supplied as an operand.
|
||||
;; Operand 1 is the V4HI input vector, operand 2 is the value to insert
|
||||
;; (held in an SImode register) and operand 3 must satisfy
|
||||
;; const_0_to_3_operand.  Operand 3 is printed directly into the
|
||||
;; mnemonic ("pinsrh_%3"), so it picks which pinsrh_<n> form is output.
|
||||
(define_insn "*vec_setv4hi"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=f")
|
||||
(unspec:V4HI [(match_operand:V4HI 1 "register_operand" "f")
|
||||
(match_operand:SI 2 "register_operand" "f")
|
||||
(match_operand:SI 3 "const_0_to_3_operand" "")]
|
||||
UNSPEC_LOONGSON_PINSRH))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"pinsrh_%3\t%0,%1,%2"
|
||||
[(set_attr "type" "fdiv")])
|
||||
|
||||
;; Expander for inserting a halfword into a V4HI vector.  Operand 0 is
|
||||
;; the result, operand 1 the input vector, operand 2 the HImode value to
|
||||
;; insert and operand 3 the element index (const_0_to_3_operand).
|
||||
;; The *vec_setv4hi insn takes the inserted value as an SImode register,
|
||||
;; so the preparation code re-views operand 2 in SImode before the
|
||||
;; UNSPEC_LOONGSON_PINSRH pattern is emitted.
|
||||
(define_expand "vec_setv4hi"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=f")
|
||||
(unspec:V4HI [(match_operand:V4HI 1 "register_operand" "f")
|
||||
(match_operand:HI 2 "register_operand" "f")
|
||||
(match_operand:SI 3 "const_0_to_3_operand" "")]
|
||||
UNSPEC_LOONGSON_PINSRH))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
{
|
||||
/* Widen the HImode element (operand 2), not the V4HI vector (operand 1):
|
||||
replacing operand 1 would leave RTL that *vec_setv4hi cannot match.  */
|
||||
rtx ext = gen_reg_rtx (SImode);
|
||||
emit_move_insn (ext, gen_lowpart (SImode, operands[2]));
|
||||
operands[2] = ext;
|
||||
})
|
||||
|
||||
;; Multiply and add packed integers.
|
||||
(define_insn "loongson_pmadd<V_stretch_half_suffix>"
|
||||
[(set (match_operand:<V_stretch_half> 0 "register_operand" "=f")
|
||||
(unspec:<V_stretch_half> [(match_operand:VH 1 "register_operand" "f")
|
||||
(match_operand:VH 2 "register_operand" "f")]
|
||||
UNSPEC_LOONGSON_PMADD))]
|
||||
(define_insn "loongson_pmaddhw"
|
||||
[(set (match_operand:V2SI 0 "register_operand" "=f")
|
||||
(unspec:V2SI [(match_operand:V4HI 1 "register_operand" "f")
|
||||
(match_operand:V4HI 2 "register_operand" "f")]
|
||||
UNSPEC_LOONGSON_PMADD))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"pmadd<V_stretch_half_suffix>\t%0,%1,%2"
|
||||
"pmaddhw\t%0,%1,%2"
|
||||
[(set_attr "type" "fmul")])
|
||||
|
||||
;; Maximum of signed halfwords.
|
||||
(define_insn "smax<mode>3"
|
||||
[(set (match_operand:VH 0 "register_operand" "=f")
|
||||
(smax:VH (match_operand:VH 1 "register_operand" "f")
|
||||
(match_operand:VH 2 "register_operand" "f")))]
|
||||
(define_expand "sdot_prodv4hi"
|
||||
[(match_operand:V2SI 0 "register_operand" "")
|
||||
(match_operand:V4HI 1 "register_operand" "")
|
||||
(match_operand:V4HI 2 "register_operand" "")
|
||||
(match_operand:V2SI 3 "register_operand" "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"pmaxs<V_suffix>\t%0,%1,%2"
|
||||
{
|
||||
rtx t = gen_reg_rtx (V2SImode);
|
||||
emit_insn (gen_loongson_pmaddhw (t, operands[1], operands[2]));
|
||||
emit_insn (gen_addv2si3 (operands[0], t, operands[3]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Maximum of signed halfwords.
|
||||
(define_insn "smaxv4hi3"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=f")
|
||||
(smax:V4HI (match_operand:V4HI 1 "register_operand" "f")
|
||||
(match_operand:V4HI 2 "register_operand" "f")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"pmaxsh\t%0,%1,%2"
|
||||
[(set_attr "type" "fadd")])
|
||||
|
||||
;; Maximum of unsigned bytes.
|
||||
(define_insn "umax<mode>3"
|
||||
[(set (match_operand:VB 0 "register_operand" "=f")
|
||||
(umax:VB (match_operand:VB 1 "register_operand" "f")
|
||||
(match_operand:VB 2 "register_operand" "f")))]
|
||||
(define_expand "smax<mode>3"
|
||||
[(match_operand:VWB 0 "register_operand" "")
|
||||
(match_operand:VWB 1 "register_operand" "")
|
||||
(match_operand:VWB 2 "register_operand" "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"pmaxu<V_suffix>\t%0,%1,%2"
|
||||
{
|
||||
mips_expand_vec_minmax (operands[0], operands[1], operands[2],
|
||||
gen_loongson_pcmpgt<V_suffix>, false);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Maximum of unsigned bytes.
|
||||
(define_insn "umaxv8qi3"
|
||||
[(set (match_operand:V8QI 0 "register_operand" "=f")
|
||||
(umax:V8QI (match_operand:V8QI 1 "register_operand" "f")
|
||||
(match_operand:V8QI 2 "register_operand" "f")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"pmaxub\t%0,%1,%2"
|
||||
[(set_attr "type" "fadd")])
|
||||
|
||||
;; Minimum of signed halfwords.
|
||||
(define_insn "smin<mode>3"
|
||||
[(set (match_operand:VH 0 "register_operand" "=f")
|
||||
(smin:VH (match_operand:VH 1 "register_operand" "f")
|
||||
(match_operand:VH 2 "register_operand" "f")))]
|
||||
(define_insn "sminv4hi3"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=f")
|
||||
(smin:V4HI (match_operand:V4HI 1 "register_operand" "f")
|
||||
(match_operand:V4HI 2 "register_operand" "f")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"pmins<V_suffix>\t%0,%1,%2"
|
||||
"pminsh\t%0,%1,%2"
|
||||
[(set_attr "type" "fadd")])
|
||||
|
||||
;; Minimum of unsigned bytes.
|
||||
(define_insn "umin<mode>3"
|
||||
[(set (match_operand:VB 0 "register_operand" "=f")
|
||||
(umin:VB (match_operand:VB 1 "register_operand" "f")
|
||||
(match_operand:VB 2 "register_operand" "f")))]
|
||||
(define_expand "smin<mode>3"
|
||||
[(match_operand:VWB 0 "register_operand" "")
|
||||
(match_operand:VWB 1 "register_operand" "")
|
||||
(match_operand:VWB 2 "register_operand" "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"pminu<V_suffix>\t%0,%1,%2"
|
||||
{
|
||||
mips_expand_vec_minmax (operands[0], operands[1], operands[2],
|
||||
gen_loongson_pcmpgt<V_suffix>, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Minimum of unsigned bytes.
|
||||
(define_insn "uminv8qi3"
|
||||
[(set (match_operand:V8QI 0 "register_operand" "=f")
|
||||
(umin:V8QI (match_operand:V8QI 1 "register_operand" "f")
|
||||
(match_operand:V8QI 2 "register_operand" "f")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"pminub\t%0,%1,%2"
|
||||
[(set_attr "type" "fadd")])
|
||||
|
||||
;; Move byte mask.
|
||||
@ -390,6 +533,14 @@
|
||||
"biadd\t%0,%1"
|
||||
[(set_attr "type" "fabs")])
|
||||
|
||||
(define_insn "reduc_uplus_v8qi"
|
||||
[(set (match_operand:V8QI 0 "register_operand" "=f")
|
||||
(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "f")]
|
||||
UNSPEC_LOONGSON_BIADD))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"biadd\t%0,%1"
|
||||
[(set_attr "type" "fabs")])
|
||||
|
||||
;; Sum of absolute differences.
|
||||
(define_insn "loongson_psadbh"
|
||||
[(set (match_operand:<V_stretch_half> 0 "register_operand" "=f")
|
||||
@ -403,12 +554,11 @@
|
||||
;; Shuffle halfwords.
|
||||
(define_insn "loongson_pshufh"
|
||||
[(set (match_operand:VH 0 "register_operand" "=f")
|
||||
(unspec:VH [(match_operand:VH 1 "register_operand" "0")
|
||||
(match_operand:VH 2 "register_operand" "f")
|
||||
(match_operand:SI 3 "register_operand" "f")]
|
||||
(unspec:VH [(match_operand:VH 1 "register_operand" "f")
|
||||
(match_operand:SI 2 "register_operand" "f")]
|
||||
UNSPEC_LOONGSON_PSHUFH))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"pshufh\t%0,%2,%3"
|
||||
"pshufh\t%0,%1,%2"
|
||||
[(set_attr "type" "fmul")])
|
||||
|
||||
;; Shift left logical.
|
||||
@ -418,7 +568,7 @@
|
||||
(match_operand:SI 2 "register_operand" "f")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"psll<V_suffix>\t%0,%1,%2"
|
||||
[(set_attr "type" "fmul")])
|
||||
[(set_attr "type" "fcvt")])
|
||||
|
||||
;; Shift right arithmetic.
|
||||
(define_insn "ashr<mode>3"
|
||||
@ -427,7 +577,7 @@
|
||||
(match_operand:SI 2 "register_operand" "f")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"psra<V_suffix>\t%0,%1,%2"
|
||||
[(set_attr "type" "fdiv")])
|
||||
[(set_attr "type" "fcvt")])
|
||||
|
||||
;; Shift right logical.
|
||||
(define_insn "lshr<mode>3"
|
||||
@ -436,7 +586,7 @@
|
||||
(match_operand:SI 2 "register_operand" "f")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"psrl<V_suffix>\t%0,%1,%2"
|
||||
[(set_attr "type" "fdiv")])
|
||||
[(set_attr "type" "fcvt")])
|
||||
|
||||
;; Subtraction, treating overflow by wraparound.
|
||||
(define_insn "sub<mode>3"
|
||||
@ -478,26 +628,286 @@
|
||||
"psubus<V_suffix>\t%0,%1,%2"
|
||||
[(set_attr "type" "fadd")])
|
||||
|
||||
;; Unpack high data.
|
||||
(define_insn "vec_interleave_high<mode>"
|
||||
[(set (match_operand:VWHB 0 "register_operand" "=f")
|
||||
(unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f")
|
||||
(match_operand:VWHB 2 "register_operand" "f")]
|
||||
UNSPEC_LOONGSON_PUNPCKH))]
|
||||
;; Unpack high data. Recall that Loongson only runs in little-endian mode.
|
||||
(define_insn "loongson_punpckhbh"
|
||||
[(set (match_operand:V8QI 0 "register_operand" "=f")
|
||||
(vec_select:V8QI
|
||||
(vec_concat:V16QI
|
||||
(match_operand:V8QI 1 "register_operand" "f")
|
||||
(match_operand:V8QI 2 "register_operand" "f"))
|
||||
(parallel [(const_int 4) (const_int 12)
|
||||
(const_int 5) (const_int 13)
|
||||
(const_int 6) (const_int 14)
|
||||
(const_int 7) (const_int 15)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"punpckh<V_stretch_half_suffix>\t%0,%1,%2"
|
||||
"punpckhbh\t%0,%1,%2"
|
||||
[(set_attr "type" "fdiv")])
|
||||
|
||||
;; Unpack low data.
|
||||
(define_insn "vec_interleave_low<mode>"
|
||||
[(set (match_operand:VWHB 0 "register_operand" "=f")
|
||||
(unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f")
|
||||
(match_operand:VWHB 2 "register_operand" "f")]
|
||||
UNSPEC_LOONGSON_PUNPCKL))]
|
||||
(define_insn "loongson_punpckhhw"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=f")
|
||||
(vec_select:V4HI
|
||||
(vec_concat:V8HI
|
||||
(match_operand:V4HI 1 "register_operand" "f")
|
||||
(match_operand:V4HI 2 "register_operand" "f"))
|
||||
(parallel [(const_int 2) (const_int 6)
|
||||
(const_int 3) (const_int 7)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"punpckl<V_stretch_half_suffix>\t%0,%1,%2"
|
||||
"punpckhhw\t%0,%1,%2"
|
||||
[(set_attr "type" "fdiv")])
|
||||
|
||||
;; V8QI view of punpckhhw.  The selected byte pairs (4,5) (12,13) (6,7)
|
||||
;; (14,15) of the V16QI concatenation are halfwords 2, 6, 3 and 7 of the
|
||||
;; same data, which is the selection loongson_punpckhhw makes on V4HI
|
||||
;; operands, so the same instruction is emitted.
|
||||
(define_insn "loongson_punpckhhw_qi"
|
||||
[(set (match_operand:V8QI 0 "register_operand" "=f")
|
||||
(vec_select:V8QI
|
||||
(vec_concat:V16QI
|
||||
(match_operand:V8QI 1 "register_operand" "f")
|
||||
(match_operand:V8QI 2 "register_operand" "f"))
|
||||
(parallel [(const_int 4) (const_int 5)
|
||||
(const_int 12) (const_int 13)
|
||||
(const_int 6) (const_int 7)
|
||||
(const_int 14) (const_int 15)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"punpckhhw\t%0,%1,%2"
|
||||
[(set_attr "type" "fdiv")])
|
||||
|
||||
(define_insn "loongson_punpckhwd"
|
||||
[(set (match_operand:V2SI 0 "register_operand" "=f")
|
||||
(vec_select:V2SI
|
||||
(vec_concat:V4SI
|
||||
(match_operand:V2SI 1 "register_operand" "f")
|
||||
(match_operand:V2SI 2 "register_operand" "f"))
|
||||
(parallel [(const_int 1) (const_int 3)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"punpckhwd\t%0,%1,%2"
|
||||
[(set_attr "type" "fcvt")])
|
||||
|
||||
(define_insn "loongson_punpckhwd_qi"
|
||||
[(set (match_operand:V8QI 0 "register_operand" "=f")
|
||||
(vec_select:V8QI
|
||||
(vec_concat:V16QI
|
||||
(match_operand:V8QI 1 "register_operand" "f")
|
||||
(match_operand:V8QI 2 "register_operand" "f"))
|
||||
(parallel [(const_int 4) (const_int 5)
|
||||
(const_int 6) (const_int 7)
|
||||
(const_int 12) (const_int 13)
|
||||
(const_int 14) (const_int 15)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"punpckhwd\t%0,%1,%2"
|
||||
[(set_attr "type" "fcvt")])
|
||||
|
||||
(define_insn "loongson_punpckhwd_hi"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=f")
|
||||
(vec_select:V4HI
|
||||
(vec_concat:V8HI
|
||||
(match_operand:V4HI 1 "register_operand" "f")
|
||||
(match_operand:V4HI 2 "register_operand" "f"))
|
||||
(parallel [(const_int 2) (const_int 3)
|
||||
(const_int 6) (const_int 7)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"punpckhwd\t%0,%1,%2"
|
||||
[(set_attr "type" "fcvt")])
|
||||
|
||||
;; Unpack low data.
|
||||
(define_insn "loongson_punpcklbh"
|
||||
[(set (match_operand:V8QI 0 "register_operand" "=f")
|
||||
(vec_select:V8QI
|
||||
(vec_concat:V16QI
|
||||
(match_operand:V8QI 1 "register_operand" "f")
|
||||
(match_operand:V8QI 2 "register_operand" "f"))
|
||||
(parallel [(const_int 0) (const_int 8)
|
||||
(const_int 1) (const_int 9)
|
||||
(const_int 2) (const_int 10)
|
||||
(const_int 3) (const_int 11)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"punpcklbh\t%0,%1,%2"
|
||||
[(set_attr "type" "fdiv")])
|
||||
|
||||
(define_insn "loongson_punpcklhw"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=f")
|
||||
(vec_select:V4HI
|
||||
(vec_concat:V8HI
|
||||
(match_operand:V4HI 1 "register_operand" "f")
|
||||
(match_operand:V4HI 2 "register_operand" "f"))
|
||||
(parallel [(const_int 0) (const_int 4)
|
||||
(const_int 1) (const_int 5)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"punpcklhw\t%0,%1,%2"
|
||||
[(set_attr "type" "fdiv")])
|
||||
|
||||
(define_insn "*loongson_punpcklhw_qi"
|
||||
[(set (match_operand:V8QI 0 "register_operand" "=f")
|
||||
(vec_select:V8QI
|
||||
(vec_concat:V16QI
|
||||
(match_operand:V8QI 1 "register_operand" "f")
|
||||
(match_operand:V8QI 2 "register_operand" "f"))
|
||||
(parallel [(const_int 0) (const_int 1)
|
||||
(const_int 8) (const_int 9)
|
||||
(const_int 2) (const_int 3)
|
||||
(const_int 10) (const_int 11)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"punpcklhw\t%0,%1,%2"
|
||||
[(set_attr "type" "fdiv")])
|
||||
|
||||
(define_insn "loongson_punpcklwd"
|
||||
[(set (match_operand:V2SI 0 "register_operand" "=f")
|
||||
(vec_select:V2SI
|
||||
(vec_concat:V4SI
|
||||
(match_operand:V2SI 1 "register_operand" "f")
|
||||
(match_operand:V2SI 2 "register_operand" "f"))
|
||||
(parallel [(const_int 0) (const_int 2)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"punpcklwd\t%0,%1,%2"
|
||||
[(set_attr "type" "fcvt")])
|
||||
|
||||
(define_insn "*loongson_punpcklwd_qi"
|
||||
[(set (match_operand:V8QI 0 "register_operand" "=f")
|
||||
(vec_select:V8QI
|
||||
(vec_concat:V16QI
|
||||
(match_operand:V8QI 1 "register_operand" "f")
|
||||
(match_operand:V8QI 2 "register_operand" "f"))
|
||||
(parallel [(const_int 0) (const_int 1)
|
||||
(const_int 2) (const_int 3)
|
||||
(const_int 8) (const_int 9)
|
||||
(const_int 10) (const_int 11)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"punpcklwd\t%0,%1,%2"
|
||||
[(set_attr "type" "fcvt")])
|
||||
|
||||
(define_insn "*loongson_punpcklwd_hi"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=f")
|
||||
(vec_select:V4HI
|
||||
(vec_concat:V8HI
|
||||
(match_operand:V4HI 1 "register_operand" "f")
|
||||
(match_operand:V4HI 2 "register_operand" "f"))
|
||||
(parallel [(const_int 0) (const_int 1)
|
||||
(const_int 4) (const_int 5)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"punpcklwd\t%0,%1,%2"
|
||||
[(set_attr "type" "fcvt")])
|
||||
|
||||
;; Permutation expander for the VWHB modes.  Operand 0 is the result and
|
||||
;; operands 1 and 2 are the input vectors; operand 3 has no predicate
|
||||
;; (presumably the constant selector -- confirm against
|
||||
;; mips_expand_vec_perm_const).  All work is delegated to
|
||||
;; mips_expand_vec_perm_const; the expansion FAILs when it returns false.
|
||||
(define_expand "vec_perm_const<mode>"
|
||||
[(match_operand:VWHB 0 "register_operand" "")
|
||||
(match_operand:VWHB 1 "register_operand" "")
|
||||
(match_operand:VWHB 2 "register_operand" "")
|
||||
(match_operand:VWHB 3 "" "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
{
|
||||
if (mips_expand_vec_perm_const (operands))
|
||||
DONE;
|
||||
else
|
||||
FAIL;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacks_lo_<mode>"
|
||||
[(match_operand:<V_stretch_half> 0 "register_operand" "")
|
||||
(match_operand:VHB 1 "register_operand" "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
{
|
||||
mips_expand_vec_unpack (operands, false, false);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacks_hi_<mode>"
|
||||
[(match_operand:<V_stretch_half> 0 "register_operand" "")
|
||||
(match_operand:VHB 1 "register_operand" "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
{
|
||||
mips_expand_vec_unpack (operands, false, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacku_lo_<mode>"
|
||||
[(match_operand:<V_stretch_half> 0 "register_operand" "")
|
||||
(match_operand:VHB 1 "register_operand" "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
{
|
||||
mips_expand_vec_unpack (operands, true, false);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacku_hi_<mode>"
|
||||
[(match_operand:<V_stretch_half> 0 "register_operand" "")
|
||||
(match_operand:VHB 1 "register_operand" "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
{
|
||||
mips_expand_vec_unpack (operands, true, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Whole vector shifts, used for reduction epilogues.
|
||||
(define_insn "vec_shl_<mode>"
|
||||
[(set (match_operand:VWHBDI 0 "register_operand" "=f")
|
||||
(unspec:VWHBDI [(match_operand:VWHBDI 1 "register_operand" "f")
|
||||
(match_operand:SI 2 "register_operand" "f")]
|
||||
UNSPEC_LOONGSON_DSLL))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"dsll\t%0,%1,%2"
|
||||
[(set_attr "type" "fcvt")])
|
||||
|
||||
(define_insn "vec_shr_<mode>"
|
||||
[(set (match_operand:VWHBDI 0 "register_operand" "=f")
|
||||
(unspec:VWHBDI [(match_operand:VWHBDI 1 "register_operand" "f")
|
||||
(match_operand:SI 2 "register_operand" "f")]
|
||||
UNSPEC_LOONGSON_DSRL))]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
"dsrl\t%0,%1,%2"
|
||||
[(set_attr "type" "fcvt")])
|
||||
|
||||
(define_expand "reduc_uplus_<mode>"
|
||||
[(match_operand:VWH 0 "register_operand" "")
|
||||
(match_operand:VWH 1 "register_operand" "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
{
|
||||
mips_expand_vec_reduc (operands[0], operands[1], gen_add<mode>3);
|
||||
DONE;
|
||||
})
|
||||
|
||||
; ??? Given that we're not describing a widening reduction, we should
|
||||
; not have separate optabs for signed and unsigned.
|
||||
; Until then the signed sum reduction simply forwards to the unsigned
|
||||
; one: reduc_uplus_<mode> for V2SI and V4HI, reduc_uplus_v8qi for V8QI.
|
||||
(define_expand "reduc_splus_<mode>"
|
||||
[(match_operand:VWHB 0 "register_operand" "")
|
||||
(match_operand:VWHB 1 "register_operand" "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
{
|
||||
emit_insn (gen_reduc_uplus_<mode>(operands[0], operands[1]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "reduc_smax_<mode>"
|
||||
[(match_operand:VWHB 0 "register_operand" "")
|
||||
(match_operand:VWHB 1 "register_operand" "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
{
|
||||
mips_expand_vec_reduc (operands[0], operands[1], gen_smax<mode>3);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "reduc_smin_<mode>"
|
||||
[(match_operand:VWHB 0 "register_operand" "")
|
||||
(match_operand:VWHB 1 "register_operand" "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
{
|
||||
mips_expand_vec_reduc (operands[0], operands[1], gen_smin<mode>3);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "reduc_umax_<mode>"
|
||||
[(match_operand:VB 0 "register_operand" "")
|
||||
(match_operand:VB 1 "register_operand" "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
{
|
||||
mips_expand_vec_reduc (operands[0], operands[1], gen_umax<mode>3);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "reduc_umin_<mode>"
|
||||
[(match_operand:VB 0 "register_operand" "")
|
||||
(match_operand:VB 1 "register_operand" "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
|
||||
{
|
||||
mips_expand_vec_reduc (operands[0], operands[1], gen_umin<mode>3);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Integer division and modulus. For integer multiplication, see mips.md.
|
||||
|
||||
(define_insn "<u>div<mode>3"
|
||||
|
@ -26,9 +26,15 @@ RESET_FLOAT_FORMAT (DF, mips_double_format);
|
||||
FLOAT_MODE (TF, 16, mips_quad_format);
|
||||
|
||||
/* Vector modes. */
|
||||
VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
|
||||
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
|
||||
VECTOR_MODES (INT, 4); /* V4QI V2HI */
|
||||
VECTOR_MODES (INT, 4); /* V4QI V2HI */
|
||||
VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
|
||||
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
|
||||
|
||||
/* Double-sized vector modes for vec_concat. */
|
||||
VECTOR_MODE (INT, QI, 16); /* V16QI */
|
||||
VECTOR_MODE (INT, HI, 8); /* V8HI */
|
||||
VECTOR_MODE (INT, SI, 4); /* V4SI */
|
||||
VECTOR_MODE (FLOAT, SF, 4); /* V4SF */
|
||||
|
||||
VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */
|
||||
VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */
|
||||
|
@ -328,6 +328,11 @@ extern void mips_expand_atomic_qihi (union mips_gen_fn_ptrs,
|
||||
rtx, rtx, rtx, rtx);
|
||||
|
||||
extern void mips_expand_vector_init (rtx, rtx);
|
||||
extern bool mips_expand_vec_perm_const (rtx op[4]);
|
||||
extern void mips_expand_vec_unpack (rtx op[2], bool, bool);
|
||||
extern void mips_expand_vec_reduc (rtx, rtx, rtx (*)(rtx, rtx, rtx));
|
||||
extern void mips_expand_vec_minmax (rtx, rtx, rtx,
|
||||
rtx (*) (rtx, rtx, rtx), bool);
|
||||
|
||||
extern bool mips_eh_uses (unsigned int);
|
||||
extern bool mips_epilogue_uses (unsigned int);
|
||||
|
@ -89,61 +89,169 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
; pul.ps - Pair Upper Lower
|
||||
(define_insn "mips_pul_ps"
|
||||
(define_insn "vec_perm_const_ps"
|
||||
[(set (match_operand:V2SF 0 "register_operand" "=f")
|
||||
(vec_merge:V2SF
|
||||
(match_operand:V2SF 1 "register_operand" "f")
|
||||
(match_operand:V2SF 2 "register_operand" "f")
|
||||
(const_int 2)))]
|
||||
(vec_select:V2SF
|
||||
(vec_concat:V4SF
|
||||
(match_operand:V2SF 1 "register_operand" "f")
|
||||
(match_operand:V2SF 2 "register_operand" "f"))
|
||||
(parallel [(match_operand:SI 3 "const_0_or_1_operand" "")
|
||||
(match_operand:SI 4 "const_2_or_3_operand" "")])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
|
||||
"pul.ps\t%0,%1,%2"
|
||||
{
|
||||
/* Let <op>L be the lower part of operand <op> and <op>U be the upper part.
|
||||
The P[UL][UL].PS instruction always specifies the upper part of the
|
||||
result first, so the instruction is:
|
||||
|
||||
P<aUL><bUL>.PS %0,<aop>,<bop>
|
||||
|
||||
where 0U == <aop><aUL> and 0L == <bop><bUL>.
|
||||
|
||||
GCC's vector indices are specified in memory order, which means
|
||||
that vector element 0 is the lower part (L) on little-endian targets
|
||||
and the upper part (U) on big-endian targets. vec_concat likewise
|
||||
concatenates in memory order, which means that operand 3 (being
|
||||
0 or 1) selects part of operand 1 and operand 4 (being 2 or 3)
|
||||
selects part of operand 2.
|
||||
|
||||
Let:
|
||||
|
||||
I3 = INTVAL (operands[3])
|
||||
I4 = INTVAL (operands[4]) - 2
|
||||
|
||||
Taking the two endiannesses in turn:
|
||||
|
||||
Little-endian:
|
||||
|
||||
The semantics of the RTL pattern are:
|
||||
|
||||
{ 0L, 0U } = { X[I3], X[I4 + 2] }, where X = { 1L, 1U, 2L, 2U }
|
||||
|
||||
so: 0L = { 1L, 1U }[I3] (= <bop><bUL>)
|
||||
0U = { 2L, 2U }[I4] (= <aop><aUL>)
|
||||
|
||||
<aop> = 2, <aUL> = I4 ? U : L
|
||||
<bop> = 1, <bUL> = I3 ? U : L
|
||||
|
||||
[LL] !I4 && !I3 [UL] I4 && !I3
|
||||
[LU] !I4 && I3 [UU] I4 && I3
|
||||
|
||||
Big-endian:
|
||||
|
||||
The semantics of the RTL pattern are:
|
||||
|
||||
{ 0U, 0L } = { X[I3], X[I4 + 2] }, where X = { 1U, 1L, 2U, 2L }
|
||||
|
||||
so: 0U = { 1U, 1L }[I3] (= <aop><aUL>)
|
||||
0L = { 2U, 2L }[I4] (= <bop><bUL>)
|
||||
|
||||
<aop> = 1, <aUL> = I3 ? L : U
|
||||
<bop> = 2, <bUL> = I4 ? L : U
|
||||
|
||||
[UU] !I3 && !I4 [UL] !I3 && I4
|
||||
[LU] I3 && !I4 [LL] I3 && I4. */
|
||||
|
||||
static const char * const mnemonics[2][4] = {
|
||||
/* LE */ { "pll.ps\t%0,%2,%1", "pul.ps\t%0,%2,%1",
|
||||
"plu.ps\t%0,%2,%1", "puu.ps\t%0,%2,%1" },
|
||||
/* BE */ { "puu.ps\t%0,%1,%2", "pul.ps\t%0,%1,%2",
|
||||
"plu.ps\t%0,%1,%2", "pll.ps\t%0,%1,%2" },
|
||||
};
|
||||
|
||||
unsigned mask = INTVAL (operands[3]) * 2 + (INTVAL (operands[4]) - 2);
|
||||
return mnemonics[BYTES_BIG_ENDIAN][mask];
|
||||
}
|
||||
[(set_attr "type" "fmove")
|
||||
(set_attr "mode" "SF")])
|
||||
|
||||
; puu.ps - Pair upper upper
|
||||
(define_insn "mips_puu_ps"
|
||||
[(set (match_operand:V2SF 0 "register_operand" "=f")
|
||||
(vec_merge:V2SF
|
||||
(match_operand:V2SF 1 "register_operand" "f")
|
||||
(vec_select:V2SF (match_operand:V2SF 2 "register_operand" "f")
|
||||
(parallel [(const_int 1)
|
||||
(const_int 0)]))
|
||||
(const_int 2)))]
|
||||
(define_expand "vec_perm_constv2sf"
|
||||
[(match_operand:V2SF 0 "register_operand" "")
|
||||
(match_operand:V2SF 1 "register_operand" "")
|
||||
(match_operand:V2SF 2 "register_operand" "")
|
||||
(match_operand:V2SI 3 "" "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
|
||||
"puu.ps\t%0,%1,%2"
|
||||
[(set_attr "type" "fmove")
|
||||
(set_attr "mode" "SF")])
|
||||
{
|
||||
if (mips_expand_vec_perm_const (operands))
|
||||
DONE;
|
||||
else
|
||||
FAIL;
|
||||
})
|
||||
|
||||
; pll.ps - Pair Lower Lower
|
||||
(define_insn "mips_pll_ps"
|
||||
[(set (match_operand:V2SF 0 "register_operand" "=f")
|
||||
(vec_merge:V2SF
|
||||
(vec_select:V2SF (match_operand:V2SF 1 "register_operand" "f")
|
||||
(parallel [(const_int 1)
|
||||
(const_int 0)]))
|
||||
(match_operand:V2SF 2 "register_operand" "f")
|
||||
(const_int 2)))]
|
||||
"TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
|
||||
"pll.ps\t%0,%1,%2"
|
||||
[(set_attr "type" "fmove")
|
||||
(set_attr "mode" "SF")])
|
||||
;; Expanders for builtins. The instruction:
|
||||
;;
|
||||
;; P[UL][UL].PS <result>, <a>, <b>
|
||||
;;
|
||||
;; says that the upper part of <result> is taken from half of <a> and
|
||||
;; the lower part of <result> is taken from half of <b>. This means
|
||||
;; that the P[UL][UL].PS operand order matches memory order on big-endian
|
||||
;; targets; <a> is element 0 of the V2SF result while <b> is element 1.
|
||||
;; However, the P[UL][UL].PS operand order is the reverse of memory order
|
||||
;; on little-endian targets; <a> is element 1 of the V2SF result while
|
||||
;; <b> is element 0. The arguments to vec_perm_const_ps are always in
|
||||
;; memory order.
|
||||
;;
|
||||
;; Similarly, "U" corresponds to element 0 on big-endian targets but
|
||||
;; to element 1 on little-endian targets.
|
||||
|
||||
; plu.ps - Pair Lower Upper
|
||||
(define_insn "mips_plu_ps"
|
||||
[(set (match_operand:V2SF 0 "register_operand" "=f")
|
||||
(vec_merge:V2SF
|
||||
(vec_select:V2SF (match_operand:V2SF 1 "register_operand" "f")
|
||||
(parallel [(const_int 1)
|
||||
(const_int 0)]))
|
||||
(vec_select:V2SF (match_operand:V2SF 2 "register_operand" "f")
|
||||
(parallel [(const_int 1)
|
||||
(const_int 0)]))
|
||||
(const_int 2)))]
|
||||
(define_expand "mips_puu_ps"
|
||||
[(match_operand:V2SF 0 "register_operand" "")
|
||||
(match_operand:V2SF 1 "register_operand" "")
|
||||
(match_operand:V2SF 2 "register_operand" "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
|
||||
"plu.ps\t%0,%1,%2"
|
||||
[(set_attr "type" "fmove")
|
||||
(set_attr "mode" "SF")])
|
||||
{
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
emit_insn (gen_vec_perm_const_ps (operands[0], operands[1], operands[2],
|
||||
const0_rtx, const2_rtx));
|
||||
else
|
||||
emit_insn (gen_vec_perm_const_ps (operands[0], operands[2], operands[1],
|
||||
const1_rtx, GEN_INT (3)));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Builtin expander for PUL.PS, expressed through vec_perm_const_ps.
(define_expand "mips_pul_ps"
  [(match_operand:V2SF 0 "register_operand" "")
   (match_operand:V2SF 1 "register_operand" "")
   (match_operand:V2SF 2 "register_operand" "")]
  "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
{
  /* The element selectors are the same for both endiannesses; only the
     order in which the two sources are passed differs.  */
  rtx first = BYTES_BIG_ENDIAN ? operands[1] : operands[2];
  rtx second = BYTES_BIG_ENDIAN ? operands[2] : operands[1];

  emit_insn (gen_vec_perm_const_ps (operands[0], first, second,
                                    const0_rtx, GEN_INT (3)));
  DONE;
})
|
||||
|
||||
;; Builtin expander for PLU.PS, expressed through vec_perm_const_ps.
(define_expand "mips_plu_ps"
  [(match_operand:V2SF 0 "register_operand" "")
   (match_operand:V2SF 1 "register_operand" "")
   (match_operand:V2SF 2 "register_operand" "")]
  "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
{
  /* The element selectors are the same for both endiannesses; only the
     order in which the two sources are passed differs.  */
  rtx first = BYTES_BIG_ENDIAN ? operands[1] : operands[2];
  rtx second = BYTES_BIG_ENDIAN ? operands[2] : operands[1];

  emit_insn (gen_vec_perm_const_ps (operands[0], first, second,
                                    const1_rtx, const2_rtx));
  DONE;
})
|
||||
|
||||
;; Builtin expander for PLL.PS, expressed through vec_perm_const_ps.
(define_expand "mips_pll_ps"
  [(match_operand:V2SF 0 "register_operand" "")
   (match_operand:V2SF 1 "register_operand" "")
   (match_operand:V2SF 2 "register_operand" "")]
  "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
{
  rtx first, second, first_sel, second_sel;

  /* Both the source order and the element selectors depend on the
     endianness.  */
  if (BYTES_BIG_ENDIAN)
    {
      first = operands[1];
      second = operands[2];
      first_sel = const1_rtx;
      second_sel = GEN_INT (3);
    }
  else
    {
      first = operands[2];
      second = operands[1];
      first_sel = const0_rtx;
      second_sel = const2_rtx;
    }

  emit_insn (gen_vec_perm_const_ps (operands[0], first, second,
                                    first_sel, second_sel));
  DONE;
})
|
||||
|
||||
; vec_init
|
||||
(define_expand "vec_initv2sf"
|
||||
@ -151,13 +259,11 @@
|
||||
(match_operand:V2SF 1 "")]
|
||||
"TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
|
||||
{
|
||||
rtx op0 = force_reg (SFmode, XVECEXP (operands[1], 0, 0));
|
||||
rtx op1 = force_reg (SFmode, XVECEXP (operands[1], 0, 1));
|
||||
emit_insn (gen_vec_initv2sf_internal (operands[0], op0, op1));
|
||||
mips_expand_vector_init (operands[0], operands[1]);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "vec_initv2sf_internal"
|
||||
(define_insn "vec_concatv2sf"
|
||||
[(set (match_operand:V2SF 0 "register_operand" "=f")
|
||||
(vec_concat:V2SF
|
||||
(match_operand:SF 1 "register_operand" "f")
|
||||
@ -195,22 +301,21 @@
|
||||
;; no other way to get a vector mode bitfield store currently.
|
||||
|
||||
(define_expand "vec_setv2sf"
|
||||
[(match_operand:V2SF 0 "register_operand")
|
||||
(match_operand:SF 1 "register_operand")
|
||||
(match_operand 2 "const_0_or_1_operand")]
|
||||
[(set (match_operand:V2SF 0 "register_operand" "")
|
||||
(vec_select:V2SF
|
||||
(vec_concat:V4SF
|
||||
(match_operand:SF 1 "register_operand" "")
|
||||
(match_dup 0))
|
||||
(parallel [(match_operand 2 "const_0_or_1_operand" "")
|
||||
(match_dup 3)])))]
|
||||
"TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
|
||||
{
|
||||
rtx temp;
|
||||
|
||||
/* We don't have an insert instruction, so we duplicate the float, and
|
||||
then use a PUL instruction. */
|
||||
temp = gen_reg_rtx (V2SFmode);
|
||||
emit_insn (gen_mips_cvt_ps_s (temp, operands[1], operands[1]));
|
||||
if (INTVAL (operands[2]) == !BYTES_BIG_ENDIAN)
|
||||
emit_insn (gen_mips_pul_ps (operands[0], temp, operands[0]));
|
||||
else
|
||||
emit_insn (gen_mips_pul_ps (operands[0], operands[0], temp));
|
||||
DONE;
|
||||
rtx temp = gen_reg_rtx (V2SFmode);
|
||||
emit_insn (gen_vec_concatv2sf (temp, operands[1], operands[1]));
|
||||
operands[1] = temp;
|
||||
operands[3] = GEN_INT (1 - INTVAL (operands[2]) + 2);
|
||||
})
|
||||
|
||||
; cvt.ps.s - Floating Point Convert Pair to Paired Single
|
||||
@ -221,11 +326,9 @@
|
||||
"TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
|
||||
{
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
emit_insn (gen_vec_initv2sf_internal (operands[0], operands[1],
|
||||
operands[2]));
|
||||
emit_insn (gen_vec_concatv2sf (operands[0], operands[1], operands[2]));
|
||||
else
|
||||
emit_insn (gen_vec_initv2sf_internal (operands[0], operands[2],
|
||||
operands[1]));
|
||||
emit_insn (gen_vec_concatv2sf (operands[0], operands[2], operands[1]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
@ -268,6 +371,14 @@
|
||||
[(set_attr "type" "fadd")
|
||||
(set_attr "mode" "SF")])
|
||||
|
||||
;; Sum reduction of the V2SF vector in operand 1, written to operand 0.
;; The RTL is an UNSPEC_ADDR_PS with operand 1 used as both sources, so
;; the instruction is ADDR.PS with %1 in both source positions.
;; The output template was previously empty, which meant that no
;; instruction was emitted and operand 0 was never written.
;; NOTE(review): the "type"/"mode" attributes mirror the fadd/SF attributes
;; of the preceding insn in this file -- confirm they match mips_addr_ps.
;; NOTE(review): ADDR.PS belongs to the MIPS-3D ASE; confirm that this insn
;; condition is strict enough for an optab that the vectorizer may use.
(define_insn "reduc_splus_v2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=f")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "f")
                      (match_dup 1)]
                     UNSPEC_ADDR_PS))]
  "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
  "addr.ps\t%0,%1,%1"
  [(set_attr "type" "fadd")
   (set_attr "mode" "SF")])
|
||||
|
||||
; cvt.pw.ps - Floating Point Convert Paired Single to Paired Word
|
||||
(define_insn "mips_cvt_pw_ps"
|
||||
[(set (match_operand:V2SF 0 "register_operand" "=f")
|
||||
@ -633,3 +744,21 @@
|
||||
LE, operands[2], operands[1]);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Minimum reduction of the V2SF vector in operand 1 into operand 0, built
;; by mips_expand_vec_reduc from the sminv2sf3 generator.
(define_expand "reduc_smin_v2sf"
  [(match_operand:V2SF 0 "register_operand")
   (match_operand:V2SF 1 "register_operand")]
  "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  mips_expand_vec_reduc (dest, src, gen_sminv2sf3);
  DONE;
})
|
||||
|
||||
;; Maximum reduction of the V2SF vector in operand 1 into operand 0, built
;; by mips_expand_vec_reduc from the smaxv2sf3 generator.
(define_expand "reduc_smax_v2sf"
  [(match_operand:V2SF 0 "register_operand")
   (match_operand:V2SF 1 "register_operand")]
  "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  mips_expand_vec_reduc (dest, src, gen_smaxv2sf3);
  DONE;
})
|
||||
|
@ -4638,7 +4638,7 @@ mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum,
|
||||
/* The EABI conventions have traditionally been defined in terms
|
||||
of TYPE_MODE, regardless of the actual type. */
|
||||
info->fpr_p = ((GET_MODE_CLASS (mode) == MODE_FLOAT
|
||||
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
|
||||
|| mode == V2SFmode)
|
||||
&& GET_MODE_SIZE (mode) <= UNITS_PER_FPVALUE);
|
||||
break;
|
||||
|
||||
@ -4653,7 +4653,7 @@ mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum,
|
||||
|| SCALAR_FLOAT_TYPE_P (type)
|
||||
|| VECTOR_FLOAT_TYPE_P (type))
|
||||
&& (GET_MODE_CLASS (mode) == MODE_FLOAT
|
||||
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
|
||||
|| mode == V2SFmode)
|
||||
&& GET_MODE_SIZE (mode) <= UNITS_PER_FPVALUE);
|
||||
break;
|
||||
|
||||
@ -4666,7 +4666,7 @@ mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum,
|
||||
&& (type == 0 || FLOAT_TYPE_P (type))
|
||||
&& (GET_MODE_CLASS (mode) == MODE_FLOAT
|
||||
|| GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
|
||||
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
|
||||
|| mode == V2SFmode)
|
||||
&& GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FPVALUE);
|
||||
|
||||
/* ??? According to the ABI documentation, the real and imaginary
|
||||
@ -5103,7 +5103,7 @@ static bool
|
||||
mips_return_mode_in_fpr_p (enum machine_mode mode)
|
||||
{
|
||||
return ((GET_MODE_CLASS (mode) == MODE_FLOAT
|
||||
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
|
||||
|| mode == V2SFmode
|
||||
|| GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
|
||||
&& GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_HWFPVALUE);
|
||||
}
|
||||
@ -10782,12 +10782,18 @@ mips_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
|
||||
/* Implement CANNOT_CHANGE_MODE_CLASS. */
|
||||
|
||||
bool
|
||||
mips_cannot_change_mode_class (enum machine_mode from ATTRIBUTE_UNUSED,
|
||||
enum machine_mode to ATTRIBUTE_UNUSED,
|
||||
mips_cannot_change_mode_class (enum machine_mode from,
|
||||
enum machine_mode to,
|
||||
enum reg_class rclass)
|
||||
{
|
||||
/* There are several problems with changing the modes of values in
|
||||
floating-point registers:
|
||||
/* Allow conversions between different Loongson integer vectors,
|
||||
and between those vectors and DImode. */
|
||||
if (GET_MODE_SIZE (from) == 8 && GET_MODE_SIZE (to) == 8
|
||||
&& INTEGRAL_MODE_P (from) && INTEGRAL_MODE_P (to))
|
||||
return false;
|
||||
|
||||
/* Otherwise, there are several problems with changing the modes of
|
||||
values in floating-point registers:
|
||||
|
||||
- When a multi-word value is stored in paired floating-point
|
||||
registers, the first register always holds the low word. We
|
||||
@ -10808,6 +10814,7 @@ mips_cannot_change_mode_class (enum machine_mode from ATTRIBUTE_UNUSED,
|
||||
format.
|
||||
|
||||
We therefore disallow all mode changes involving FPRs. */
|
||||
|
||||
return reg_classes_intersect_p (FP_REGS, rclass);
|
||||
}
|
||||
|
||||
@ -12785,12 +12792,6 @@ AVAIL_NON_MIPS16 (cache, TARGET_CACHE_BUILTIN)
|
||||
#define CODE_FOR_loongson_psubsb CODE_FOR_sssubv8qi3
|
||||
#define CODE_FOR_loongson_psubush CODE_FOR_ussubv4hi3
|
||||
#define CODE_FOR_loongson_psubusb CODE_FOR_ussubv8qi3
|
||||
#define CODE_FOR_loongson_punpckhbh CODE_FOR_vec_interleave_highv8qi
|
||||
#define CODE_FOR_loongson_punpckhhw CODE_FOR_vec_interleave_highv4hi
|
||||
#define CODE_FOR_loongson_punpckhwd CODE_FOR_vec_interleave_highv2si
|
||||
#define CODE_FOR_loongson_punpcklbh CODE_FOR_vec_interleave_lowv8qi
|
||||
#define CODE_FOR_loongson_punpcklhw CODE_FOR_vec_interleave_lowv4hi
|
||||
#define CODE_FOR_loongson_punpcklwd CODE_FOR_vec_interleave_lowv2si
|
||||
|
||||
static const struct mips_builtin_description mips_builtins[] = {
|
||||
DIRECT_BUILTIN (pll_ps, MIPS_V2SF_FTYPE_V2SF_V2SF, paired_single),
|
||||
@ -13032,8 +13033,8 @@ static const struct mips_builtin_description mips_builtins[] = {
|
||||
LOONGSON_BUILTIN (pasubub, MIPS_UV8QI_FTYPE_UV8QI_UV8QI),
|
||||
LOONGSON_BUILTIN (biadd, MIPS_UV4HI_FTYPE_UV8QI),
|
||||
LOONGSON_BUILTIN (psadbh, MIPS_UV4HI_FTYPE_UV8QI_UV8QI),
|
||||
LOONGSON_BUILTIN_SUFFIX (pshufh, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI_UQI),
|
||||
LOONGSON_BUILTIN_SUFFIX (pshufh, s, MIPS_V4HI_FTYPE_V4HI_V4HI_UQI),
|
||||
LOONGSON_BUILTIN_SUFFIX (pshufh, u, MIPS_UV4HI_FTYPE_UV4HI_UQI),
|
||||
LOONGSON_BUILTIN_SUFFIX (pshufh, s, MIPS_V4HI_FTYPE_V4HI_UQI),
|
||||
LOONGSON_BUILTIN_SUFFIX (psllh, u, MIPS_UV4HI_FTYPE_UV4HI_UQI),
|
||||
LOONGSON_BUILTIN_SUFFIX (psllh, s, MIPS_V4HI_FTYPE_V4HI_UQI),
|
||||
LOONGSON_BUILTIN_SUFFIX (psllw, u, MIPS_UV2SI_FTYPE_UV2SI_UQI),
|
||||
@ -15923,30 +15924,6 @@ mips_conditional_register_usage (void)
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize vector TARGET to VALS. */
|
||||
|
||||
void
|
||||
mips_expand_vector_init (rtx target, rtx vals)
|
||||
{
|
||||
enum machine_mode mode;
|
||||
enum machine_mode inner;
|
||||
unsigned int i, n_elts;
|
||||
rtx mem;
|
||||
|
||||
mode = GET_MODE (target);
|
||||
inner = GET_MODE_INNER (mode);
|
||||
n_elts = GET_MODE_NUNITS (mode);
|
||||
|
||||
gcc_assert (VECTOR_MODE_P (mode));
|
||||
|
||||
mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
|
||||
for (i = 0; i < n_elts; i++)
|
||||
emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
|
||||
XVECEXP (vals, 0, i));
|
||||
|
||||
emit_move_insn (target, mem);
|
||||
}
|
||||
|
||||
/* When generating MIPS16 code, we want to allocate $24 (T_REG) before
|
||||
other registers for instructions for which it is possible. This
|
||||
encourages the compiler to use CMP in cases where an XOR would
|
||||
@ -16357,6 +16334,667 @@ mips_prepare_pch_save (void)
|
||||
mips16_globals = 0;
|
||||
}
|
||||
|
||||
/* Generate or test for an insn that supports a constant permutation. */
|
||||
|
||||
#define MAX_VECT_LEN 8
|
||||
|
||||
struct expand_vec_perm_d
|
||||
{
|
||||
rtx target, op0, op1;
|
||||
unsigned char perm[MAX_VECT_LEN];
|
||||
enum machine_mode vmode;
|
||||
unsigned char nelt;
|
||||
bool one_vector_p;
|
||||
bool testing_p;
|
||||
};
|
||||
|
||||
/* Construct (set target (vec_select op0 (parallel perm))) and
|
||||
return true if that's a valid instruction in the active ISA. */
|
||||
|
||||
static bool
|
||||
mips_expand_vselect (rtx target, rtx op0,
|
||||
const unsigned char *perm, unsigned nelt)
|
||||
{
|
||||
rtx rperm[MAX_VECT_LEN], x;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < nelt; ++i)
|
||||
rperm[i] = GEN_INT (perm[i]);
|
||||
|
||||
x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
|
||||
x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
|
||||
x = gen_rtx_SET (VOIDmode, target, x);
|
||||
|
||||
x = emit_insn (x);
|
||||
if (recog_memoized (x) < 0)
|
||||
{
|
||||
remove_insn (x);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Similar, but generate a vec_concat from op0 and op1 as well. */
|
||||
|
||||
static bool
|
||||
mips_expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
|
||||
const unsigned char *perm, unsigned nelt)
|
||||
{
|
||||
enum machine_mode v2mode;
|
||||
rtx x;
|
||||
|
||||
v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
|
||||
x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
|
||||
return mips_expand_vselect (target, x, perm, nelt);
|
||||
}
|
||||
|
||||
/* Recognize patterns for even-odd extraction. */
|
||||
|
||||
static bool
|
||||
mips_expand_vpc_loongson_even_odd (struct expand_vec_perm_d *d)
|
||||
{
|
||||
unsigned i, odd, nelt = d->nelt;
|
||||
rtx t0, t1, t2, t3;
|
||||
|
||||
if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS))
|
||||
return false;
|
||||
/* Even-odd for V2SI/V2SFmode is matched by interleave directly. */
|
||||
if (nelt < 4)
|
||||
return false;
|
||||
|
||||
odd = d->perm[0];
|
||||
if (odd > 1)
|
||||
return false;
|
||||
for (i = 1; i < nelt; ++i)
|
||||
if (d->perm[i] != i * 2 + odd)
|
||||
return false;
|
||||
|
||||
if (d->testing_p)
|
||||
return true;
|
||||
|
||||
/* We need 2*log2(N)-1 operations to achieve odd/even with interleave. */
|
||||
t0 = gen_reg_rtx (d->vmode);
|
||||
t1 = gen_reg_rtx (d->vmode);
|
||||
switch (d->vmode)
|
||||
{
|
||||
case V4HImode:
|
||||
emit_insn (gen_loongson_punpckhhw (t0, d->op0, d->op1));
|
||||
emit_insn (gen_loongson_punpcklhw (t1, d->op0, d->op1));
|
||||
if (odd)
|
||||
emit_insn (gen_loongson_punpckhhw (d->target, t1, t0));
|
||||
else
|
||||
emit_insn (gen_loongson_punpcklhw (d->target, t1, t0));
|
||||
break;
|
||||
|
||||
case V8QImode:
|
||||
t2 = gen_reg_rtx (d->vmode);
|
||||
t3 = gen_reg_rtx (d->vmode);
|
||||
emit_insn (gen_loongson_punpckhbh (t0, d->op0, d->op1));
|
||||
emit_insn (gen_loongson_punpcklbh (t1, d->op0, d->op1));
|
||||
emit_insn (gen_loongson_punpckhbh (t2, t1, t0));
|
||||
emit_insn (gen_loongson_punpcklbh (t3, t1, t0));
|
||||
if (odd)
|
||||
emit_insn (gen_loongson_punpckhbh (d->target, t3, t2));
|
||||
else
|
||||
emit_insn (gen_loongson_punpcklbh (d->target, t3, t2));
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Recognize patterns for the Loongson PSHUFH instruction. */
|
||||
|
||||
static bool
|
||||
mips_expand_vpc_loongson_pshufh (struct expand_vec_perm_d *d)
|
||||
{
|
||||
unsigned i, mask;
|
||||
rtx rmask;
|
||||
|
||||
if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS))
|
||||
return false;
|
||||
if (d->vmode != V4HImode)
|
||||
return false;
|
||||
if (d->testing_p)
|
||||
return true;
|
||||
|
||||
/* Convert the selector into the packed 8-bit form for pshufh. */
|
||||
/* Recall that loongson is little-endian only. No big-endian
|
||||
adjustment required. */
|
||||
for (i = mask = 0; i < 4; i++)
|
||||
mask |= (d->perm[i] & 3) << (i * 2);
|
||||
rmask = force_reg (SImode, GEN_INT (mask));
|
||||
|
||||
if (d->one_vector_p)
|
||||
emit_insn (gen_loongson_pshufh (d->target, d->op0, rmask));
|
||||
else
|
||||
{
|
||||
rtx t0, t1, x, merge, rmerge[4];
|
||||
|
||||
t0 = gen_reg_rtx (V4HImode);
|
||||
t1 = gen_reg_rtx (V4HImode);
|
||||
emit_insn (gen_loongson_pshufh (t1, d->op1, rmask));
|
||||
emit_insn (gen_loongson_pshufh (t0, d->op0, rmask));
|
||||
|
||||
for (i = 0; i < 4; ++i)
|
||||
rmerge[i] = (d->perm[i] & 4 ? constm1_rtx : const0_rtx);
|
||||
merge = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmerge));
|
||||
merge = force_reg (V4HImode, merge);
|
||||
|
||||
x = gen_rtx_AND (V4HImode, merge, t1);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, t1, x));
|
||||
|
||||
x = gen_rtx_NOT (V4HImode, merge);
|
||||
x = gen_rtx_AND (V4HImode, x, t0);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, t0, x));
|
||||
|
||||
x = gen_rtx_IOR (V4HImode, t0, t1);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, d->target, x));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Recognize broadcast patterns for the Loongson. */
|
||||
|
||||
static bool
|
||||
mips_expand_vpc_loongson_bcast (struct expand_vec_perm_d *d)
|
||||
{
|
||||
unsigned i, elt;
|
||||
rtx t0, t1;
|
||||
|
||||
if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS))
|
||||
return false;
|
||||
/* Note that we've already matched V2SI via punpck and V4HI via pshufh. */
|
||||
if (d->vmode != V8QImode)
|
||||
return false;
|
||||
if (!d->one_vector_p)
|
||||
return false;
|
||||
|
||||
elt = d->perm[0];
|
||||
for (i = 1; i < 8; ++i)
|
||||
if (d->perm[i] != elt)
|
||||
return false;
|
||||
|
||||
if (d->testing_p)
|
||||
return true;
|
||||
|
||||
/* With one interleave we put two of the desired element adjacent. */
|
||||
t0 = gen_reg_rtx (V8QImode);
|
||||
if (elt < 4)
|
||||
emit_insn (gen_loongson_punpcklbh (t0, d->op0, d->op0));
|
||||
else
|
||||
emit_insn (gen_loongson_punpckhbh (t0, d->op0, d->op0));
|
||||
|
||||
/* Shuffle that one HImode element into all locations. */
|
||||
elt &= 3;
|
||||
elt *= 0x55;
|
||||
t1 = gen_reg_rtx (V4HImode);
|
||||
emit_insn (gen_loongson_pshufh (t1, gen_lowpart (V4HImode, t0),
|
||||
force_reg (SImode, GEN_INT (elt))));
|
||||
|
||||
emit_move_insn (d->target, gen_lowpart (V8QImode, t1));
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
|
||||
{
|
||||
unsigned int i, nelt = d->nelt;
|
||||
unsigned char perm2[MAX_VECT_LEN];
|
||||
|
||||
if (d->one_vector_p)
|
||||
{
|
||||
/* Try interleave with alternating operands. */
|
||||
memcpy (perm2, d->perm, sizeof(perm2));
|
||||
for (i = 1; i < nelt; i += 2)
|
||||
perm2[i] += nelt;
|
||||
if (mips_expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, nelt))
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mips_expand_vselect_vconcat (d->target, d->op0, d->op1,
|
||||
d->perm, nelt))
|
||||
return true;
|
||||
|
||||
/* Try again with swapped operands. */
|
||||
for (i = 0; i < nelt; ++i)
|
||||
perm2[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
|
||||
if (mips_expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
|
||||
return true;
|
||||
}
|
||||
|
||||
if (mips_expand_vpc_loongson_even_odd (d))
|
||||
return true;
|
||||
if (mips_expand_vpc_loongson_pshufh (d))
|
||||
return true;
|
||||
if (mips_expand_vpc_loongson_bcast (d))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Expand a vec_perm_const pattern. */
|
||||
|
||||
bool
|
||||
mips_expand_vec_perm_const (rtx operands[4])
|
||||
{
|
||||
struct expand_vec_perm_d d;
|
||||
int i, nelt, which;
|
||||
unsigned char orig_perm[MAX_VECT_LEN];
|
||||
rtx sel;
|
||||
bool ok;
|
||||
|
||||
d.target = operands[0];
|
||||
d.op0 = operands[1];
|
||||
d.op1 = operands[2];
|
||||
sel = operands[3];
|
||||
|
||||
d.vmode = GET_MODE (d.target);
|
||||
gcc_assert (VECTOR_MODE_P (d.vmode));
|
||||
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
|
||||
d.testing_p = false;
|
||||
|
||||
for (i = which = 0; i < nelt; ++i)
|
||||
{
|
||||
rtx e = XVECEXP (sel, 0, i);
|
||||
int ei = INTVAL (e) & (2 * nelt - 1);
|
||||
which |= (ei < nelt ? 1 : 2);
|
||||
orig_perm[i] = ei;
|
||||
}
|
||||
memcpy (d.perm, orig_perm, MAX_VECT_LEN);
|
||||
|
||||
switch (which)
|
||||
{
|
||||
default:
|
||||
gcc_unreachable();
|
||||
|
||||
case 3:
|
||||
d.one_vector_p = false;
|
||||
if (!rtx_equal_p (d.op0, d.op1))
|
||||
break;
|
||||
/* FALLTHRU */
|
||||
|
||||
case 2:
|
||||
for (i = 0; i < nelt; ++i)
|
||||
d.perm[i] &= nelt - 1;
|
||||
d.op0 = d.op1;
|
||||
d.one_vector_p = true;
|
||||
break;
|
||||
|
||||
case 1:
|
||||
d.op1 = d.op0;
|
||||
d.one_vector_p = true;
|
||||
break;
|
||||
}
|
||||
|
||||
ok = mips_expand_vec_perm_const_1 (&d);
|
||||
|
||||
/* If we were given a two-vector permutation which just happened to
|
||||
have both input vectors equal, we folded this into a one-vector
|
||||
permutation. There are several loongson patterns that are matched
|
||||
via direct vec_select+vec_concat expansion, but we do not have
|
||||
support in mips_expand_vec_perm_const_1 to guess the adjustment
|
||||
that should be made for a single operand. Just try again with
|
||||
the original permutation. */
|
||||
if (!ok && which == 3)
|
||||
{
|
||||
d.op0 = operands[1];
|
||||
d.op1 = operands[2];
|
||||
d.one_vector_p = false;
|
||||
memcpy (d.perm, orig_perm, MAX_VECT_LEN);
|
||||
ok = mips_expand_vec_perm_const_1 (&d);
|
||||
}
|
||||
|
||||
return ok;
|
||||
}
|
||||
|
||||
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
|
||||
|
||||
static bool
|
||||
mips_vectorize_vec_perm_const_ok (enum machine_mode vmode,
|
||||
const unsigned char *sel)
|
||||
{
|
||||
struct expand_vec_perm_d d;
|
||||
unsigned int i, nelt, which;
|
||||
bool ret;
|
||||
|
||||
d.vmode = vmode;
|
||||
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
|
||||
d.testing_p = true;
|
||||
memcpy (d.perm, sel, nelt);
|
||||
|
||||
/* Categorize the set of elements in the selector. */
|
||||
for (i = which = 0; i < nelt; ++i)
|
||||
{
|
||||
unsigned char e = d.perm[i];
|
||||
gcc_assert (e < 2 * nelt);
|
||||
which |= (e < nelt ? 1 : 2);
|
||||
}
|
||||
|
||||
/* For all elements from second vector, fold the elements to first. */
|
||||
if (which == 2)
|
||||
for (i = 0; i < nelt; ++i)
|
||||
d.perm[i] -= nelt;
|
||||
|
||||
/* Check whether the mask can be applied to the vector type. */
|
||||
d.one_vector_p = (which != 3);
|
||||
|
||||
d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
|
||||
d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
|
||||
if (!d.one_vector_p)
|
||||
d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
|
||||
|
||||
start_sequence ();
|
||||
ret = mips_expand_vec_perm_const_1 (&d);
|
||||
end_sequence ();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Expand an integral vector unpack operation. */
|
||||
|
||||
void
|
||||
mips_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p)
|
||||
{
|
||||
enum machine_mode imode = GET_MODE (operands[1]);
|
||||
rtx (*unpack) (rtx, rtx, rtx);
|
||||
rtx (*cmpgt) (rtx, rtx, rtx);
|
||||
rtx tmp, dest, zero;
|
||||
|
||||
switch (imode)
|
||||
{
|
||||
case V8QImode:
|
||||
if (high_p)
|
||||
unpack = gen_loongson_punpckhbh;
|
||||
else
|
||||
unpack = gen_loongson_punpcklbh;
|
||||
cmpgt = gen_loongson_pcmpgtb;
|
||||
break;
|
||||
case V4HImode:
|
||||
if (high_p)
|
||||
unpack = gen_loongson_punpckhhw;
|
||||
else
|
||||
unpack = gen_loongson_punpcklhw;
|
||||
cmpgt = gen_loongson_pcmpgth;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
zero = force_reg (imode, CONST0_RTX (imode));
|
||||
if (unsigned_p)
|
||||
tmp = zero;
|
||||
else
|
||||
{
|
||||
tmp = gen_reg_rtx (imode);
|
||||
emit_insn (cmpgt (tmp, zero, operands[1]));
|
||||
}
|
||||
|
||||
dest = gen_reg_rtx (imode);
|
||||
emit_insn (unpack (dest, operands[1], tmp));
|
||||
|
||||
emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), dest));
|
||||
}
|
||||
|
||||
/* A subroutine of mips_expand_vec_init, match constant vector elements. */
|
||||
|
||||
static inline bool
|
||||
mips_constant_elt_p (rtx x)
|
||||
{
|
||||
return CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE;
|
||||
}
|
||||
|
||||
/* A subroutine of mips_expand_vec_init, expand via broadcast. */
|
||||
|
||||
static void
|
||||
mips_expand_vi_broadcast (enum machine_mode vmode, rtx target, rtx elt)
|
||||
{
|
||||
struct expand_vec_perm_d d;
|
||||
rtx t1;
|
||||
bool ok;
|
||||
|
||||
if (elt != const0_rtx)
|
||||
elt = force_reg (GET_MODE_INNER (vmode), elt);
|
||||
if (REG_P (elt))
|
||||
elt = gen_lowpart (DImode, elt);
|
||||
|
||||
t1 = gen_reg_rtx (vmode);
|
||||
switch (vmode)
|
||||
{
|
||||
case V8QImode:
|
||||
emit_insn (gen_loongson_vec_init1_v8qi (t1, elt));
|
||||
break;
|
||||
case V4HImode:
|
||||
emit_insn (gen_loongson_vec_init1_v4hi (t1, elt));
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
memset (&d, 0, sizeof (d));
|
||||
d.target = target;
|
||||
d.op0 = t1;
|
||||
d.op1 = t1;
|
||||
d.vmode = vmode;
|
||||
d.nelt = GET_MODE_NUNITS (vmode);
|
||||
d.one_vector_p = true;
|
||||
|
||||
ok = mips_expand_vec_perm_const_1 (&d);
|
||||
gcc_assert (ok);
|
||||
}
|
||||
|
||||
/* A subroutine of mips_expand_vec_init, replacing all of the non-constant
|
||||
elements of VALS with zeros, copy the constant vector to TARGET. */
|
||||
|
||||
static void
|
||||
mips_expand_vi_constant (enum machine_mode vmode, unsigned nelt,
|
||||
rtx target, rtx vals)
|
||||
{
|
||||
rtvec vec = shallow_copy_rtvec (XVEC (vals, 0));
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < nelt; ++i)
|
||||
{
|
||||
if (!mips_constant_elt_p (RTVEC_ELT (vec, i)))
|
||||
RTVEC_ELT (vec, i) = const0_rtx;
|
||||
}
|
||||
|
||||
emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, vec));
|
||||
}
|
||||
|
||||
|
||||
/* A subroutine of mips_expand_vec_init, expand via pinsrh. */
|
||||
|
||||
static void
|
||||
mips_expand_vi_loongson_one_pinsrh (rtx target, rtx vals, unsigned one_var)
|
||||
{
|
||||
mips_expand_vi_constant (V4HImode, 4, target, vals);
|
||||
|
||||
emit_insn (gen_vec_setv4hi (target, target, XVECEXP (vals, 0, one_var),
|
||||
GEN_INT (one_var)));
|
||||
}
|
||||
|
||||
/* A subroutine of mips_expand_vec_init, expand anything via memory. */
|
||||
|
||||
static void
|
||||
mips_expand_vi_general (enum machine_mode vmode, enum machine_mode imode,
|
||||
unsigned nelt, unsigned nvar, rtx target, rtx vals)
|
||||
{
|
||||
rtx mem = assign_stack_temp (vmode, GET_MODE_SIZE (vmode), 0);
|
||||
unsigned int i, isize = GET_MODE_SIZE (imode);
|
||||
|
||||
if (nvar < nelt)
|
||||
mips_expand_vi_constant (vmode, nelt, mem, vals);
|
||||
|
||||
for (i = 0; i < nelt; ++i)
|
||||
{
|
||||
rtx x = XVECEXP (vals, 0, i);
|
||||
if (!mips_constant_elt_p (x))
|
||||
emit_move_insn (adjust_address (mem, imode, i * isize), x);
|
||||
}
|
||||
|
||||
emit_move_insn (target, mem);
|
||||
}
|
||||
|
||||
/* Expand a vector initialization. */
|
||||
|
||||
void
|
||||
mips_expand_vector_init (rtx target, rtx vals)
|
||||
{
|
||||
enum machine_mode vmode = GET_MODE (target);
|
||||
enum machine_mode imode = GET_MODE_INNER (vmode);
|
||||
unsigned i, nelt = GET_MODE_NUNITS (vmode);
|
||||
unsigned nvar = 0, one_var = -1u;
|
||||
bool all_same = true;
|
||||
rtx x;
|
||||
|
||||
for (i = 0; i < nelt; ++i)
|
||||
{
|
||||
x = XVECEXP (vals, 0, i);
|
||||
if (!mips_constant_elt_p (x))
|
||||
nvar++, one_var = i;
|
||||
if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
|
||||
all_same = false;
|
||||
}
|
||||
|
||||
/* Load constants from the pool, or whatever's handy. */
|
||||
if (nvar == 0)
|
||||
{
|
||||
emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0)));
|
||||
return;
|
||||
}
|
||||
|
||||
/* For two-part initialization, always use CONCAT. */
|
||||
if (nelt == 2)
|
||||
{
|
||||
rtx op0 = force_reg (imode, XVECEXP (vals, 0, 0));
|
||||
rtx op1 = force_reg (imode, XVECEXP (vals, 0, 1));
|
||||
x = gen_rtx_VEC_CONCAT (vmode, op0, op1);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, target, x));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Loongson is the only cpu with vectors with more elements. */
|
||||
gcc_assert (TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS);
|
||||
|
||||
/* If all values are identical, broadcast the value. */
|
||||
if (all_same)
|
||||
{
|
||||
mips_expand_vi_broadcast (vmode, target, XVECEXP (vals, 0, 0));
|
||||
return;
|
||||
}
|
||||
|
||||
/* If we've only got one non-variable V4HImode, use PINSRH. */
|
||||
if (nvar == 1 && vmode == V4HImode)
|
||||
{
|
||||
mips_expand_vi_loongson_one_pinsrh (target, vals, one_var);
|
||||
return;
|
||||
}
|
||||
|
||||
mips_expand_vi_general (vmode, imode, nelt, nvar, target, vals);
|
||||
}
|
||||
|
||||
/* Expand a vector reduction. */
|
||||
|
||||
void
|
||||
mips_expand_vec_reduc (rtx target, rtx in, rtx (*gen)(rtx, rtx, rtx))
|
||||
{
|
||||
enum machine_mode vmode = GET_MODE (in);
|
||||
unsigned char perm2[2];
|
||||
rtx last, next, fold, x;
|
||||
bool ok;
|
||||
|
||||
last = in;
|
||||
fold = gen_reg_rtx (vmode);
|
||||
switch (vmode)
|
||||
{
|
||||
case V2SFmode:
|
||||
/* Use PUL/PLU to produce { L, H } op { H, L }.
|
||||
By reversing the pair order, rather than a pure interleave high,
|
||||
we avoid erroneous exceptional conditions that we might otherwise
|
||||
produce from the computation of H op H. */
|
||||
perm2[0] = 1;
|
||||
perm2[1] = 2;
|
||||
ok = mips_expand_vselect_vconcat (fold, last, last, perm2, 2);
|
||||
gcc_assert (ok);
|
||||
break;
|
||||
|
||||
case V2SImode:
|
||||
/* Use interleave to produce { H, L } op { H, H }. */
|
||||
emit_insn (gen_loongson_punpckhwd (fold, last, last));
|
||||
break;
|
||||
|
||||
case V4HImode:
|
||||
/* Perform the first reduction with interleave,
|
||||
and subsequent reductions with shifts. */
|
||||
emit_insn (gen_loongson_punpckhwd_hi (fold, last, last));
|
||||
|
||||
next = gen_reg_rtx (vmode);
|
||||
emit_insn (gen (next, last, fold));
|
||||
last = next;
|
||||
|
||||
fold = gen_reg_rtx (vmode);
|
||||
x = force_reg (SImode, GEN_INT (16));
|
||||
emit_insn (gen_vec_shr_v4hi (fold, last, x));
|
||||
break;
|
||||
|
||||
case V8QImode:
|
||||
emit_insn (gen_loongson_punpckhwd_qi (fold, last, last));
|
||||
|
||||
next = gen_reg_rtx (vmode);
|
||||
emit_insn (gen (next, last, fold));
|
||||
last = next;
|
||||
|
||||
fold = gen_reg_rtx (vmode);
|
||||
x = force_reg (SImode, GEN_INT (16));
|
||||
emit_insn (gen_vec_shr_v8qi (fold, last, x));
|
||||
|
||||
next = gen_reg_rtx (vmode);
|
||||
emit_insn (gen (next, last, fold));
|
||||
last = next;
|
||||
|
||||
fold = gen_reg_rtx (vmode);
|
||||
x = force_reg (SImode, GEN_INT (8));
|
||||
emit_insn (gen_vec_shr_v8qi (fold, last, x));
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
emit_insn (gen (target, last, fold));
|
||||
}
|
||||
|
||||
/* Expand a vector minimum/maximum. */
|
||||
|
||||
void
|
||||
mips_expand_vec_minmax (rtx target, rtx op0, rtx op1,
|
||||
rtx (*cmp) (rtx, rtx, rtx), bool min_p)
|
||||
{
|
||||
enum machine_mode vmode = GET_MODE (target);
|
||||
rtx tc, t0, t1, x;
|
||||
|
||||
tc = gen_reg_rtx (vmode);
|
||||
t0 = gen_reg_rtx (vmode);
|
||||
t1 = gen_reg_rtx (vmode);
|
||||
|
||||
/* op0 > op1 */
|
||||
emit_insn (cmp (tc, op0, op1));
|
||||
|
||||
x = gen_rtx_AND (vmode, tc, (min_p ? op1 : op0));
|
||||
emit_insn (gen_rtx_SET (VOIDmode, t0, x));
|
||||
|
||||
x = gen_rtx_NOT (vmode, tc);
|
||||
x = gen_rtx_AND (vmode, x, (min_p ? op0 : op1));
|
||||
emit_insn (gen_rtx_SET (VOIDmode, t1, x));
|
||||
|
||||
x = gen_rtx_IOR (vmode, t0, t1);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, target, x));
|
||||
}
|
||||
|
||||
/* Initialize the GCC target structure. */
|
||||
/* Replace any earlier definition of TARGET_ASM_ALIGNED_HI_OP with the
   directive string "\t.half\t".  */
#undef TARGET_ASM_ALIGNED_HI_OP
|
||||
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
|
||||
@ -16578,6 +17216,9 @@ mips_prepare_pch_save (void)
|
||||
/* Install mips_prepare_pch_save as TARGET_PREPARE_PCH_SAVE.  */
#undef TARGET_PREPARE_PCH_SAVE
|
||||
#define TARGET_PREPARE_PCH_SAVE mips_prepare_pch_save
|
||||
|
||||
/* Install mips_vectorize_vec_perm_const_ok as
   TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
|
||||
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK mips_vectorize_vec_perm_const_ok
|
||||
|
||||
/* The target structure itself.  TARGET_INITIALIZER is defined outside
   this chunk; presumably it picks up the TARGET_* overrides above, so
   they must precede this definition -- confirm.  */
struct gcc_target targetm = TARGET_INITIALIZER;
|
||||
|
||||
#include "gt-mips.h"
|
||||
|
@ -73,8 +73,15 @@
|
||||
;; This is used for indexing into vectors, and hence only accepts const_int.
;; Matches a const_int whose value is 0 or 1, using the same IN_RANGE
;; test as const_2_or_3_operand and const_0_to_3_operand.
(define_predicate "const_0_or_1_operand"
  (and (match_code "const_int")
       (match_test "IN_RANGE (INTVAL (op), 0, 1)")))
|
||||
|
||||
;; Matches a const_int whose value is 2 or 3.
(define_predicate "const_2_or_3_operand"
|
||||
(and (match_code "const_int")
|
||||
(match_test "IN_RANGE (INTVAL (op), 2, 3)")))
|
||||
|
||||
;; Matches a const_int whose value lies in the range 0 to 3 inclusive.
(define_predicate "const_0_to_3_operand"
|
||||
(and (match_code "const_int")
|
||||
(match_test "IN_RANGE (INTVAL (op), 0, 3)")))
|
||||
|
||||
(define_predicate "qi_mask_operand"
|
||||
(and (match_code "const_int")
|
||||
|
Loading…
Reference in New Issue
Block a user