ia64.c (ia64_expand_widen_sum): New.
* config/ia64/ia64.c (ia64_expand_widen_sum): New. (ia64_expand_dot_prod_v8qi): New. * config/ia64/ia64-protos.h: Update. * config/ia64/vect.md (pmpy2_r, pmpy2_l, widen_usumv8qi3, widen_usumv4hi3, widen_ssumv8qi3, widen_ssumv4hi3, udot_prodv8qi, sdot_prodv8qi, sdot_prodv4hi): New. (reduc_splus_v2sf): Rename from reduc_plus_v2sf. From-SVN: r104426
This commit is contained in:
parent
a220ee348d
commit
e898620c39
@ -1,3 +1,13 @@
|
||||
2005-09-19 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/ia64/ia64.c (ia64_expand_widen_sum): New.
|
||||
(ia64_expand_dot_prod_v8qi): New.
|
||||
* config/ia64/ia64-protos.h: Update.
|
||||
* config/ia64/vect.md (pmpy2_r, pmpy2_l, widen_usumv8qi3,
|
||||
widen_usumv4hi3, widen_ssumv8qi3, widen_ssumv4hi3, udot_prodv8qi,
|
||||
sdot_prodv8qi, sdot_prodv4hi): New.
|
||||
(reduc_splus_v2sf): Rename from reduc_plus_v2sf.
|
||||
|
||||
2005-09-19 Richard Henderson <rth@redhat.com>
|
||||
|
||||
PR 23941
|
||||
|
@ -50,6 +50,8 @@ extern bool ia64_expand_movxf_movrf (enum machine_mode, rtx[]);
|
||||
extern rtx ia64_expand_compare (enum rtx_code, enum machine_mode);
|
||||
extern void ia64_expand_vecint_cmov (rtx[]);
|
||||
extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]);
|
||||
extern void ia64_expand_widen_sum (rtx[], bool);
|
||||
extern void ia64_expand_dot_prod_v8qi (rtx[], bool);
|
||||
extern void ia64_expand_call (rtx, rtx, rtx, int);
|
||||
extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int);
|
||||
extern void ia64_reload_gp (void);
|
||||
|
@ -1766,6 +1766,113 @@ ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Emit an integral vector widening sum operation.

   OPERANDS[0] is the destination; its mode is used as the wide mode.
   OPERANDS[1] is the narrow input (V8QImode or V4HImode; any other mode
   reaches gcc_unreachable).  OPERANDS[2] is added into the result.
   UNSIGNEDP selects an all-zero extension vector; otherwise the
   extension vector is produced by comparing OPERANDS[1] against zero.  */
|
||||
|
||||
void
|
||||
ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
|
||||
{
|
||||
rtx l, h, x, s;
|
||||
enum machine_mode wmode, mode;
|
||||
/* Insn generators, selected below according to the narrow mode.  */
rtx (*unpack_l) (rtx, rtx, rtx);
|
||||
rtx (*unpack_h) (rtx, rtx, rtx);
|
||||
rtx (*plus) (rtx, rtx, rtx);
|
||||
|
||||
wmode = GET_MODE (operands[0]);
|
||||
mode = GET_MODE (operands[1]);
|
||||
|
||||
/* Pick the unpack and add generators that go with the narrow mode.  */
switch (mode)
|
||||
{
|
||||
case V8QImode:
|
||||
unpack_l = gen_unpack1_l;
|
||||
unpack_h = gen_unpack1_h;
|
||||
plus = gen_addv4hi3;
|
||||
break;
|
||||
case V4HImode:
|
||||
unpack_l = gen_unpack2_l;
|
||||
unpack_h = gen_unpack2_h;
|
||||
plus = gen_addv2si3;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Fill in x with the sign extension of each element in op1.  For the
   unsigned case that is simply the zero vector.  */
|
||||
if (unsignedp)
|
||||
x = CONST0_RTX (mode);
|
||||
else
|
||||
{
|
||||
bool neg;
|
||||
|
||||
x = gen_reg_rtx (mode);
|
||||
|
||||
/* x = (operands[1] < 0), element-wise.  NOTE(review): presumably this
   yields an all-ones element for each negative input element -- confirm
   against ia64_expand_vecint_compare.  */
neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
|
||||
CONST0_RTX (mode));
|
||||
/* NOTE(review): the return value presumably reports that the emitted
   comparison is the inverse of the one requested; that is not expected
   here -- confirm.  */
gcc_assert (!neg);
|
||||
}
|
||||
|
||||
l = gen_reg_rtx (wmode);
|
||||
h = gen_reg_rtx (wmode);
|
||||
s = gen_reg_rtx (wmode);
|
||||
|
||||
/* The unpack patterns write the wide registers L and H through a
   narrow-mode view obtained with gen_lowpart.  */
emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
|
||||
emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
|
||||
/* operands[0] = h + (l + operands[2]).  */
emit_insn (plus (s, l, operands[2]));
|
||||
emit_insn (plus (operands[0], h, s));
|
||||
}
|
||||
|
||||
/* Emit a signed or unsigned V8QI dot product operation.

   OPERANDS[1] and OPERANDS[2] are the V8QI inputs, OPERANDS[3] is the
   V2SI value added into the result and OPERANDS[0] is the V2SI
   destination.  UNSIGNEDP selects all-zero extension vectors; otherwise
   each extension vector is produced by comparing the input against
   zero.  */
|
||||
|
||||
void
|
||||
ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
|
||||
{
|
||||
rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
|
||||
|
||||
/* Fill in x1 and x2 with the sign extension of each element.  For the
   unsigned case both are simply the zero vector.  */
|
||||
if (unsignedp)
|
||||
x1 = x2 = CONST0_RTX (V8QImode);
|
||||
else
|
||||
{
|
||||
bool neg;
|
||||
|
||||
x1 = gen_reg_rtx (V8QImode);
|
||||
x2 = gen_reg_rtx (V8QImode);
|
||||
|
||||
/* x1 = (operands[1] < 0) and x2 = (operands[2] < 0), element-wise.
   NOTE(review): the return value presumably reports an inverted
   comparison, which is not expected here -- confirm against
   ia64_expand_vecint_compare.  */
neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
|
||||
CONST0_RTX (V8QImode));
|
||||
gcc_assert (!neg);
|
||||
neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
|
||||
CONST0_RTX (V8QImode));
|
||||
gcc_assert (!neg);
|
||||
}
|
||||
|
||||
l1 = gen_reg_rtx (V4HImode);
|
||||
l2 = gen_reg_rtx (V4HImode);
|
||||
h1 = gen_reg_rtx (V4HImode);
|
||||
h2 = gen_reg_rtx (V4HImode);
|
||||
|
||||
/* Unpack each input against its extension vector into two V4HI
   registers, written through a V8QI view obtained with gen_lowpart.  */
emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
|
||||
emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
|
||||
emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
|
||||
emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
|
||||
|
||||
p1 = gen_reg_rtx (V2SImode);
|
||||
p2 = gen_reg_rtx (V2SImode);
|
||||
p3 = gen_reg_rtx (V2SImode);
|
||||
p4 = gen_reg_rtx (V2SImode);
|
||||
/* Four V2SI partial products: pmpy2_r and pmpy2_l applied to the
   l1/l2 pair and to the h1/h2 pair.  */
emit_insn (gen_pmpy2_r (p1, l1, l2));
|
||||
emit_insn (gen_pmpy2_l (p2, l1, l2));
|
||||
emit_insn (gen_pmpy2_r (p3, h1, h2));
|
||||
emit_insn (gen_pmpy2_l (p4, h1, h2));
|
||||
|
||||
s1 = gen_reg_rtx (V2SImode);
|
||||
s2 = gen_reg_rtx (V2SImode);
|
||||
s3 = gen_reg_rtx (V2SImode);
|
||||
/* operands[0] = (p3 + p4) + ((p1 + p2) + operands[3]).  */
emit_insn (gen_addv2si3 (s1, p1, p2));
|
||||
emit_insn (gen_addv2si3 (s2, p3, p4));
|
||||
emit_insn (gen_addv2si3 (s3, s1, operands[3]));
|
||||
emit_insn (gen_addv2si3 (operands[0], s2, s3));
|
||||
}
|
||||
|
||||
/* Emit the appropriate sequence for a call. */
|
||||
|
||||
void
|
||||
|
@ -212,6 +212,36 @@
|
||||
"pmpyshr2 %0 = %1, %2, 0"
|
||||
[(set_attr "itanium_class" "mmmul")])
|
||||
|
||||
;; pmpy2.r: multiply elements 0 and 2 of the two V4HI operands, each
;; sign-extended, giving a V2SI result.
(define_insn "pmpy2_r"
|
||||
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
|
||||
(mult:V2SI
|
||||
(vec_select:V2SI
|
||||
(sign_extend:V4SI
|
||||
(match_operand:V4HI 1 "gr_register_operand" "r"))
|
||||
(parallel [(const_int 0) (const_int 2)]))
|
||||
(vec_select:V2SI
|
||||
(sign_extend:V4SI
|
||||
(match_operand:V4HI 2 "gr_register_operand" "r"))
|
||||
(parallel [(const_int 0) (const_int 2)]))))]
|
||||
""
|
||||
"pmpy2.r %0 = %1, %2"
|
||||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
;; pmpy2.l: multiply elements 1 and 3 of the two V4HI operands, each
;; sign-extended, giving a V2SI result.
(define_insn "pmpy2_l"
|
||||
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
|
||||
(mult:V2SI
|
||||
(vec_select:V2SI
|
||||
(sign_extend:V4SI
|
||||
(match_operand:V4HI 1 "gr_register_operand" "r"))
|
||||
(parallel [(const_int 1) (const_int 3)]))
|
||||
(vec_select:V2SI
|
||||
(sign_extend:V4SI
|
||||
(match_operand:V4HI 2 "gr_register_operand" "r"))
|
||||
(parallel [(const_int 1) (const_int 3)]))))]
|
||||
""
|
||||
"pmpy2.l %0 = %1, %2"
|
||||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
(define_expand "umax<mode>3"
|
||||
[(set (match_operand:VECINT 0 "gr_register_operand" "")
|
||||
(umax:VECINT (match_operand:VECINT 1 "gr_register_operand" "")
|
||||
@ -331,6 +361,88 @@
|
||||
operands[1] = gen_lowpart (DImode, operands[1]);
|
||||
})
|
||||
|
||||
;; Unsigned widening sum of a V8QI operand into a V4HI accumulator;
;; expansion is delegated to ia64_expand_widen_sum with unsignedp true.
(define_expand "widen_usumv8qi3"
|
||||
[(match_operand:V4HI 0 "gr_register_operand" "")
|
||||
(match_operand:V8QI 1 "gr_register_operand" "")
|
||||
(match_operand:V4HI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
ia64_expand_widen_sum (operands, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Unsigned widening sum of a V4HI operand into a V2SI accumulator;
;; expansion is delegated to ia64_expand_widen_sum with unsignedp true.
(define_expand "widen_usumv4hi3"
|
||||
[(match_operand:V2SI 0 "gr_register_operand" "")
|
||||
(match_operand:V4HI 1 "gr_register_operand" "")
|
||||
(match_operand:V2SI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
ia64_expand_widen_sum (operands, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Signed widening sum of a V8QI operand into a V4HI accumulator;
;; expansion is delegated to ia64_expand_widen_sum with unsignedp false.
(define_expand "widen_ssumv8qi3"
|
||||
[(match_operand:V4HI 0 "gr_register_operand" "")
|
||||
(match_operand:V8QI 1 "gr_register_operand" "")
|
||||
(match_operand:V4HI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
ia64_expand_widen_sum (operands, false);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Signed widening sum of a V4HI operand into a V2SI accumulator;
;; expansion is delegated to ia64_expand_widen_sum with unsignedp false.
(define_expand "widen_ssumv4hi3"
|
||||
[(match_operand:V2SI 0 "gr_register_operand" "")
|
||||
(match_operand:V4HI 1 "gr_register_operand" "")
|
||||
(match_operand:V2SI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
ia64_expand_widen_sum (operands, false);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Unsigned dot product of two V8QI operands added to a V2SI operand;
;; expansion is delegated to ia64_expand_dot_prod_v8qi with unsignedp true.
(define_expand "udot_prodv8qi"
|
||||
[(match_operand:V2SI 0 "gr_register_operand" "")
|
||||
(match_operand:V8QI 1 "gr_register_operand" "")
|
||||
(match_operand:V8QI 2 "gr_register_operand" "")
|
||||
(match_operand:V2SI 3 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
ia64_expand_dot_prod_v8qi (operands, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Signed dot product of two V8QI operands added to a V2SI operand;
;; expansion is delegated to ia64_expand_dot_prod_v8qi with unsignedp false.
(define_expand "sdot_prodv8qi"
|
||||
[(match_operand:V2SI 0 "gr_register_operand" "")
|
||||
(match_operand:V8QI 1 "gr_register_operand" "")
|
||||
(match_operand:V8QI 2 "gr_register_operand" "")
|
||||
(match_operand:V2SI 3 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
ia64_expand_dot_prod_v8qi (operands, false);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Signed dot product of two V4HI operands added to a V2SI operand.
;; Emits pmpy2_r and pmpy2_l on operands 1 and 2, then two V2SI adds:
;; operand 0 = (pmpy2_r product + operand 3) + pmpy2_l product.
(define_expand "sdot_prodv4hi"
|
||||
[(match_operand:V2SI 0 "gr_register_operand" "")
|
||||
(match_operand:V4HI 1 "gr_register_operand" "")
|
||||
(match_operand:V4HI 2 "gr_register_operand" "")
|
||||
(match_operand:V2SI 3 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
rtx l, r, t;
|
||||
|
||||
r = gen_reg_rtx (V2SImode);
|
||||
l = gen_reg_rtx (V2SImode);
|
||||
t = gen_reg_rtx (V2SImode);
|
||||
|
||||
emit_insn (gen_pmpy2_r (r, operands[1], operands[2]));
|
||||
emit_insn (gen_pmpy2_l (l, operands[1], operands[2]));
|
||||
emit_insn (gen_addv2si3 (t, r, operands[3]));
|
||||
emit_insn (gen_addv2si3 (operands[0], t, l));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vcond<mode>"
|
||||
[(set (match_operand:VECINT 0 "gr_register_operand" "")
|
||||
(if_then_else:VECINT
|
||||
@ -717,15 +829,11 @@
|
||||
;; padd.uus
|
||||
;; pavg
|
||||
;; pavgsub
|
||||
;; pmpy
|
||||
;; pmpyshr, general form
|
||||
;; psad
|
||||
;; pshladd
|
||||
;; pshradd
|
||||
;; psub.uus
|
||||
;; vec_set<mode>
|
||||
;; vec_extract<mode>
|
||||
;; vec_init<mode>
|
||||
|
||||
;; Floating point vector operations
|
||||
|
||||
@ -947,7 +1055,7 @@
|
||||
"fpmin %0 = %1, %2"
|
||||
[(set_attr "itanium_class" "fmisc")])
|
||||
|
||||
(define_expand "reduc_plus_v2sf"
|
||||
(define_expand "reduc_splus_v2sf"
|
||||
[(match_operand:V2SF 0 "fr_register_operand" "")
|
||||
(match_operand:V2SF 1 "fr_register_operand" "")]
|
||||
""
|
||||
|
Loading…
Reference in New Issue
Block a user