[ARC] Add SIMD extensions for ARC HS

gcc/
2016-04-28  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.c (arc_vector_mode_supported_p): Add support for
	the new ARC HS SIMD instructions.
	(arc_preferred_simd_mode): New function.
	(arc_autovectorize_vector_sizes): Likewise.
	(TARGET_VECTORIZE_PREFERRED_SIMD_MODE)
	(TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.
	(arc_init_reg_tables): Accept new ARC HS SIMD modes.
	(arc_init_builtins): Add new SIMD builtin types.
	(arc_split_move): Handle 64 bit vector moves.
	* config/arc/arc.h (TARGET_PLUS_DMPY, TARGET_PLUS_MACD)
	(TARGET_PLUS_QMACW): Define.
	* config/arc/builtins.def (QMACH, QMACHU, QMPYH, QMPYHU, DMACH)
	(DMACHU, DMPYH, DMPYHU, DMACWH, DMACWHU, VMAC2H, VMAC2HU, VMPY2H)
	(VMPY2HU, VADDSUB2H, VSUBADD2H, VADDSUB, VSUBADD, VADDSUB4H)
	(VSUBADD4H): New builtins.
	* config/arc/simdext.md: Add new ARC HS SIMD instructions.
	* testsuite/gcc.target/arc/builtin_simdarc.c: New file.

From-SVN: r235551
This commit is contained in:
Claudiu Zissulescu 2016-04-28 11:53:13 +02:00 committed by Claudiu Zissulescu
parent 174f66220d
commit 00c072ae51
6 changed files with 768 additions and 8 deletions

View File

@ -1,3 +1,23 @@
2016-04-28 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc.c (arc_vector_mode_supported_p): Add support for
the new ARC HS SIMD instructions.
(arc_preferred_simd_mode): New function.
(arc_autovectorize_vector_sizes): Likewise.
(TARGET_VECTORIZE_PREFERRED_SIMD_MODE)
(TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.
(arc_init_reg_tables): Accept new ARC HS SIMD modes.
(arc_init_builtins): Add new SIMD builtin types.
(arc_split_move): Handle 64 bit vector moves.
* config/arc/arc.h (TARGET_PLUS_DMPY, TARGET_PLUS_MACD)
(TARGET_PLUS_QMACW): Define.
* config/arc/builtins.def (QMACH, QMACHU, QMPYH, QMPYHU, DMACH)
(DMACHU, DMPYH, DMPYHU, DMACWH, DMACWHU, VMAC2H, VMAC2HU, VMPY2H)
(VMPY2HU, VADDSUB2H, VSUBADD2H, VADDSUB, VSUBADD, VADDSUB4H)
(VSUBADD4H): New builtins.
* config/arc/simdext.md: Add new ARC HS SIMD instructions.
* testsuite/gcc.target/arc/builtin_simdarc.c: New file.
2016-04-28 Eduard Sanou <dhole@openmailbox.org>
Matthias Klose <doko@debian.org>

View File

@ -247,16 +247,47 @@ static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
/* Implements target hook TARGET_VECTOR_MODE_SUPPORTED_P.  Return true
   if MODE is a vector mode the selected configuration can use.  Every
   mode is gated by the feature macro of the extension that provides
   it, so the ARC HS modes must not be rejected up front merely because
   TARGET_SIMD_SET is off.  */

static bool
arc_vector_mode_supported_p (machine_mode mode)
{
  switch (mode)
    {
    /* 32-bit vector of two halfwords.  */
    case V2HImode:
      return TARGET_PLUS_DMPY;

    /* 64-bit vectors.  */
    case V4HImode:
    case V2SImode:
      return TARGET_PLUS_QMACW;

    /* Modes that were already accepted under TARGET_SIMD_SET.  */
    case V4SImode:
    case V8HImode:
      return TARGET_SIMD_SET;

    default:
      return false;
    }
}
/* Implements target hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
   Halfword elements map to V4HImode when TARGET_PLUS_QMACW holds and
   to V2HImode otherwise; word elements map to V2SImode; every other
   element mode falls back to word_mode.  */
static enum machine_mode
arc_preferred_simd_mode (enum machine_mode mode)
{
  if (mode == HImode)
    {
      if (TARGET_PLUS_QMACW)
	return V4HImode;
      return V2HImode;
    }

  if (mode == SImode)
    return V2SImode;

  return word_mode;
}
/* Implements target hook
   TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES.  Yields 8 | 4 when
   TARGET_PLUS_QMACW holds and 0 otherwise.  */
static unsigned int
arc_autovectorize_vector_sizes (void)
{
  if (!TARGET_PLUS_QMACW)
    return 0;

  return 8 | 4;
}
/* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review. */
static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED;
@ -345,6 +376,12 @@ static void arc_finalize_pic (void);
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arc_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES arc_autovectorize_vector_sizes
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p
@ -1214,7 +1251,12 @@ arc_init_reg_tables (void)
arc_mode_class[i] = 0;
break;
case MODE_VECTOR_INT:
  /* Classify integer vectors by their size: 4-byte vectors share the
     single-word class, 8-byte vectors the double-word class, and
     everything else stays in the vector class.  */
  if (GET_MODE_SIZE (m) == 4)
    arc_mode_class[i] = (1 << (int) S_MODE);
  else if (GET_MODE_SIZE (m) == 8)
    arc_mode_class[i] = (1 << (int) D_MODE);
  else
    arc_mode_class[i] = (1 << (int) V_MODE);
  break;
case MODE_CC:
default:
@ -5277,6 +5319,15 @@ arc_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED)
static void
arc_init_builtins (void)
{
tree V4HI_type_node;
tree V2SI_type_node;
tree V2HI_type_node;
/* Vector types based on HS SIMD elements. */
V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode);
tree pcvoid_type_node
= build_pointer_type (build_qualified_type (void_type_node,
TYPE_QUAL_CONST));
@ -5341,6 +5392,28 @@ arc_init_builtins (void)
tree v8hi_ftype_v8hi
= build_function_type_list (V8HI_type_node, V8HI_type_node,
NULL_TREE);
/* ARCv2 SIMD types. */
tree long_ftype_v4hi_v4hi
= build_function_type_list (long_long_integer_type_node,
V4HI_type_node, V4HI_type_node, NULL_TREE);
tree int_ftype_v2hi_v2hi
= build_function_type_list (integer_type_node,
V2HI_type_node, V2HI_type_node, NULL_TREE);
tree v2si_ftype_v2hi_v2hi
= build_function_type_list (V2SI_type_node,
V2HI_type_node, V2HI_type_node, NULL_TREE);
tree v2hi_ftype_v2hi_v2hi
= build_function_type_list (V2HI_type_node,
V2HI_type_node, V2HI_type_node, NULL_TREE);
tree v2si_ftype_v2si_v2si
= build_function_type_list (V2SI_type_node,
V2SI_type_node, V2SI_type_node, NULL_TREE);
tree v4hi_ftype_v4hi_v4hi
= build_function_type_list (V4HI_type_node,
V4HI_type_node, V4HI_type_node, NULL_TREE);
tree long_ftype_v2si_v2hi
= build_function_type_list (long_long_integer_type_node,
V2SI_type_node, V2HI_type_node, NULL_TREE);
/* Add the builtins. */
#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) \
@ -8706,6 +8779,31 @@ arc_split_move (rtx *operands)
return;
}
/* With TARGET_PLUS_QMACW, a constant-vector source is split into two
SImode immediate moves, one per register of the destination pair.  */
if (TARGET_PLUS_QMACW
&& GET_CODE (operands[1]) == CONST_VECTOR)
{
HOST_WIDE_INT intval0, intval1;
if (GET_MODE (operands[1]) == V2SImode)
{
/* Two elements: each one supplies a whole immediate.  */
intval0 = INTVAL (XVECEXP (operands[1], 0, 0));
intval1 = INTVAL (XVECEXP (operands[1], 0, 1));
}
else
{
/* NOTE(review): this branch reads elements 0..3, so it presumably
expects V4HImode; confirm no other CONST_VECTOR mode reaches it.
Elements 3:2 are packed into intval1 and elements 1:0 into intval0,
the odd element in bits 16 and up, the even one in the low 16 bits.  */
intval1 = INTVAL (XVECEXP (operands[1], 0, 3)) << 16;
intval1 |= INTVAL (XVECEXP (operands[1], 0, 2)) & 0xFFFF;
intval0 = INTVAL (XVECEXP (operands[1], 0, 1)) << 16;
intval0 |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF;
}
/* xop[0] is the destination register and xop[3] the one after it;
they receive intval0 and intval1 respectively, truncated to SImode.  */
xop[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
xop[3] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
xop[2] = GEN_INT (trunc_int_for_mode (intval0, SImode));
xop[1] = GEN_INT (trunc_int_for_mode (intval1, SImode));
emit_move_insn (xop[0], xop[2]);
emit_move_insn (xop[3], xop[1]);
return;
}
for (i = 0; i < 2; i++)
{
if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))

View File

@ -1724,6 +1724,12 @@ enum
/* Any multiplication feature macro. */
#define TARGET_ANY_MPY \
(TARGET_MPY || TARGET_MUL64_SET || TARGET_MULMAC_32BY16_SET)
/* PLUS_DMPY feature macro: true on an HS target when arc_mpy_option
   is greater than 6.  */
#define TARGET_PLUS_DMPY ((arc_mpy_option > 6) && TARGET_HS)
/* PLUS_MACD feature macro: true on an HS target when arc_mpy_option
   is greater than 7, so it implies TARGET_PLUS_DMPY.  */
#define TARGET_PLUS_MACD ((arc_mpy_option > 7) && TARGET_HS)
/* PLUS_QMACW feature macro: true on an HS target when arc_mpy_option
   is greater than 8, so it implies TARGET_PLUS_MACD.  */
#define TARGET_PLUS_QMACW ((arc_mpy_option > 8) && TARGET_HS)
/* ARC600 and ARC601 feature macro. */
#define TARGET_ARC600_FAMILY (TARGET_ARC600 || TARGET_ARC601)

View File

@ -193,3 +193,30 @@ DEF_BUILTIN (VINTI, 1, void_ftype_int, vinti_insn, TARGET_SIMD_SET)
/* END SIMD marker. */
DEF_BUILTIN (SIMD_END, 0, void_ftype_void, nothing, 0)
/* ARCv2 SIMD instructions that use/clobber the accumulator reg. */
DEF_BUILTIN (QMACH, 2, long_ftype_v4hi_v4hi, qmach, TARGET_PLUS_QMACW)
DEF_BUILTIN (QMACHU, 2, long_ftype_v4hi_v4hi, qmachu, TARGET_PLUS_QMACW)
DEF_BUILTIN (QMPYH, 2, long_ftype_v4hi_v4hi, qmpyh, TARGET_PLUS_QMACW)
DEF_BUILTIN (QMPYHU, 2, long_ftype_v4hi_v4hi, qmpyhu, TARGET_PLUS_QMACW)
DEF_BUILTIN (DMACH, 2, int_ftype_v2hi_v2hi, dmach, TARGET_PLUS_DMPY)
DEF_BUILTIN (DMACHU, 2, int_ftype_v2hi_v2hi, dmachu, TARGET_PLUS_DMPY)
DEF_BUILTIN (DMPYH, 2, int_ftype_v2hi_v2hi, dmpyh, TARGET_PLUS_DMPY)
DEF_BUILTIN (DMPYHU, 2, int_ftype_v2hi_v2hi, dmpyhu, TARGET_PLUS_DMPY)
DEF_BUILTIN (DMACWH, 2, long_ftype_v2si_v2hi, dmacwh, TARGET_PLUS_QMACW)
DEF_BUILTIN (DMACWHU, 2, long_ftype_v2si_v2hi, dmacwhu, TARGET_PLUS_QMACW)
DEF_BUILTIN (VMAC2H, 2, v2si_ftype_v2hi_v2hi, vmac2h, TARGET_PLUS_MACD)
DEF_BUILTIN (VMAC2HU, 2, v2si_ftype_v2hi_v2hi, vmac2hu, TARGET_PLUS_MACD)
DEF_BUILTIN (VMPY2H, 2, v2si_ftype_v2hi_v2hi, vmpy2h, TARGET_PLUS_MACD)
DEF_BUILTIN (VMPY2HU, 2, v2si_ftype_v2hi_v2hi, vmpy2hu, TARGET_PLUS_MACD)
/* Combined add/sub HS SIMD instructions. */
DEF_BUILTIN (VADDSUB2H, 2, v2hi_ftype_v2hi_v2hi, addsubv2hi3, TARGET_PLUS_DMPY)
DEF_BUILTIN (VSUBADD2H, 2, v2hi_ftype_v2hi_v2hi, subaddv2hi3, TARGET_PLUS_DMPY)
DEF_BUILTIN (VADDSUB, 2, v2si_ftype_v2si_v2si, addsubv2si3, TARGET_PLUS_QMACW)
DEF_BUILTIN (VSUBADD, 2, v2si_ftype_v2si_v2si, subaddv2si3, TARGET_PLUS_QMACW)
DEF_BUILTIN (VADDSUB4H, 2, v4hi_ftype_v4hi_v4hi, addsubv4hi3, TARGET_PLUS_QMACW)
DEF_BUILTIN (VSUBADD4H, 2, v4hi_ftype_v4hi_v4hi, subaddv4hi3, TARGET_PLUS_QMACW)

View File

@ -1288,3 +1288,574 @@
[(set_attr "type" "simd_vcontrol")
(set_attr "length" "4")
(set_attr "cond" "nocond")])
;; New ARCv2 SIMD extensions
;; 64-bit vectors of halfwords and words
(define_mode_iterator VWH [V4HI V2SI])
;;double element vectors
(define_mode_iterator VDV [V2HI V2SI])
(define_mode_attr V_addsub [(V2HI "HI") (V2SI "SI")])
(define_mode_attr V_addsub_suffix [(V2HI "2h") (V2SI "")])
;;all vectors
(define_mode_iterator VCT [V2HI V4HI V2SI])
(define_mode_attr V_suffix [(V2HI "2h") (V4HI "4h") (V2SI "2")])
;; Widening operations.
(define_code_iterator SE [sign_extend zero_extend])
(define_code_attr V_US [(sign_extend "s") (zero_extend "u")])
(define_code_attr V_US_suffix [(sign_extend "") (zero_extend "u")])
;; Move patterns
(define_expand "movv2hi"
[(set (match_operand:V2HI 0 "move_dest_operand" "")
(match_operand:V2HI 1 "general_operand" ""))]
""
"{
if (prepare_move_operands (operands, V2HImode))
DONE;
}")
(define_insn_and_split "*movv2hi_insn"
[(set (match_operand:V2HI 0 "nonimmediate_operand" "=r,r,r,m")
(match_operand:V2HI 1 "general_operand" "i,r,m,r"))]
"(register_operand (operands[0], V2HImode)
|| register_operand (operands[1], V2HImode))"
"@
#
mov%? %0, %1
ld%U1%V1 %0,%1
st%U0%V0 %1,%0"
"reload_completed && GET_CODE (operands[1]) == CONST_VECTOR"
[(set (match_dup 0) (match_dup 2))]
{
HOST_WIDE_INT intval = INTVAL (XVECEXP (operands[1], 0, 1)) << 16;
intval |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF;
operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
operands[2] = GEN_INT (trunc_int_for_mode (intval, SImode));
}
[(set_attr "type" "move,move,load,store")
(set_attr "predicable" "yes,yes,no,no")
(set_attr "iscompact" "false,false,false,false")
])
(define_expand "movmisalignv2hi"
[(set (match_operand:V2HI 0 "general_operand" "")
(match_operand:V2HI 1 "general_operand" ""))]
""
{
if (!register_operand (operands[0], V2HImode)
&& !register_operand (operands[1], V2HImode))
operands[1] = force_reg (V2HImode, operands[1]);
})
(define_expand "mov<mode>"
[(set (match_operand:VWH 0 "move_dest_operand" "")
(match_operand:VWH 1 "general_operand" ""))]
""
"{
if (GET_CODE (operands[0]) == MEM)
operands[1] = force_reg (<MODE>mode, operands[1]);
}")
(define_insn_and_split "*mov<mode>_insn"
[(set (match_operand:VWH 0 "move_dest_operand" "=r,r,r,m")
(match_operand:VWH 1 "general_operand" "i,r,m,r"))]
"TARGET_PLUS_QMACW
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"*
{
switch (which_alternative)
{
default:
return \"#\";
case 1:
return \"vadd2 %0, %1, 0\";
case 2:
if (TARGET_LL64)
return \"ldd%U1%V1 %0,%1\";
return \"#\";
case 3:
if (TARGET_LL64)
return \"std%U0%V0 %1,%0\";
return \"#\";
}
}"
"reload_completed"
[(const_int 0)]
{
arc_split_move (operands);
DONE;
}
[(set_attr "type" "move,move,load,store")
(set_attr "predicable" "yes,no,no,no")
(set_attr "iscompact" "false,false,false,false")
])
(define_expand "movmisalign<mode>"
[(set (match_operand:VWH 0 "general_operand" "")
(match_operand:VWH 1 "general_operand" ""))]
""
{
if (!register_operand (operands[0], <MODE>mode)
&& !register_operand (operands[1], <MODE>mode))
operands[1] = force_reg (<MODE>mode, operands[1]);
})
(define_insn "bswapv2hi2"
[(set (match_operand:V2HI 0 "register_operand" "=r,r")
(bswap:V2HI (match_operand:V2HI 1 "nonmemory_operand" "r,i")))]
"TARGET_V2 && TARGET_SWAP"
"swape %0, %1"
[(set_attr "length" "4,8")
(set_attr "type" "two_cycle_core")])
;; Simple arithmetic insns
(define_insn "add<mode>3"
[(set (match_operand:VCT 0 "register_operand" "=r,r")
(plus:VCT (match_operand:VCT 1 "register_operand" "0,r")
(match_operand:VCT 2 "register_operand" "r,r")))]
"TARGET_PLUS_DMPY"
"vadd<V_suffix>%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
(define_insn "sub<mode>3"
[(set (match_operand:VCT 0 "register_operand" "=r,r")
(minus:VCT (match_operand:VCT 1 "register_operand" "0,r")
(match_operand:VCT 2 "register_operand" "r,r")))]
"TARGET_PLUS_DMPY"
"vsub<V_suffix>%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
;; Combined arithmetic ops
(define_insn "addsub<mode>3"
[(set (match_operand:VDV 0 "register_operand" "=r,r")
(vec_concat:VDV
(plus:<V_addsub> (vec_select:<V_addsub> (match_operand:VDV 1 "register_operand" "0,r")
(parallel [(const_int 0)]))
(vec_select:<V_addsub> (match_operand:VDV 2 "register_operand" "r,r")
(parallel [(const_int 0)])))
(minus:<V_addsub> (vec_select:<V_addsub> (match_dup 1) (parallel [(const_int 1)]))
(vec_select:<V_addsub> (match_dup 2) (parallel [(const_int 1)])))))]
"TARGET_PLUS_DMPY"
"vaddsub<V_addsub_suffix>%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
(define_insn "subadd<mode>3"
[(set (match_operand:VDV 0 "register_operand" "=r,r")
(vec_concat:VDV
(minus:<V_addsub> (vec_select:<V_addsub> (match_operand:VDV 1 "register_operand" "0,r")
(parallel [(const_int 0)]))
(vec_select:<V_addsub> (match_operand:VDV 2 "register_operand" "r,r")
(parallel [(const_int 0)])))
(plus:<V_addsub> (vec_select:<V_addsub> (match_dup 1) (parallel [(const_int 1)]))
(vec_select:<V_addsub> (match_dup 2) (parallel [(const_int 1)])))))]
"TARGET_PLUS_DMPY"
"vsubadd<V_addsub_suffix>%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
(define_insn "addsubv4hi3"
[(set (match_operand:V4HI 0 "even_register_operand" "=r,r")
(vec_concat:V4HI
(vec_concat:V2HI
(plus:HI (vec_select:HI (match_operand:V4HI 1 "even_register_operand" "0,r")
(parallel [(const_int 0)]))
(vec_select:HI (match_operand:V4HI 2 "even_register_operand" "r,r")
(parallel [(const_int 0)])))
(minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))
(vec_concat:V2HI
(plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 2)])))
(minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
))]
"TARGET_PLUS_QMACW"
"vaddsub4h%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
(define_insn "subaddv4hi3"
[(set (match_operand:V4HI 0 "even_register_operand" "=r,r")
(vec_concat:V4HI
(vec_concat:V2HI
(minus:HI (vec_select:HI (match_operand:V4HI 1 "even_register_operand" "0,r")
(parallel [(const_int 0)]))
(vec_select:HI (match_operand:V4HI 2 "even_register_operand" "r,r")
(parallel [(const_int 0)])))
(plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))
(vec_concat:V2HI
(minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 2)])))
(plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
))]
"TARGET_PLUS_QMACW"
"vsubadd4h%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
;; Multiplication
;; Dual 16x16 multiply.  Operand 0 receives the sum of the two
;; element-wise products of the V2HI operands 1 and 2, the elements
;; being sign- or zero-extended according to the SE iterator.  The
;; same sum, zero-extended to DI, is also written to ARCV2_ACC.
;; The mnemonic is dmpyh/dmpyhu, matching the pattern name and the
;; DMPYH/DMPYHU builtins that expand through it.
(define_insn "dmpyh<V_US_suffix>"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(plus:SI
	 (mult:SI
	  (SE:SI
	   (vec_select:HI (match_operand:V2HI 1 "register_operand" "0,r")
			  (parallel [(const_int 0)])))
	  (SE:SI
	   (vec_select:HI (match_operand:V2HI 2 "register_operand" "r,r")
			  (parallel [(const_int 0)]))))
	 (mult:SI
	  (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
	  (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))))
   (set (reg:DI ARCV2_ACC)
	(zero_extend:DI
	 (plus:SI
	  (mult:SI
	   (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 0)])))
	   (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 0)]))))
	  (mult:SI
	   (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
	   (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))))]
  "TARGET_PLUS_DMPY"
  "dmpyh<V_US_suffix>%? %0, %1, %2"
  [(set_attr "length" "4")
   (set_attr "type" "multi")
   (set_attr "predicable" "yes,no")
   (set_attr "cond" "canuse,nocond")])
;; We can use dmac as well here. To be investigated which version
;; brings more.
(define_expand "sdot_prodv2hi"
[(match_operand:SI 0 "register_operand" "")
(match_operand:V2HI 1 "register_operand" "")
(match_operand:V2HI 2 "register_operand" "")
(match_operand:SI 3 "register_operand" "")]
"TARGET_PLUS_DMPY"
{
rtx t = gen_reg_rtx (SImode);
emit_insn (gen_dmpyh (t, operands[1], operands[2]));
emit_insn (gen_addsi3 (operands[0], operands[3], t));
DONE;
})
(define_expand "udot_prodv2hi"
[(match_operand:SI 0 "register_operand" "")
(match_operand:V2HI 1 "register_operand" "")
(match_operand:V2HI 2 "register_operand" "")
(match_operand:SI 3 "register_operand" "")]
"TARGET_PLUS_DMPY"
{
rtx t = gen_reg_rtx (SImode);
emit_insn (gen_dmpyhu (t, operands[1], operands[2]));
emit_insn (gen_addsi3 (operands[0], operands[3], t));
DONE;
})
(define_insn "arc_vec_<V_US>mult_lo_v4hi"
[(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
(mult:V2SI (SE:V2SI (vec_select:V2HI
(match_operand:V4HI 1 "even_register_operand" "0,r")
(parallel [(const_int 0) (const_int 1)])))
(SE:V2SI (vec_select:V2HI
(match_operand:V4HI 2 "even_register_operand" "r,r")
(parallel [(const_int 0) (const_int 1)])))))
(set (reg:V2SI ARCV2_ACC)
(mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
(parallel [(const_int 0) (const_int 1)])))
(SE:V2SI (vec_select:V2HI (match_dup 2)
(parallel [(const_int 0) (const_int 1)])))))
]
"TARGET_PLUS_MACD"
"vmpy2h<V_US_suffix>%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
(define_insn "arc_vec_<V_US>multacc_lo_v4hi"
[(set (reg:V2SI ARCV2_ACC)
(mult:V2SI (SE:V2SI (vec_select:V2HI
(match_operand:V4HI 0 "even_register_operand" "r")
(parallel [(const_int 0) (const_int 1)])))
(SE:V2SI (vec_select:V2HI
(match_operand:V4HI 1 "even_register_operand" "r")
(parallel [(const_int 0) (const_int 1)])))))
]
"TARGET_PLUS_MACD"
"vmpy2h<V_US_suffix>%? 0, %0, %1"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "no")
(set_attr "cond" "nocond")])
(define_expand "vec_widen_<V_US>mult_lo_v4hi"
[(set (match_operand:V2SI 0 "even_register_operand" "")
(mult:V2SI (SE:V2SI (vec_select:V2HI
(match_operand:V4HI 1 "even_register_operand" "")
(parallel [(const_int 0) (const_int 1)])))
(SE:V2SI (vec_select:V2HI
(match_operand:V4HI 2 "even_register_operand" "")
(parallel [(const_int 0) (const_int 1)])))))]
"TARGET_PLUS_QMACW"
{
emit_insn (gen_arc_vec_<V_US>mult_lo_v4hi (operands[0],
operands[1],
operands[2]));
DONE;
}
)
(define_insn "arc_vec_<V_US>mult_hi_v4hi"
[(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
(mult:V2SI (SE:V2SI (vec_select:V2HI
(match_operand:V4HI 1 "even_register_operand" "0,r")
(parallel [(const_int 2) (const_int 3)])))
(SE:V2SI (vec_select:V2HI
(match_operand:V4HI 2 "even_register_operand" "r,r")
(parallel [(const_int 2) (const_int 3)])))))
(set (reg:V2SI ARCV2_ACC)
(mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
(parallel [(const_int 2) (const_int 3)])))
(SE:V2SI (vec_select:V2HI (match_dup 2)
(parallel [(const_int 2) (const_int 3)])))))
]
"TARGET_PLUS_QMACW"
"vmpy2h<V_US_suffix>%? %0, %R1, %R2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
;; Widening multiply of the high half (elements 2 and 3) of two V4HI
;; vectors, giving a V2SI result.  The only insn emitted is
;; arc_vec_<V_US>mult_hi_v4hi, which is enabled by TARGET_PLUS_QMACW,
;; so the expander uses the same condition; a weaker one would let it
;; emit an insn that cannot be recognized.
(define_expand "vec_widen_<V_US>mult_hi_v4hi"
  [(set (match_operand:V2SI 0 "even_register_operand" "")
	(mult:V2SI (SE:V2SI (vec_select:V2HI
			     (match_operand:V4HI 1 "even_register_operand" "")
			     (parallel [(const_int 2) (const_int 3)])))
		   (SE:V2SI (vec_select:V2HI
			     (match_operand:V4HI 2 "even_register_operand" "")
			     (parallel [(const_int 2) (const_int 3)])))))]
  "TARGET_PLUS_QMACW"
  {
    emit_insn (gen_arc_vec_<V_US>mult_hi_v4hi (operands[0],
					       operands[1],
					       operands[2]));
    DONE;
  }
)
(define_insn "arc_vec_<V_US>mac_hi_v4hi"
[(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
(plus:V2SI
(reg:V2SI ARCV2_ACC)
(mult:V2SI (SE:V2SI (vec_select:V2HI
(match_operand:V4HI 1 "even_register_operand" "0,r")
(parallel [(const_int 2) (const_int 3)])))
(SE:V2SI (vec_select:V2HI
(match_operand:V4HI 2 "even_register_operand" "r,r")
(parallel [(const_int 2) (const_int 3)]))))))
(set (reg:V2SI ARCV2_ACC)
(plus:V2SI
(reg:V2SI ARCV2_ACC)
(mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
(parallel [(const_int 2) (const_int 3)])))
(SE:V2SI (vec_select:V2HI (match_dup 2)
(parallel [(const_int 2) (const_int 3)]))))))
]
"TARGET_PLUS_MACD"
"vmac2h<V_US_suffix>%? %0, %R1, %R2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
;; Builtins
(define_insn "dmach"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(unspec:SI [(match_operand:V2HI 1 "register_operand" "0,r")
(match_operand:V2HI 2 "register_operand" "r,r")
(reg:DI ARCV2_ACC)]
UNSPEC_ARC_DMACH))
(clobber (reg:DI ARCV2_ACC))]
"TARGET_PLUS_DMPY"
"dmach%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
(define_insn "dmachu"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(unspec:SI [(match_operand:V2HI 1 "register_operand" "0,r")
(match_operand:V2HI 2 "register_operand" "r,r")
(reg:DI ARCV2_ACC)]
UNSPEC_ARC_DMACHU))
(clobber (reg:DI ARCV2_ACC))]
"TARGET_PLUS_DMPY"
"dmachu%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
(define_insn "dmacwh"
[(set (match_operand:DI 0 "even_register_operand" "=r,r")
(unspec:DI [(match_operand:V2SI 1 "even_register_operand" "0,r")
(match_operand:V2HI 2 "register_operand" "r,r")
(reg:DI ARCV2_ACC)]
UNSPEC_ARC_DMACWH))
(clobber (reg:DI ARCV2_ACC))]
"TARGET_PLUS_QMACW"
"dmacwh%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
;; Unsigned counterpart of dmacwh, used by the DMACWHU builtin.  It
;; takes a V2SI and a V2HI input plus ARCV2_ACC, produces a DI result
;; and clobbers ARCV2_ACC.  The DI destination uses
;; even_register_operand, exactly as dmacwh does.
(define_insn "dmacwhu"
  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
	(unspec:DI [(match_operand:V2SI 1 "even_register_operand" "0,r")
		    (match_operand:V2HI 2 "register_operand" "r,r")
		    (reg:DI ARCV2_ACC)]
		   UNSPEC_ARC_DMACWHU))
   (clobber (reg:DI ARCV2_ACC))]
  "TARGET_PLUS_QMACW"
  "dmacwhu%? %0, %1, %2"
  [(set_attr "length" "4")
   (set_attr "type" "multi")
   (set_attr "predicable" "yes,no")
   (set_attr "cond" "canuse,nocond")])
(define_insn "vmac2h"
[(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
(unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
(match_operand:V2HI 2 "register_operand" "r,r")
(reg:DI ARCV2_ACC)]
UNSPEC_ARC_VMAC2H))
(clobber (reg:DI ARCV2_ACC))]
"TARGET_PLUS_MACD"
"vmac2h%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
(define_insn "vmac2hu"
[(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
(unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
(match_operand:V2HI 2 "register_operand" "r,r")
(reg:DI ARCV2_ACC)]
UNSPEC_ARC_VMAC2HU))
(clobber (reg:DI ARCV2_ACC))]
"TARGET_PLUS_MACD"
"vmac2hu%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
(define_insn "vmpy2h"
[(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
(unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
(match_operand:V2HI 2 "register_operand" "r,r")]
UNSPEC_ARC_VMPY2H))
(clobber (reg:DI ARCV2_ACC))]
"TARGET_PLUS_MACD"
"vmpy2h%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
(define_insn "vmpy2hu"
[(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
(unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
(match_operand:V2HI 2 "register_operand" "r,r")]
UNSPEC_ARC_VMPY2HU))
(clobber (reg:DI ARCV2_ACC))]
"TARGET_PLUS_MACD"
"vmpy2hu%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
(define_insn "qmach"
[(set (match_operand:DI 0 "even_register_operand" "=r,r")
(unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
(match_operand:V4HI 2 "even_register_operand" "r,r")
(reg:DI ARCV2_ACC)]
UNSPEC_ARC_QMACH))
(clobber (reg:DI ARCV2_ACC))]
"TARGET_PLUS_QMACW"
"qmach%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
(define_insn "qmachu"
[(set (match_operand:DI 0 "even_register_operand" "=r,r")
(unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
(match_operand:V4HI 2 "even_register_operand" "r,r")
(reg:DI ARCV2_ACC)]
UNSPEC_ARC_QMACHU))
(clobber (reg:DI ARCV2_ACC))]
"TARGET_PLUS_QMACW"
"qmachu%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
(define_insn "qmpyh"
[(set (match_operand:DI 0 "even_register_operand" "=r,r")
(unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
(match_operand:V4HI 2 "even_register_operand" "r,r")]
UNSPEC_ARC_QMPYH))
(clobber (reg:DI ARCV2_ACC))]
"TARGET_PLUS_QMACW"
"qmpyh%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])
(define_insn "qmpyhu"
[(set (match_operand:DI 0 "even_register_operand" "=r,r")
(unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
(match_operand:V4HI 2 "even_register_operand" "r,r")]
UNSPEC_ARC_QMPYHU))
(clobber (reg:DI ARCV2_ACC))]
"TARGET_PLUS_QMACW"
"qmpyhu%? %0, %1, %2"
[(set_attr "length" "4")
(set_attr "type" "multi")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")])

View File

@ -0,0 +1,38 @@
/* { dg-do compile } */
/* { dg-options "-mcpu=archs -O2 -Werror-implicit-function-declaration -mmpy-option=9" } */

/* Check that every ARC HS SIMD builtin is declared and accepts the
   expected operand types.  STEST defines a wrapper test_NAME that
   forwards its two arguments to __builtin_arc_NAME and returns the
   result as RETTYPE.  */
#define STEST(name, rettype, op1type, op2type)	\
  rettype test_ ## name				\
  (op1type a, op2type b)			\
  {						\
    return __builtin_arc_ ## name (a, b);	\
  }

typedef short v2hi __attribute__ ((vector_size (4)));
typedef short v4hi __attribute__ ((vector_size (8)));
typedef int v2si __attribute__ ((vector_size (8)));

STEST (qmach, long long, v4hi, v4hi)
STEST (qmachu, long long, v4hi, v4hi)
STEST (qmpyh, long long, v4hi, v4hi)
STEST (qmpyhu, long long, v4hi, v4hi)

STEST (dmach, int, v2hi, v2hi)
STEST (dmachu, int, v2hi, v2hi)
STEST (dmpyh, int, v2hi, v2hi)
STEST (dmpyhu, int, v2hi, v2hi)

/* dmacwh and dmacwhu return a long long, like the q* builtins above,
   so the wrappers must not narrow the result to long.  */
STEST (dmacwh, long long, v2si, v2hi)
STEST (dmacwhu, long long, v2si, v2hi)

STEST (vmac2h, v2si, v2hi, v2hi)
STEST (vmac2hu, v2si, v2hi, v2hi)
STEST (vmpy2h, v2si, v2hi, v2hi)
STEST (vmpy2hu, v2si, v2hi, v2hi)

STEST (vaddsub2h, v2hi, v2hi, v2hi)
STEST (vsubadd2h, v2hi, v2hi, v2hi)
STEST (vaddsub, v2si, v2si, v2si)
STEST (vsubadd, v2si, v2si, v2si)
STEST (vaddsub4h, v4hi, v4hi, v4hi)
STEST (vsubadd4h, v4hi, v4hi, v4hi)