diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 752e391131f..ddd258727cb 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2007-04-29 Bernd Schmidt + + * rtl.def (SS_ABS): New code. + * config/bfin/bfin.c (print_operand): New modifier 'v'. + (enum bfin_builtins): Add BFIN_BUILTIN_SUM_2X16, BFIN_BUILTIN_ABS_1X32, + BFIN_BUILTIN_ROUND_1X32, BFIN_BUILTIN_MULT_1X32X32, + BFIN_BUILTIN_MULT_1X32X32NS, BFIN_BUILTIN_SSASHIFT_1X32. + (bfin_init_builtins): Define them. + (bdesc_1arg, bdesc_2arg): Add some of them here, ... + (bfin_expand_builtin): ... and handle the others here. + * config/bfin/bfin.md (ssabssi2, ssroundsi2, ssashiftsi3, + flag_mul_macv2hi_parts_acconly_andcc0): New patterns. + (ssabsv2hi2): Renamed from absv2hi2; use ss_abs code. + (ssashiftv2hi3, ssashifthi3, lshiftv2hi3, lshifthi3): Shift count + operand is only HImode. + 2007-04-29 Steven Bosscher * regclass.c (scan_one_insn): Remove splitting of diff --git a/gcc/config/bfin/bfin.c b/gcc/config/bfin/bfin.c index 16b40968523..727f2ca9b81 100644 --- a/gcc/config/bfin/bfin.c +++ b/gcc/config/bfin/bfin.c @@ -1302,6 +1302,15 @@ print_operand (FILE *file, rtx x, char code) gcc_assert (REGNO (x) == REG_A0 || REGNO (x) == REG_A1); fprintf (file, "%s.x", reg_names[REGNO (x)]); } + else if (code == 'v') + { + if (REGNO (x) == REG_A0) + fprintf (file, "AV0"); + else if (REGNO (x) == REG_A1) + fprintf (file, "AV1"); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } else if (code == 'D') { fprintf (file, "%s", dregs_pair_names[REGNO (x)]); @@ -4627,16 +4636,21 @@ enum bfin_builtins BFIN_BUILTIN_MIN_1X16, BFIN_BUILTIN_MAX_1X16, + BFIN_BUILTIN_SUM_2X16, BFIN_BUILTIN_DIFFHL_2X16, BFIN_BUILTIN_DIFFLH_2X16, BFIN_BUILTIN_SSADD_1X32, BFIN_BUILTIN_SSSUB_1X32, BFIN_BUILTIN_NORM_1X32, + BFIN_BUILTIN_ROUND_1X32, BFIN_BUILTIN_NEG_1X32, + BFIN_BUILTIN_ABS_1X32, BFIN_BUILTIN_MIN_1X32, BFIN_BUILTIN_MAX_1X32, BFIN_BUILTIN_MULT_1X32, + BFIN_BUILTIN_MULT_1X32X32, +
BFIN_BUILTIN_MULT_1X32X32NS, BFIN_BUILTIN_MULHISILL, BFIN_BUILTIN_MULHISILH, @@ -4647,6 +4661,7 @@ enum bfin_builtins BFIN_BUILTIN_LSHIFT_2X16, BFIN_BUILTIN_SSASHIFT_1X16, BFIN_BUILTIN_SSASHIFT_2X16, + BFIN_BUILTIN_SSASHIFT_1X32, BFIN_BUILTIN_CPLX_MUL_16, BFIN_BUILTIN_CPLX_MAC_16, @@ -4755,6 +4770,8 @@ bfin_init_builtins (void) def_builtin ("__builtin_bfin_norm_fr1x16", short_ftype_int, BFIN_BUILTIN_NORM_1X16); + def_builtin ("__builtin_bfin_sum_fr2x16", short_ftype_v2hi, + BFIN_BUILTIN_SUM_2X16); def_builtin ("__builtin_bfin_diff_hl_fr2x16", short_ftype_v2hi, BFIN_BUILTIN_DIFFHL_2X16); def_builtin ("__builtin_bfin_diff_lh_fr2x16", short_ftype_v2hi, @@ -4775,10 +4792,18 @@ bfin_init_builtins (void) BFIN_BUILTIN_SSSUB_1X32); def_builtin ("__builtin_bfin_negate_fr1x32", int_ftype_int, BFIN_BUILTIN_NEG_1X32); + def_builtin ("__builtin_bfin_abs_fr1x32", int_ftype_int, + BFIN_BUILTIN_ABS_1X32); def_builtin ("__builtin_bfin_norm_fr1x32", short_ftype_int, BFIN_BUILTIN_NORM_1X32); + def_builtin ("__builtin_bfin_round_fr1x32", short_ftype_int, + BFIN_BUILTIN_ROUND_1X32); def_builtin ("__builtin_bfin_mult_fr1x32", int_ftype_short_short, BFIN_BUILTIN_MULT_1X32); + def_builtin ("__builtin_bfin_mult_fr1x32x32", int_ftype_int_int, + BFIN_BUILTIN_MULT_1X32X32); + def_builtin ("__builtin_bfin_mult_fr1x32x32NS", int_ftype_int_int, + BFIN_BUILTIN_MULT_1X32X32NS); /* Shifts. */ def_builtin ("__builtin_bfin_shl_fr1x16", short_ftype_int_int, @@ -4789,6 +4814,8 @@ bfin_init_builtins (void) BFIN_BUILTIN_LSHIFT_1X16); def_builtin ("__builtin_bfin_lshl_fr2x16", v2hi_ftype_v2hi_int, BFIN_BUILTIN_LSHIFT_2X16); + def_builtin ("__builtin_bfin_shl_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_SSASHIFT_1X32); /* Complex numbers. 
*/ def_builtin ("__builtin_bfin_cmplx_mul", v2hi_ftype_v2hi_v2hi, @@ -4816,6 +4843,7 @@ static const struct builtin_description bdesc_2arg[] = { CODE_FOR_ssashifthi3, "__builtin_bfin_shl_fr1x16", BFIN_BUILTIN_SSASHIFT_1X16, -1 }, { CODE_FOR_lshiftv2hi3, "__builtin_bfin_lshl_fr2x16", BFIN_BUILTIN_LSHIFT_2X16, -1 }, { CODE_FOR_lshifthi3, "__builtin_bfin_lshl_fr1x16", BFIN_BUILTIN_LSHIFT_1X16, -1 }, + { CODE_FOR_ssashiftsi3, "__builtin_bfin_shl_fr1x32", BFIN_BUILTIN_SSASHIFT_1X32, -1 }, { CODE_FOR_sminhi3, "__builtin_bfin_min_fr1x16", BFIN_BUILTIN_MIN_1X16, -1 }, { CODE_FOR_smaxhi3, "__builtin_bfin_max_fr1x16", BFIN_BUILTIN_MAX_1X16, -1 }, @@ -4848,12 +4876,14 @@ static const struct builtin_description bdesc_1arg[] = { CODE_FOR_abshi2, "__builtin_bfin_abs_fr1x16", BFIN_BUILTIN_ABS_1X16, 0 }, { CODE_FOR_signbitssi2, "__builtin_bfin_norm_fr1x32", BFIN_BUILTIN_NORM_1X32, 0 }, + { CODE_FOR_ssroundsi2, "__builtin_bfin_round_fr1x32", BFIN_BUILTIN_ROUND_1X32, 0 }, { CODE_FOR_ssnegsi2, "__builtin_bfin_negate_fr1x32", BFIN_BUILTIN_NEG_1X32, 0 }, + { CODE_FOR_ssabssi2, "__builtin_bfin_abs_fr1x32", BFIN_BUILTIN_ABS_1X32, 0 }, { CODE_FOR_movv2hi_hi_low, "__builtin_bfin_extract_lo", BFIN_BUILTIN_EXTRACTLO, 0 }, { CODE_FOR_movv2hi_hi_high, "__builtin_bfin_extract_hi", BFIN_BUILTIN_EXTRACTHI, 0 }, { CODE_FOR_ssnegv2hi2, "__builtin_bfin_negate_fr2x16", BFIN_BUILTIN_NEG_2X16, 0 }, - { CODE_FOR_absv2hi2, "__builtin_bfin_abs_fr2x16", BFIN_BUILTIN_ABS_2X16, 0 } + { CODE_FOR_ssabsv2hi2, "__builtin_bfin_abs_fr2x16", BFIN_BUILTIN_ABS_2X16, 0 } }; /* Errors in the source file can cause expand_expr to return const0_rtx @@ -4985,7 +5015,7 @@ bfin_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); unsigned int fcode = DECL_FUNCTION_CODE (fndecl); tree arg0, arg1, arg2; - rtx op0, op1, op2, accvec, pat, tmp1, tmp2; + rtx op0, op1, op2, accvec, pat, tmp1, tmp2, a0reg, a1reg; enum machine_mode tmode, mode0; switch (fcode) @@ -4999,10 +5029,12 
@@ bfin_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, case BFIN_BUILTIN_DIFFHL_2X16: case BFIN_BUILTIN_DIFFLH_2X16: + case BFIN_BUILTIN_SUM_2X16: arg0 = CALL_EXPR_ARG (exp, 0); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); - icode = (fcode == BFIN_BUILTIN_DIFFHL_2X16 - ? CODE_FOR_subhilov2hi3 : CODE_FOR_sublohiv2hi3); + icode = (fcode == BFIN_BUILTIN_DIFFHL_2X16 ? CODE_FOR_subhilov2hi3 + : fcode == BFIN_BUILTIN_DIFFLH_2X16 ? CODE_FOR_sublohiv2hi3 + : CODE_FOR_ssaddhilov2hi3); tmode = insn_data[icode].operand[0].mode; mode0 = insn_data[icode].operand[1].mode; @@ -5023,6 +5055,61 @@ bfin_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, emit_insn (pat); return target; + case BFIN_BUILTIN_MULT_1X32X32: + case BFIN_BUILTIN_MULT_1X32X32NS: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + if (! target + || !register_operand (target, SImode)) + target = gen_reg_rtx (SImode); + + a1reg = gen_rtx_REG (PDImode, REG_A1); + a0reg = gen_rtx_REG (PDImode, REG_A0); + tmp1 = gen_lowpart (V2HImode, op0); + tmp2 = gen_lowpart (V2HImode, op1); + emit_insn (gen_flag_macinit1hi (a1reg, + gen_lowpart (HImode, op0), + gen_lowpart (HImode, op1), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + + if (fcode == BFIN_BUILTIN_MULT_1X32X32) + emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, tmp1, tmp2, + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_NONE), + GEN_INT (MACFLAG_M))); + else + { + /* For saturating multiplication, there's exactly one special case + to be handled: multiplying the smallest negative value with + itself. Due to shift correction in fractional multiplies, this + can overflow. Iff this happens, OP2 will contain 1, which, when + added in 32 bits to the smallest negative, wraps to the largest + positive, which is the result we want. 
*/ + op2 = gen_reg_rtx (V2HImode); + emit_insn (gen_packv2hi (op2, tmp1, tmp2, const0_rtx, const0_rtx)); + emit_insn (gen_movsibi (gen_rtx_REG (BImode, REG_CC), + gen_lowpart (SImode, op2))); + emit_insn (gen_flag_mul_macv2hi_parts_acconly_andcc0 (a0reg, a1reg, tmp1, tmp2, + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_NONE), + GEN_INT (MACFLAG_M))); + op2 = gen_reg_rtx (SImode); + emit_insn (gen_movbisi (op2, gen_rtx_REG (BImode, REG_CC))); + } + emit_insn (gen_flag_machi_parts_acconly (a1reg, tmp2, tmp1, + const1_rtx, const0_rtx, + a1reg, const0_rtx, GEN_INT (MACFLAG_M))); + emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (15))); + emit_insn (gen_sum_of_accumulators (target, a0reg, a0reg, a1reg)); + if (fcode == BFIN_BUILTIN_MULT_1X32X32NS) + emit_insn (gen_addsi3 (target, target, op2)); + return target; + case BFIN_BUILTIN_CPLX_MUL_16: arg0 = CALL_EXPR_ARG (exp, 0); arg1 = CALL_EXPR_ARG (exp, 1); diff --git a/gcc/config/bfin/bfin.md b/gcc/config/bfin/bfin.md index 853024cb148..c7eb7d97393 100644 --- a/gcc/config/bfin/bfin.md +++ b/gcc/config/bfin/bfin.md @@ -1362,6 +1362,13 @@ "%0 = abs %1%!" [(set_attr "type" "dsp32")]) +(define_insn "ssabssi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (ss_abs:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = abs %1%!" + [(set_attr "type" "dsp32")]) + (define_insn "negsi2" [(set (match_operand:SI 0 "register_operand" "=d") (neg:SI (match_operand:SI 1 "register_operand" "d")))] @@ -1393,6 +1400,16 @@ "%h0 = signbits %1%!" [(set_attr "type" "dsp32")]) +(define_insn "ssroundsi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (truncate:HI + (lshiftrt:SI (ss_plus:SI (match_operand:SI 1 "register_operand" "d") + (const_int 32768)) + (const_int 16))))] + "" + "%h0 = %1 (RND)%!" 
+ [(set_attr "type" "dsp32")]) + (define_insn "smaxhi3" [(set (match_operand:HI 0 "register_operand" "=d") (smax:HI (match_operand:HI 1 "register_operand" "d") @@ -2374,6 +2391,69 @@ operands[3] = gen_rtx_NE (BImode, operands[1], const0_rtx); }) +;; Same as above, but and CC with the overflow bit generated by the first +;; multiplication. +(define_insn "flag_mul_macv2hi_parts_acconly_andcc0" + [(set (match_operand:PDI 0 "register_operand" "=B,e,e") + (unspec:PDI [(vec_select:HI + (match_operand:V2HI 2 "register_operand" "d,d,d") + (parallel [(match_operand 4 "const01_operand" "P0P1,P0P1,P0P1")])) + (vec_select:HI + (match_operand:V2HI 3 "register_operand" "d,d,d") + (parallel [(match_operand 6 "const01_operand" "P0P1,P0P1,P0P1")])) + (match_operand 10 "const_int_operand" "PB,PA,PA")] + UNSPEC_MUL_WITH_FLAG)) + (set (match_operand:PDI 1 "register_operand" "=B,e,e") + (unspec:PDI [(vec_select:HI + (match_dup 2) + (parallel [(match_operand 5 "const01_operand" "P0P1,P0P1,P0P1")])) + (vec_select:HI + (match_dup 3) + (parallel [(match_operand 7 "const01_operand" "P0P1,P0P1,P0P1")])) + (match_operand:PDI 8 "register_operand" "1,1,1") + (match_operand 9 "const01_operand" "P0P1,P0P1,P0P1") + (match_operand 11 "const_int_operand" "PA,PB,PA")] + UNSPEC_MAC_WITH_FLAG)) + (set (reg:BI REG_CC) + (and:BI (reg:BI REG_CC) + (unspec:BI [(vec_select:HI (match_dup 2) (parallel [(match_dup 4)])) + (vec_select:HI (match_dup 3) (parallel [(match_dup 6)])) + (match_dup 10)] + UNSPEC_MUL_WITH_FLAG)))] + "MACFLAGS_MATCH_P (INTVAL (operands[10]), INTVAL (operands[11]))" +{ + rtx xops[6]; + const char *templates[] = { + "%0 = %h2 * %h3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %h3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %h3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %h3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %d3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %d3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %d3, %1 %b4 %d2 * %h3 
%M5;\n\tCC &= %v0;", + "%0 = %d2 * %d3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %h3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %h3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %h3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %h3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %d3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %d3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %d3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %d3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;" }; + int alt = (INTVAL (operands[4]) + (INTVAL (operands[5]) << 1) + + (INTVAL (operands[6]) << 2) + (INTVAL (operands[7]) << 3)); + xops[0] = operands[0]; + xops[1] = operands[1]; + xops[2] = operands[2]; + xops[3] = operands[3]; + xops[4] = operands[9]; + xops[5] = which_alternative == 0 ? operands[10] : operands[11]; + output_asm_insn (templates[alt], xops); + return ""; +} + [(set_attr "type" "misc") + (set_attr "length" "6") + (set_attr "seq_insns" "multi")]) (define_expand "bge" [(set (match_dup 1) (match_dup 2)) @@ -3966,9 +4046,9 @@ "%0 = - %1 (V)%!" [(set_attr "type" "dsp32")]) -(define_insn "absv2hi2" +(define_insn "ssabsv2hi2" [(set (match_operand:V2HI 0 "register_operand" "=d") - (abs:V2HI (match_operand:V2HI 1 "register_operand" "d")))] + (ss_abs:V2HI (match_operand:V2HI 1 "register_operand" "d")))] "" "%0 = ABS %1 (V)%!" 
[(set_attr "type" "dsp32")]) @@ -3978,7 +4058,7 @@ (define_insn "ssashiftv2hi3" [(set (match_operand:V2HI 0 "register_operand" "=d,d,d") (if_then_else:V2HI - (lt (match_operand:SI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) + (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" "d,d,d") (match_dup 2)) (ss_ashift:V2HI (match_dup 1) (match_dup 2))))] @@ -3992,7 +4072,7 @@ (define_insn "ssashifthi3" [(set (match_operand:HI 0 "register_operand" "=d,d,d") (if_then_else:HI - (lt (match_operand:SI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) + (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) (ashiftrt:HI (match_operand:HI 1 "register_operand" "d,d,d") (match_dup 2)) (ss_ashift:HI (match_dup 1) (match_dup 2))))] @@ -4003,10 +4083,26 @@ %0 = %1 >>> %N2 (V,S)%!" [(set_attr "type" "dsp32")]) +;; 32-bit counterpart of ssashifthi3: arithmetic right shift when the HImode +;; count in operand 2 is negative, saturating left shift otherwise. +(define_insn "ssashiftsi3" + [(set (match_operand:SI 0 "register_operand" "=d,d,d") + (if_then_else:SI + (lt (match_operand:HI 2 "reg_or_const_int_operand" "d,Ku5,Ks5") (const_int 0)) + (ashiftrt:SI (match_operand:SI 1 "register_operand" "d,d,d") + (match_dup 2)) + (ss_ashift:SI (match_dup 1) (match_dup 2))))] + "" + "@ + %0 = ASHIFT %1 BY %h2 (S)%! + %0 = %1 << %2 (S)%! + %0 = %1 >>> %N2 (S)%!"
+ [(set_attr "type" "dsp32")]) + (define_insn "lshiftv2hi3" [(set (match_operand:V2HI 0 "register_operand" "=d,d,d") (if_then_else:V2HI - (lt (match_operand:SI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) + (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) (lshiftrt:V2HI (match_operand:V2HI 1 "register_operand" "d,d,d") (match_dup 2)) (ashift:V2HI (match_dup 1) (match_dup 2))))] @@ -4020,7 +4116,7 @@ (define_insn "lshifthi3" [(set (match_operand:HI 0 "register_operand" "=d,d,d") (if_then_else:HI - (lt (match_operand:SI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) + (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) (lshiftrt:HI (match_operand:HI 1 "register_operand" "d,d,d") (match_dup 2)) (ashift:HI (match_dup 1) (match_dup 2))))] diff --git a/gcc/rtl.def b/gcc/rtl.def index a49f1649e8a..741dd2926d7 100644 --- a/gcc/rtl.def +++ b/gcc/rtl.def @@ -657,6 +657,9 @@ DEF_RTL_EXPR(SS_MINUS, "ss_minus", "ee", RTX_BIN_ARITH) /* Negation with signed saturation. */ DEF_RTL_EXPR(SS_NEG, "ss_neg", "e", RTX_UNARY) +/* Absolute value with signed saturation. */ +DEF_RTL_EXPR(SS_ABS, "ss_abs", "e", RTX_UNARY) + /* Shift left with signed saturation. */ DEF_RTL_EXPR(SS_ASHIFT, "ss_ashift", "ee", RTX_BIN_ARITH)