re PR target/49687 ([avr] Missed optimization for widening MUL)

PR target/49687
	* config/avr/t-avr (LIB1ASMFUNCS): Remove _xmulhisi3_exit.
	Add _muluhisi3, _mulshisi3, _usmulhisi3.
	* config/avr/libgcc.S (__mulsi3): Rewrite.
	(__mulhisi3): Rewrite.
	(__umulhisi3): Rewrite.
	(__usmulhisi3): New.
	(__muluhisi3): New.
	(__mulshisi3): New.
	(__mulohisi3): New.
	(__mulqi3, __mulqihi3, __umulqihi3, __mulhi3): Use DEFUN/ENDF to
	declare.
	* config/avr/predicates.md (pseudo_register_operand): Rewrite.
	(pseudo_register_or_const_int_operand): New.
	(combine_pseudo_register_operand): New.
	(u16_operand): New.
	(s16_operand): New.
	(o16_operand): New.
	* config/avr/avr.c (avr_rtx_costs): Handle costs for mult:SI.
	* config/avr/avr.md (QIHI, QIHI2): New mode iterators.
	(any_extend, any_extend2): New code iterators.
	(extend_prefix): New code attribute.
	(mulsi3): Rewrite. Turn insn to expander.
	(mulhisi3): Ditto.
	(umulhisi3): Ditto.
	(usmulhisi3): New expander.
	(*mulsi3): New insn-and-split.
	(mulu<mode>si3): New insn-and-split.
	(muls<mode>si3): New insn-and-split.
	(mulohisi3): New insn-and-split.
	(*uumulqihisi3, *uumulhiqisi3, *uumulhihisi3, *uumulqiqisi3,
	*usmulqihisi3, *usmulhiqisi3, *usmulhihisi3, *usmulqiqisi3,
	*sumulqihisi3, *sumulhiqisi3, *sumulhihisi3, *sumulqiqisi3,
	*ssmulqihisi3, *ssmulhiqisi3, *ssmulhihisi3, *ssmulqiqisi3): New
	insn-and-split.
	(*mulsi3_call): Rewrite.
	(*mulhisi3_call): Rewrite.
	(*umulhisi3_call): Rewrite.
	(*usmulhisi3_call): New insn.
	(*muluhisi3_call): New insn.
	(*mulshisi3_call): New insn.
	(*mulohisi3_call): New insn.
	(extendqihi2): Use combine_pseudo_register_operand as predicate
	for operand 1.
	(extendqisi2): Ditto.
	(zero_extendqihi2): Ditto.
	(zero_extendqisi2): Ditto.
	(zero_extendhisi2): Ditto.
	(extendhisi2): Ditto. Don't early-clobber operand 0.

From-SVN: r176862
This commit is contained in:
Georg-Johann Lay 2011-07-28 08:03:07 +00:00 committed by Georg-Johann Lay
parent 2374a88acf
commit 296799ba06
6 changed files with 641 additions and 246 deletions

View File

@ -1,3 +1,55 @@
2011-07-28 Georg-Johann Lay <avr@gjlay.de>
PR target/49687
* config/avr/t-avr (LIB1ASMFUNCS): Remove _xmulhisi3_exit.
Add _muluhisi3, _mulshisi3, _usmulhisi3.
* config/avr/libgcc.S (__mulsi3): Rewrite.
(__mulhisi3): Rewrite.
(__umulhisi3): Rewrite.
(__usmulhisi3): New.
(__muluhisi3): New.
(__mulshisi3): New.
(__mulohisi3): New.
(__mulqi3, __mulqihi3, __umulqihi3, __mulhi3): Use DEFUN/ENDF to
declare.
* config/avr/predicates.md (pseudo_register_operand): Rewrite.
(pseudo_register_or_const_int_operand): New.
(combine_pseudo_register_operand): New.
(u16_operand): New.
(s16_operand): New.
(o16_operand): New.
* config/avr/avr.c (avr_rtx_costs): Handle costs for mult:SI.
* config/avr/avr.md (QIHI, QIHI2): New mode iterators.
(any_extend, any_extend2): New code iterators.
(extend_prefix): New code attribute.
(mulsi3): Rewrite. Turn insn to expander.
(mulhisi3): Ditto.
(umulhisi3): Ditto.
(usmulhisi3): New expander.
(*mulsi3): New insn-and-split.
(mulu<mode>si3): New insn-and-split.
(muls<mode>si3): New insn-and-split.
(mulohisi3): New insn-and-split.
(*uumulqihisi3, *uumulhiqisi3, *uumulhihisi3, *uumulqiqisi3,
*usmulqihisi3, *usmulhiqisi3, *usmulhihisi3, *usmulqiqisi3,
*sumulqihisi3, *sumulhiqisi3, *sumulhihisi3, *sumulqiqisi3,
*ssmulqihisi3, *ssmulhiqisi3, *ssmulhihisi3, *ssmulqiqisi3): New
insn-and-split.
(*mulsi3_call): Rewrite.
(*mulhisi3_call): Rewrite.
(*umulhisi3_call): Rewrite.
(*usmulhisi3_call): New insn.
(*muluhisi3_call): New insn.
(*mulshisi3_call): New insn.
(*mulohisi3_call): New insn.
(extendqihi2): Use combine_pseudo_register_operand as predicate
for operand 1.
(extendqisi2): Ditto.
(zero_extendqihi2): Ditto.
(zero_extendqisi2): Ditto.
(zero_extendhisi2): Ditto.
(extendhisi2): Ditto. Don't early-clobber operand 0.
2011-07-28 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.c (add->lea splitter): Add SWI mode to PLUS RTX.

View File

@ -5515,6 +5515,34 @@ avr_rtx_costs (rtx x, int codearg, int outer_code ATTRIBUTE_UNUSED, int *total,
return false;
break;
case SImode:
if (AVR_HAVE_MUL)
{
if (!speed)
{
/* Add some additional costs besides CALL like moves etc. */
*total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4);
}
else
{
/* Just a rough estimate. Even with -O2 we don't want bulky
code expanded inline. */
*total = COSTS_N_INSNS (25);
}
}
else
{
if (speed)
*total = COSTS_N_INSNS (300);
else
/* Add some additional costs besides CALL like moves etc. */
*total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4);
}
return true;
default:
return false;
}

View File

@ -127,12 +127,25 @@
(const_int 2))]
(const_int 2)))
;; Define mode iterator
;; Define mode iterators
(define_mode_iterator QIHI [(QI "") (HI "")])
(define_mode_iterator QIHI2 [(QI "") (HI "")])
(define_mode_iterator QISI [(QI "") (HI "") (SI "")])
(define_mode_iterator QIDI [(QI "") (HI "") (SI "") (DI "")])
(define_mode_iterator HIDI [(HI "") (SI "") (DI "")])
(define_mode_iterator HISI [(HI "") (SI "")])
;; Define code iterators
;; Define two incarnations so that we can build the cross product.
(define_code_iterator any_extend [sign_extend zero_extend])
(define_code_iterator any_extend2 [sign_extend zero_extend])
;; Define code attributes
(define_code_attr extend_prefix
[(sign_extend "s")
(zero_extend "u")])
;;========================================================================
;; The following is used by nonlocal_goto and setjmp.
;; The receiver pattern will create no instructions since internally
@ -1350,69 +1363,310 @@
;; Operand 2 (reg:SI 18) not clobbered on the enhanced core.
;; All call-used registers clobbered otherwise - normal library call.
;; To support widening multiplication with constant we postpone
;; expanding to the implicit library call until post combine and
;; prior to register allocation. Clobber all hard registers that
;; might be used by the (widening) multiply until it is split and
;; its final register footprint is worked out.
(define_expand "mulsi3"
[(set (reg:SI 22) (match_operand:SI 1 "register_operand" ""))
(set (reg:SI 18) (match_operand:SI 2 "register_operand" ""))
(parallel [(set (reg:SI 22) (mult:SI (reg:SI 22) (reg:SI 18)))
(clobber (reg:HI 26))
(clobber (reg:HI 30))])
(set (match_operand:SI 0 "register_operand" "") (reg:SI 22))]
[(parallel [(set (match_operand:SI 0 "register_operand" "")
(mult:SI (match_operand:SI 1 "register_operand" "")
(match_operand:SI 2 "nonmemory_operand" "")))
(clobber (reg:DI 18))])]
"AVR_HAVE_MUL"
{
if (u16_operand (operands[2], SImode))
{
operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
emit_insn (gen_muluhisi3 (operands[0], operands[2], operands[1]));
DONE;
}
if (o16_operand (operands[2], SImode))
{
operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1]));
DONE;
}
})
;; 32 = 32 x 32 multiplication of a pseudo register by a pseudo register
;; or a CONST_INT.  The insn condition contains !reload_completed and the
;; output template is gcc_unreachable, so this insn only exists to be split.
;; The default split copies operand 1 to (reg:SI 18) and operand 2 to
;; (reg:SI 22), multiplies these two hard registers into (reg:SI 22) and
;; copies (reg:SI 22) to operand 0.
(define_insn_and_split "*mulsi3"
[(set (match_operand:SI 0 "pseudo_register_operand" "=r")
(mult:SI (match_operand:SI 1 "pseudo_register_operand" "r")
(match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
(clobber (reg:DI 18))]
"AVR_HAVE_MUL && !reload_completed"
{ gcc_unreachable(); }
"&& 1"
[(set (reg:SI 18)
(match_dup 1))
(set (reg:SI 22)
(match_dup 2))
(parallel [(set (reg:SI 22)
(mult:SI (reg:SI 22)
(reg:SI 18)))
(clobber (reg:HI 26))])
(set (match_dup 0)
(reg:SI 22))]
{
/* Operand 2 satisfies u16_operand: load it into a HImode register and
   emit muluhisi3 instead of the default sequence above.  */
if (u16_operand (operands[2], SImode))
{
operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
emit_insn (gen_muluhisi3 (operands[0], operands[2], operands[1]));
DONE;
}
/* Operand 2 satisfies o16_operand: likewise, but emit mulohisi3.  */
if (o16_operand (operands[2], SImode))
{
operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1]));
DONE;
}
})
;; "muluqisi3"
;; "muluhisi3"
;; 32-bit product of a zero-extended QImode or HImode pseudo (operand 1)
;; and an SImode pseudo or CONST_INT (operand 2).  Split-only insn: the
;; output template is gcc_unreachable.  The default split copies operand 1
;; to (reg:HI 26) and operand 2 to (reg:SI 18), computes the product in
;; (reg:SI 22) and copies it to operand 0.
(define_insn_and_split "mulu<mode>si3"
[(set (match_operand:SI 0 "pseudo_register_operand" "=r")
(mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r"))
(match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
(clobber (reg:DI 18))]
"AVR_HAVE_MUL && !reload_completed"
{ gcc_unreachable(); }
"&& 1"
[(set (reg:HI 26)
(match_dup 1))
(set (reg:SI 18)
(match_dup 2))
(set (reg:SI 22)
(mult:SI (zero_extend:SI (reg:HI 26))
(reg:SI 18)))
(set (match_dup 0)
(reg:SI 22))]
{
/* Do the QI -> HI extension explicitly before the multiplication. */
/* Do the HI -> SI extension implicitly and after the multiplication. */
if (QImode == <MODE>mode)
operands[1] = gen_rtx_ZERO_EXTEND (HImode, operands[1]);
/* Operand 2 satisfies u16_operand: both factors fit in HImode, so emit
   the unsigned 32 = 16 x 16 widening multiply umulhisi3 instead.  */
if (u16_operand (operands[2], SImode))
{
operands[1] = force_reg (HImode, operands[1]);
operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
emit_insn (gen_umulhisi3 (operands[0], operands[1], operands[2]));
DONE;
}
})
;; "mulsqisi3"
;; "mulshisi3"
;; 32-bit product of a sign-extended QImode or HImode pseudo (operand 1)
;; and an SImode pseudo or CONST_INT (operand 2).  Split-only insn: the
;; output template is gcc_unreachable.  The default split copies operand 1
;; to (reg:HI 26) and operand 2 to (reg:SI 18), computes the product in
;; (reg:SI 22) and copies it to operand 0.
(define_insn_and_split "muls<mode>si3"
[(set (match_operand:SI 0 "pseudo_register_operand" "=r")
(mult:SI (sign_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r"))
(match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
(clobber (reg:DI 18))]
"AVR_HAVE_MUL && !reload_completed"
{ gcc_unreachable(); }
"&& 1"
[(set (reg:HI 26)
(match_dup 1))
(set (reg:SI 18)
(match_dup 2))
(set (reg:SI 22)
(mult:SI (sign_extend:SI (reg:HI 26))
(reg:SI 18)))
(set (match_dup 0)
(reg:SI 22))]
{
/* Do the QI -> HI extension explicitly before the multiplication. */
/* Do the HI -> SI extension implicitly and after the multiplication. */
if (QImode == <MODE>mode)
operands[1] = gen_rtx_SIGN_EXTEND (HImode, operands[1]);
/* Operand 2 is a constant that fits in 16 bits, unsigned or signed:
   emit a 32 = 16 x 16 widening multiply instead of the default split.  */
if (u16_operand (operands[2], SImode)
|| s16_operand (operands[2], SImode))
{
rtx xop2 = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
operands[1] = force_reg (HImode, operands[1]);
/* u16_operand is tested first, so a constant accepted by both predicates
   takes the usmulhisi3 path with the constant as the zero-extended
   operand; only the remaining s16_operand constants use mulhisi3.  */
if (u16_operand (operands[2], SImode))
emit_insn (gen_usmulhisi3 (operands[0], xop2, operands[1]));
else
emit_insn (gen_mulhisi3 (operands[0], operands[1], xop2));
DONE;
}
})
;; One-extend operand 1
;; 32-bit product of HImode operand 1, extended to SImode with ones, and
;; SImode operand 2.  The extension is written as
;; (not:SI (zero_extend:SI (not:HI x))): the low 16 bits are x again and
;; the high 16 bits are all ones.  Split-only insn: the output template is
;; gcc_unreachable.  The split copies operand 1 to (reg:HI 26) and
;; operand 2 to (reg:SI 18), computes the product in (reg:SI 22) and
;; copies it to operand 0.
(define_insn_and_split "mulohisi3"
[(set (match_operand:SI 0 "pseudo_register_operand" "=r")
(mult:SI (not:SI (zero_extend:SI
(not:HI (match_operand:HI 1 "pseudo_register_operand" "r"))))
(match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
(clobber (reg:DI 18))]
"AVR_HAVE_MUL && !reload_completed"
{ gcc_unreachable(); }
"&& 1"
[(set (reg:HI 26)
(match_dup 1))
(set (reg:SI 18)
(match_dup 2))
(set (reg:SI 22)
(mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26))))
(reg:SI 18)))
(set (match_dup 0)
(reg:SI 22))]
"")
(define_expand "mulhisi3"
[(parallel [(set (match_operand:SI 0 "register_operand" "")
(mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" ""))
(sign_extend:SI (match_operand:HI 2 "register_operand" ""))))
(clobber (reg:DI 18))])]
"AVR_HAVE_MUL"
"")
(define_expand "umulhisi3"
[(parallel [(set (match_operand:SI 0 "register_operand" "")
(mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" ""))
(zero_extend:SI (match_operand:HI 2 "register_operand" ""))))
(clobber (reg:DI 18))])]
"AVR_HAVE_MUL"
"")
(define_expand "usmulhisi3"
[(parallel [(set (match_operand:SI 0 "register_operand" "")
(mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" ""))
(sign_extend:SI (match_operand:HI 2 "register_operand" ""))))
(clobber (reg:DI 18))])]
"AVR_HAVE_MUL"
"")
;; "*uumulqihisi3" "*uumulhiqisi3" "*uumulhihisi3" "*uumulqiqisi3"
;; "*usmulqihisi3" "*usmulhiqisi3" "*usmulhihisi3" "*usmulqiqisi3"
;; "*sumulqihisi3" "*sumulhiqisi3" "*sumulhihisi3" "*sumulqiqisi3"
;; "*ssmulqihisi3" "*ssmulhiqisi3" "*ssmulhihisi3" "*ssmulqiqisi3"
;; 32-bit product of two extended operands.  The pattern is instantiated
;; for every combination of sign/zero extension (any_extend, any_extend2)
;; and QImode/HImode (QIHI, QIHI2) of the two inputs.  Split-only insn: the
;; output template is gcc_unreachable.  The split loads (reg:HI 18) and
;; (reg:HI 26), computes the product of their SImode extensions
;; (operands 3 and 4, built in the preparation code) in (reg:SI 22) and
;; copies it to operand 0.
(define_insn_and_split
"*<any_extend:extend_prefix><any_extend2:extend_prefix>mul<QIHI:mode><QIHI2:mode>si3"
[(set (match_operand:SI 0 "pseudo_register_operand" "=r")
(mult:SI (any_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r"))
(any_extend2:SI (match_operand:QIHI2 2 "pseudo_register_operand" "r"))))
(clobber (reg:DI 18))]
"AVR_HAVE_MUL && !reload_completed"
{ gcc_unreachable(); }
"&& 1"
[(set (reg:HI 18)
(match_dup 1))
(set (reg:HI 26)
(match_dup 2))
(set (reg:SI 22)
(mult:SI (match_dup 3)
(match_dup 4)))
(set (match_dup 0)
(reg:SI 22))]
{
rtx xop1 = operands[1];
rtx xop2 = operands[2];
/* Do the QI -> HI extension explicitly before the multiplication. */
/* Do the HI -> SI extension implicitly and after the multiplication. */
if (QImode == <QIHI:MODE>mode)
xop1 = gen_rtx_fmt_e (<any_extend:CODE>, HImode, xop1);
if (QImode == <QIHI2:MODE>mode)
xop2 = gen_rtx_fmt_e (<any_extend2:CODE>, HImode, xop2);
/* Both extensions are the same, or the first one is a zero-extension:
   keep the operand order, i.e. operand 1 goes to (reg:HI 18) and
   operand 2 goes to (reg:HI 26).  */
if (<any_extend:CODE> == <any_extend2:CODE>
|| <any_extend:CODE> == ZERO_EXTEND)
{
operands[1] = xop1;
operands[2] = xop2;
operands[3] = gen_rtx_fmt_e (<any_extend:CODE>, SImode, gen_rtx_REG (HImode, 18));
operands[4] = gen_rtx_fmt_e (<any_extend2:CODE>, SImode, gen_rtx_REG (HImode, 26));
}
else
{
/* <any_extend:CODE> = SIGN_EXTEND */
/* <any_extend2:CODE> = ZERO_EXTEND */
/* Swap the inputs so that the zero-extended one is loaded into
   (reg:HI 18) and the sign-extended one into (reg:HI 26).  */
operands[1] = xop2;
operands[2] = xop1;
operands[3] = gen_rtx_ZERO_EXTEND (SImode, gen_rtx_REG (HImode, 18));
operands[4] = gen_rtx_SIGN_EXTEND (SImode, gen_rtx_REG (HImode, 26));
}
})
(define_insn "*mulsi3_call"
[(set (reg:SI 22) (mult:SI (reg:SI 22) (reg:SI 18)))
(clobber (reg:HI 26))
(clobber (reg:HI 30))]
[(set (reg:SI 22)
(mult:SI (reg:SI 22)
(reg:SI 18)))
(clobber (reg:HI 26))]
"AVR_HAVE_MUL"
"%~call __mulsi3"
[(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
(define_expand "mulhisi3"
[(set (reg:HI 18)
(match_operand:HI 1 "register_operand" ""))
(set (reg:HI 20)
(match_operand:HI 2 "register_operand" ""))
(set (reg:SI 22)
(mult:SI (sign_extend:SI (reg:HI 18))
(sign_extend:SI (reg:HI 20))))
(set (match_operand:SI 0 "register_operand" "")
(reg:SI 22))]
"AVR_HAVE_MUL"
"")
(define_expand "umulhisi3"
[(set (reg:HI 18)
(match_operand:HI 1 "register_operand" ""))
(set (reg:HI 20)
(match_operand:HI 2 "register_operand" ""))
(set (reg:SI 22)
(mult:SI (zero_extend:SI (reg:HI 18))
(zero_extend:SI (reg:HI 20))))
(set (match_operand:SI 0 "register_operand" "")
(reg:SI 22))]
"AVR_HAVE_MUL"
"")
(define_insn "*mulhisi3_call"
[(set (reg:SI 22)
[(set (reg:SI 22)
(mult:SI (sign_extend:SI (reg:HI 18))
(sign_extend:SI (reg:HI 20))))]
(sign_extend:SI (reg:HI 26))))]
"AVR_HAVE_MUL"
"%~call __mulhisi3"
[(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
(define_insn "*umulhisi3_call"
[(set (reg:SI 22)
[(set (reg:SI 22)
(mult:SI (zero_extend:SI (reg:HI 18))
(zero_extend:SI (reg:HI 20))))]
(zero_extend:SI (reg:HI 26))))]
"AVR_HAVE_MUL"
"%~call __umulhisi3"
[(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
;; Call to __usmulhisi3 on fixed hard registers:
;; (reg:SI 22) = zero-extended (reg:HI 18) * sign-extended (reg:HI 26).
(define_insn "*usmulhisi3_call"
[(set (reg:SI 22)
(mult:SI (zero_extend:SI (reg:HI 18))
(sign_extend:SI (reg:HI 26))))]
"AVR_HAVE_MUL"
"%~call __usmulhisi3"
[(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
;; Call to __muluhisi3 on fixed hard registers:
;; (reg:SI 22) = zero-extended (reg:HI 26) * (reg:SI 18).
(define_insn "*muluhisi3_call"
[(set (reg:SI 22)
(mult:SI (zero_extend:SI (reg:HI 26))
(reg:SI 18)))]
"AVR_HAVE_MUL"
"%~call __muluhisi3"
[(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
;; Call to __mulshisi3 on fixed hard registers:
;; (reg:SI 22) = sign-extended (reg:HI 26) * (reg:SI 18).
(define_insn "*mulshisi3_call"
[(set (reg:SI 22)
(mult:SI (sign_extend:SI (reg:HI 26))
(reg:SI 18)))]
"AVR_HAVE_MUL"
"%~call __mulshisi3"
[(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
;; Call to __mulohisi3 on fixed hard registers:
;; (reg:SI 22) = (reg:HI 26) extended with ones to SImode * (reg:SI 18).
(define_insn "*mulohisi3_call"
[(set (reg:SI 22)
(mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26))))
(reg:SI 18)))]
"AVR_HAVE_MUL"
"%~call __mulohisi3"
[(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
; / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / %
; divmod
@ -2400,9 +2654,16 @@
;; xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x
;; sign extend
;; We keep combiner from inserting hard registers into the input of sign- and
;; zero-extends. A hard register in the input operand is not wanted because
;; 32-bit multiply patterns clobber some hard registers and extends with a
;; hard register that overlaps these clobbers won't be combined to a widening
;; multiplication. There is no need for combine to propagate hard registers,
;; register allocation can do it just as well.
(define_insn "extendqihi2"
[(set (match_operand:HI 0 "register_operand" "=r,r")
(sign_extend:HI (match_operand:QI 1 "register_operand" "0,*r")))]
(sign_extend:HI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))]
""
"@
clr %B0\;sbrc %0,7\;com %B0
@ -2412,7 +2673,7 @@
(define_insn "extendqisi2"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(sign_extend:SI (match_operand:QI 1 "register_operand" "0,*r")))]
(sign_extend:SI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))]
""
"@
clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0\;mov %D0,%B0
@ -2421,8 +2682,8 @@
(set_attr "cc" "set_n,set_n")])
(define_insn "extendhisi2"
[(set (match_operand:SI 0 "register_operand" "=r,&r")
(sign_extend:SI (match_operand:HI 1 "register_operand" "0,*r")))]
[(set (match_operand:SI 0 "register_operand" "=r,r")
(sign_extend:SI (match_operand:HI 1 "combine_pseudo_register_operand" "0,*r")))]
""
"@
clr %C0\;sbrc %B0,7\;com %C0\;mov %D0,%C0
@ -2439,7 +2700,7 @@
(define_insn_and_split "zero_extendqihi2"
[(set (match_operand:HI 0 "register_operand" "=r")
(zero_extend:HI (match_operand:QI 1 "register_operand" "r")))]
(zero_extend:HI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))]
""
"#"
"reload_completed"
@ -2455,7 +2716,7 @@
(define_insn_and_split "zero_extendqisi2"
[(set (match_operand:SI 0 "register_operand" "=r")
(zero_extend:SI (match_operand:QI 1 "register_operand" "r")))]
(zero_extend:SI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))]
""
"#"
"reload_completed"
@ -2470,8 +2731,8 @@
})
(define_insn_and_split "zero_extendhisi2"
[(set (match_operand:SI 0 "register_operand" "=r")
(zero_extend:SI (match_operand:HI 1 "register_operand" "r")))]
[(set (match_operand:SI 0 "register_operand" "=r")
(zero_extend:SI (match_operand:HI 1 "combine_pseudo_register_operand" "r")))]
""
"#"
"reload_completed"

View File

@ -72,10 +72,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
.endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
#if !defined (__AVR_HAVE_MUL__)
/*******************************************************
Multiplication 8 x 8
Multiplication 8 x 8 without MUL
*******************************************************/
#if defined (L_mulqi3)
@ -83,9 +84,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define r_arg1 r24 /* multiplier */
#define r_res __tmp_reg__ /* result */
.global __mulqi3
.func __mulqi3
__mulqi3:
DEFUN __mulqi3
clr r_res ; clear result
__mulqi3_loop:
sbrc r_arg1,0
@ -97,18 +96,16 @@ __mulqi3_loop:
__mulqi3_exit:
mov r_arg1,r_res ; result to return register
ret
ENDF __mulqi3
#undef r_arg2
#undef r_arg1
#undef r_res
.endfunc
#endif /* defined (L_mulqi3) */
#if defined (L_mulqihi3)
;;; Sign-extend the 8-bit value in r24 to r25:r24 (r25 becomes 0xff if
;;; bit 7 of r24 is set and 0 otherwise), then continue in __mulhi3.
;;; NOTE(review): the lines between "dec r25" and "sbrc r22, 7" are elided
;;; in this view.  If the intent is to sign-extend r22 into r23, then
;;; "dec r22" looks suspicious -- confirm against the full file.
.global __mulqihi3
.func __mulqihi3
__mulqihi3:
DEFUN __mulqihi3
clr r25
sbrc r24, 7
dec r25
@ -116,21 +113,19 @@ __mulqihi3:
sbrc r22, 7
dec r22
rjmp __mulhi3
.endfunc
ENDF __mulqihi3
#endif /* defined (L_mulqihi3) */
#if defined (L_umulqihi3)
.global __umulqihi3
.func __umulqihi3
__umulqihi3:
DEFUN __umulqihi3
clr r25
clr r23
rjmp __mulhi3
.endfunc
ENDF __umulqihi3
#endif /* defined (L_umulqihi3) */
/*******************************************************
Multiplication 16 x 16
Multiplication 16 x 16 without MUL
*******************************************************/
#if defined (L_mulhi3)
#define r_arg1L r24 /* multiplier Low */
@ -140,9 +135,7 @@ __umulqihi3:
#define r_resL __tmp_reg__ /* result Low */
#define r_resH r21 /* result High */
.global __mulhi3
.func __mulhi3
__mulhi3:
DEFUN __mulhi3
clr r_resH ; clear result
clr r_resL ; clear result
__mulhi3_loop:
@ -166,6 +159,7 @@ __mulhi3_exit:
mov r_arg1H,r_resH ; result to return register
mov r_arg1L,r_resL
ret
ENDF __mulhi3
#undef r_arg1L
#undef r_arg1H
@ -174,168 +168,51 @@ __mulhi3_exit:
#undef r_resL
#undef r_resH
.endfunc
#endif /* defined (L_mulhi3) */
#endif /* !defined (__AVR_HAVE_MUL__) */
/*******************************************************
Widening Multiplication 32 = 16 x 16
Widening Multiplication 32 = 16 x 16 without MUL
*******************************************************/
#if defined (L_mulhisi3)
DEFUN __mulhisi3
#if defined (__AVR_HAVE_MUL__)
;; r25:r22 = r19:r18 * r21:r20
#define A0 18
#define B0 20
#define C0 22
#define A1 A0+1
#define B1 B0+1
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3
; C = (signed)A1 * (signed)B1
muls A1, B1
movw C2, R0
; C += A0 * B0
mul A0, B0
movw C0, R0
; C += (signed)A1 * B0
mulsu A1, B0
sbci C3, 0
add C1, R0
adc C2, R1
clr __zero_reg__
adc C3, __zero_reg__
; C += (signed)B1 * A0
mulsu B1, A0
sbci C3, 0
XJMP __xmulhisi3_exit
#undef A0
#undef A1
#undef B0
#undef B1
#undef C0
#undef C1
#undef C2
#undef C3
#else /* !__AVR_HAVE_MUL__ */
;;; FIXME: This is dead code (noone calls it)
mov_l r18, r24
mov_h r19, r25
clr r24
sbrc r23, 7
dec r24
mov r25, r24
clr r20
sbrc r19, 7
dec r20
mov r21, r20
XJMP __mulsi3
#endif /* __AVR_HAVE_MUL__ */
mov_l r18, r24
mov_h r19, r25
clr r24
sbrc r23, 7
dec r24
mov r25, r24
clr r20
sbrc r19, 7
dec r20
mov r21, r20
XJMP __mulsi3
ENDF __mulhisi3
#endif /* defined (L_mulhisi3) */
#if defined (L_umulhisi3)
DEFUN __umulhisi3
#if defined (__AVR_HAVE_MUL__)
;; r25:r22 = r19:r18 * r21:r20
#define A0 18
#define B0 20
#define C0 22
#define A1 A0+1
#define B1 B0+1
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3
; C = A1 * B1
mul A1, B1
movw C2, R0
; C += A0 * B0
mul A0, B0
movw C0, R0
; C += A1 * B0
mul A1, B0
add C1, R0
adc C2, R1
clr __zero_reg__
adc C3, __zero_reg__
; C += B1 * A0
mul B1, A0
XJMP __xmulhisi3_exit
#undef A0
#undef A1
#undef B0
#undef B1
#undef C0
#undef C1
#undef C2
#undef C3
#else /* !__AVR_HAVE_MUL__ */
;;; FIXME: This is dead code (noone calls it)
mov_l r18, r24
mov_h r19, r25
clr r24
clr r25
clr r20
clr r21
XJMP __mulsi3
#endif /* __AVR_HAVE_MUL__ */
mov_l r18, r24
mov_h r19, r25
clr r24
clr r25
mov_l r20, r24
mov_h r21, r25
XJMP __mulsi3
ENDF __umulhisi3
#endif /* defined (L_umulhisi3) */
#if defined (L_xmulhisi3_exit)
;;; Helper for __mulhisi3 resp. __umulhisi3.
#define C0 22
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3
DEFUN __xmulhisi3_exit
add C1, R0
adc C2, R1
clr __zero_reg__
adc C3, __zero_reg__
ret
ENDF __xmulhisi3_exit
#undef C0
#undef C1
#undef C2
#undef C3
#endif /* defined (L_xmulhisi3_exit) */
#if defined (L_mulsi3)
/*******************************************************
Multiplication 32 x 32
Multiplication 32 x 32 without MUL
*******************************************************/
#define r_arg1L r22 /* multiplier Low */
#define r_arg1H r23
#define r_arg1HL r24
#define r_arg1HH r25 /* multiplier High */
#define r_arg2L r18 /* multiplicand Low */
#define r_arg2H r19
#define r_arg2HL r20
@ -346,43 +223,7 @@ ENDF __xmulhisi3_exit
#define r_resHL r30
#define r_resHH r31 /* result High */
.global __mulsi3
.func __mulsi3
__mulsi3:
#if defined (__AVR_HAVE_MUL__)
mul r_arg1L, r_arg2L
movw r_resL, r0
mul r_arg1H, r_arg2H
movw r_resHL, r0
mul r_arg1HL, r_arg2L
add r_resHL, r0
adc r_resHH, r1
mul r_arg1L, r_arg2HL
add r_resHL, r0
adc r_resHH, r1
mul r_arg1HH, r_arg2L
add r_resHH, r0
mul r_arg1HL, r_arg2H
add r_resHH, r0
mul r_arg1H, r_arg2HL
add r_resHH, r0
mul r_arg1L, r_arg2HH
add r_resHH, r0
clr r_arg1HH ; use instead of __zero_reg__ to add carry
mul r_arg1H, r_arg2L
add r_resH, r0
adc r_resHL, r1
adc r_resHH, r_arg1HH ; add carry
mul r_arg1L, r_arg2H
add r_resH, r0
adc r_resHL, r1
adc r_resHH, r_arg1HH ; add carry
movw r_arg1L, r_resL
movw r_arg1HL, r_resHL
clr r1 ; __zero_reg__ clobbered by "mul"
ret
#else
DEFUN __mulsi3
clr r_resHH ; clear result
clr r_resHL ; clear result
clr r_resH ; clear result
@ -414,13 +255,13 @@ __mulsi3_exit:
mov_h r_arg1H,r_resH
mov_l r_arg1L,r_resL
ret
#endif /* defined (__AVR_HAVE_MUL__) */
ENDF __mulsi3
#undef r_arg1L
#undef r_arg1H
#undef r_arg1HL
#undef r_arg1HH
#undef r_arg2L
#undef r_arg2H
#undef r_arg2HL
@ -431,9 +272,181 @@ __mulsi3_exit:
#undef r_resHL
#undef r_resHH
.endfunc
#endif /* defined (L_mulsi3) */
#endif /* !defined (__AVR_HAVE_MUL__) */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#if defined (__AVR_HAVE_MUL__)
#define A0 26
#define B0 18
#define C0 22
#define A1 A0+1
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3
/*******************************************************
Widening Multiplication 32 = 16 x 16
*******************************************************/
#if defined (L_mulhisi3)
;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulhisi3
;; Start with the unsigned product of A and B and correct it afterwards
;; for each operand that is negative.
XCALL __umulhisi3
;; Sign-extend B
;; A negative B has the signed value B - 0x10000, hence subtract A << 16.
tst B1
brpl 1f
sub C2, A0
sbc C3, A1
1: ;; Sign-extend A
;; The tail subtracts B << 16 if A is negative and returns to the caller.
XJMP __usmulhisi3_tail
ENDF __mulhisi3
#endif /* L_mulhisi3 */
#if defined (L_usmulhisi3)
;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __usmulhisi3
;; Start with the unsigned product; the tail below corrects it for the
;; sign of A.
XCALL __umulhisi3
;; FALLTHRU
ENDF __usmulhisi3
;; Shared tail; __mulhisi3 jumps here as well.
DEFUN __usmulhisi3_tail
;; Sign-extend A
;; If bit 7 of A1 is clear, A is non-negative and the unsigned product is
;; already the result.
sbrs A1, 7
ret
;; A negative A has the signed value A - 0x10000, hence subtract B << 16.
sub C2, B0
sbc C3, B1
ret
ENDF __usmulhisi3_tail
#endif /* L_usmulhisi3 */
#if defined (L_umulhisi3)
;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __umulhisi3
;; The low x low and high x high partial products do not overlap; they
;; initialize the low word and the high word of the result.
mul A0, B0
movw C0, r0
mul A1, B1
movw C2, r0
;; Both cross products are added one byte up.  The rcall runs the
;; addition code at label 1 for the first cross product and returns to
;; the second mul; the second cross product then falls through into the
;; same code, whose ret returns to the caller.
mul A0, B1
rcall 1f
mul A1, B0
1: add C1, r0
adc C2, r1
;; mul leaves its result in r1:r0, so __zero_reg__ is cleared again before
;; it is used to add the carry into C3.
clr __zero_reg__
adc C3, __zero_reg__
ret
ENDF __umulhisi3
#endif /* L_umulhisi3 */
/*******************************************************
Widening Multiplication 32 = 16 x 32
*******************************************************/
#if defined (L_mulshisi3)
;;; R25:R22 = (signed long) R27:R26 * R21:R18
;;; (C3:C0) = (signed long) A1:A0 * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulshisi3
;; Dispatch on the sign of A: a non-negative A is handled by __muluhisi3,
;; a negative A by __mulohisi3 below.
#ifdef __AVR_HAVE_JMP_CALL__
;; Some cores have problem skipping 2-word instruction
tst A1
brmi __mulohisi3
#else
;; Skip the jump below if A is negative and fall through to __mulohisi3.
sbrs A1, 7
#endif /* __AVR_HAVE_JMP_CALL__ */
XJMP __muluhisi3
;; FALLTHRU
ENDF __mulshisi3
;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulohisi3
;; Start with the product for the zero-extended A.
XCALL __muluhisi3
;; One-extend R27:R26 (A1:A0)
;; The one-extended value is A - 0x10000 modulo 2^32, hence subtract the
;; low word of B from the high word of the result.
sub C2, B0
sbc C3, B1
ret
ENDF __mulohisi3
#endif /* L_mulshisi3 */
#if defined (L_muluhisi3)
;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __muluhisi3
;; C = A * B1:B0, the contribution of the low word of B.
XCALL __umulhisi3
;; Add the contribution of the high word B3:B2, which starts at byte 2 of
;; the result.  Only the low bytes of the two products below land in C3;
;; everything above bit 31 is dropped.
mul A0, B3
add C3, r0
mul A1, B2
add C3, r0
;; This product contributes both of its bytes, to C2 and C3.
mul A0, B2
add C2, r0
adc C3, r1
;; mul has overwritten r1; restore __zero_reg__ before returning.
clr __zero_reg__
ret
ENDF __muluhisi3
#endif /* L_muluhisi3 */
/*******************************************************
Multiplication 32 x 32
*******************************************************/
#if defined (L_mulsi3)
;;; R25:R22 = R25:R22 * R21:R18
;;; (C3:C0) = C3:C0 * B3:B0
;;; Clobbers: R26, R27, __tmp_reg__
DEFUN __mulsi3
;; Copy the low word of the first factor to A and save its high word on
;; the stack, because the call below overwrites C with its result.
movw A0, C0
push C2
push C3
;; C = (low word of the first factor) * B
XCALL __muluhisi3
;; Pop in reverse order of the pushes: A1 receives the saved C3 and A0 the
;; saved C2.
pop A1
pop A0
;; A1:A0 now contains the high word of A
;; Add (high word) * B, which starts at byte 2 of the result.  Only the
;; first product contributes two bytes; of the other two only the low
;; byte lands in C3.
mul A0, B0
add C2, r0
adc C3, r1
mul A0, B1
add C3, r0
mul A1, B0
add C3, r0
;; mul has overwritten r1; restore __zero_reg__ before returning.
clr __zero_reg__
ret
ENDF __mulsi3
#endif /* L_mulsi3 */
#undef A0
#undef A1
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#endif /* __AVR_HAVE_MUL__ */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*******************************************************
Division 8 / 8 => (result + remainder)
*******************************************************/

View File

@ -155,10 +155,34 @@
(ior (match_test "register_operand (XEXP (op, 0), mode)")
(match_test "CONSTANT_ADDRESS_P (XEXP (op, 0))"))))
;; For some insns we must ensure that no hard register is inserted
;; into their operands because the insns are split and the split
;; involves hard registers. An example is the divmod insns, which are
;; split to insns that represent implicit library calls.
;; True for register that is pseudo register.
(define_predicate "pseudo_register_operand"
(and (match_code "reg")
(match_test "!HARD_REGISTER_P (op)")))
(and (match_operand 0 "register_operand")
(not (and (match_code "reg")
(match_test "HARD_REGISTER_P (op)")))))
;; True for operand that is pseudo register or CONST_INT.
(define_predicate "pseudo_register_or_const_int_operand"
(ior (match_operand 0 "const_int_operand")
(match_operand 0 "pseudo_register_operand")))
;; We keep combiner from inserting hard registers into the input of sign- and
;; zero-extends. A hard register in the input operand is not wanted because
;; 32-bit multiply patterns clobber some hard registers and extends with a
;; hard register that overlaps these clobbers won't combine to a widening
;; multiplication. There is no need for combine to propagate or insert
;; hard registers, register allocation can do it just as well.
;; True for operand that is pseudo register at combine time.
(define_predicate "combine_pseudo_register_operand"
(ior (match_operand 0 "pseudo_register_operand")
(and (match_operand 0 "register_operand")
(match_test "reload_completed || reload_in_progress"))))
;; Return true if OP is a constant integer that is either
;; 8 or 16 or 24.
@ -189,3 +213,18 @@
(define_predicate "register_or_s9_operand"
(ior (match_operand 0 "register_operand")
(match_operand 0 "s9_operand")))
;; Unsigned CONST_INT that fits in 16 bits, i.e. 0..65535.
(define_predicate "u16_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 0, (1<<16)-1)")))
;; Signed CONST_INT that fits in 16 bits, i.e. -32768..32767.
(define_predicate "s16_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), -(1<<15), (1<<15)-1)")))
;; One-extended CONST_INT that fits in 16 bits, i.e. -65536..-1.
(define_predicate "o16_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), -(1<<16), -1)")))

View File

@ -41,7 +41,9 @@ LIB1ASMFUNCS = \
_mulhi3 \
_mulhisi3 \
_umulhisi3 \
_xmulhisi3_exit \
_usmulhisi3 \
_muluhisi3 \
_mulshisi3 \
_mulsi3 \
_udivmodqi4 \
_divmodqi4 \