re PR target/50931 ([avr] Support a 24-bit scalar integer mode)

libgcc/
	PR target/50931
	* config/avr/t-avr (LIB1ASMSRC): Add _mulpsi3, _mulsqipsi3.
	* config/avr/lib1funcs.S (__mulpsi3, __mulsqipsi3): New functions.
gcc/
	PR target/50931
	* config/avr/avr.md (mulpsi3): New expander.
	(*umulqihipsi3, *umulhiqipsi3): New insns.
	(*mulsqipsi3.libgcc, *mulpsi3.libgcc): New insns.
	(mulsqipsi3, *mulpsi3): New insn-and-splits.
	(ashlpsi3): Turn to expander.  Move insn code to...
	(*ashlpsi3): ...this new insn.
testsuite/
	PR target/50931
	* gcc.target/avr/torture/int24-mul.c: New testcase.

From-SVN: r182328
This commit is contained in:
Georg-Johann Lay 2011-12-14 10:00:56 +00:00 committed by Georg-Johann Lay
parent 552d2db565
commit d7288dfb9f
7 changed files with 411 additions and 3 deletions

View File

@ -1,3 +1,13 @@
2011-12-14 Georg-Johann Lay <avr@gjlay.de>
PR target/50931
* config/avr/avr.md (mulpsi3): New expander.
(*umulqihipsi3, *umulhiqipsi3): New insns.
(*mulsqipsi3.libgcc, *mulpsi3.libgcc): New insns.
(mulsqipsi3, *mulpsi3): New insn-and-splits.
(ashlpsi3): Turn to expander. Move insn code to...
(*ashlpsi3): ...this new insn.
2011-12-14 Richard Guenther <rguenther@suse.de>
* tree-cfg.c (replace_uses_by): Only mark blocks altered

View File

@ -2113,7 +2113,7 @@
[(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
;; To support widening multiplicatioon with constant we postpone
;; To support widening multiplication with constant we postpone
;; expanding to the implicit library call until post combine and
;; prior to register allocation. Clobber all hard registers that
;; might be used by the (widening) multiply until it is split and
@ -2574,6 +2574,132 @@
[(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 24-bit multiply
;; To support widening multiplication with constant we postpone
;; expanding to the implicit library call until post combine and
;; prior to register allocation. Clobber all hard registers that
;; might be used by the (widening) multiply until it is split and
;; its final register footprint is worked out.
;; Expander for 24-bit (PSImode) multiplication.  Only enabled when a
;; hardware multiplier is available (AVR_HAVE_MUL).  The generated
;; pattern carries clobbers of (reg:HI 26) and (reg:DI 18), i.e. of
;; r26...r27 and r18...r25, so that it has the same shape as the
;; insn-and-split patterns "mulsqipsi3" and "*mulpsi3".
(define_expand "mulpsi3"
[(parallel [(set (match_operand:PSI 0 "register_operand" "")
(mult:PSI (match_operand:PSI 1 "register_operand" "")
(match_operand:PSI 2 "nonmemory_operand" "")))
(clobber (reg:HI 26))
(clobber (reg:DI 18))])]
"AVR_HAVE_MUL"
{
/* If operand 2 satisfies s8_operand (presumably: a constant that fits
   a signed 8-bit value -- the predicate is defined elsewhere), load it
   into a QImode register and emit the sign-extending QI x PSI multiply
   instead of the generic pattern.  Note the operand order: the QImode
   multiplier is passed first, the PSImode operand second.  */
if (s8_operand (operands[2], PSImode))
{
rtx reg = force_reg (QImode, gen_int_mode (INTVAL (operands[2]), QImode));
emit_insn (gen_mulsqipsi3 (operands[0], reg, operands[1]));
DONE;
}
})
;; Unsigned widening multiply, expanded inline:
;;     op0 (PSI) = zero_extend (op1:QI) * zero_extend (op2:HI)
;; Two 8 x 8 MULs are used: op1 * low byte of op2 gives bytes A and B of
;; the result, op1 * high byte of op2 is then added at byte offset 1
;; with the carry propagated into byte C.
;; The output is early-clobbered ("=&r") because bytes of op0 are
;; written before op1 and the high byte of op2 have been read for the
;; last time.  MUL delivers its result in r1:r0, therefore
;; __zero_reg__ is cleared again as the last instruction.
(define_insn "*umulqihipsi3"
[(set (match_operand:PSI 0 "register_operand" "=&r")
(mult:PSI (zero_extend:PSI (match_operand:QI 1 "register_operand" "r"))
(zero_extend:PSI (match_operand:HI 2 "register_operand" "r"))))]
"AVR_HAVE_MUL"
"mul %1,%A2
movw %A0,r0
mul %1,%B2
clr %C0
add %B0,r0
adc %C0,r1
clr __zero_reg__"
[(set_attr "length" "7")
(set_attr "cc" "clobber")])
;; Same multiplication as "*umulqihipsi3" but with the operands of the
;; MULT in the opposite order (HI operand first, QI operand second):
;;     op0 (PSI) = zero_extend (op2:HI) * zero_extend (op1:QI)
;; Operand numbers are kept so that %1 is still the QImode input and %2
;; the HImode input.  Here byte C of the result is loaded from r1 after
;; the ADD and the carry of that ADD is folded in by the final ADC with
;; the freshly cleared __zero_reg__; this relies on the MOV and CLR in
;; between leaving the carry flag untouched.
(define_insn "*umulhiqipsi3"
[(set (match_operand:PSI 0 "register_operand" "=&r")
(mult:PSI (zero_extend:PSI (match_operand:HI 2 "register_operand" "r"))
(zero_extend:PSI (match_operand:QI 1 "register_operand" "r"))))]
"AVR_HAVE_MUL"
"mul %1,%A2
movw %A0,r0
mul %1,%B2
add %B0,r0
mov %C0,r1
clr __zero_reg__
adc %C0,__zero_reg__"
[(set_attr "length" "7")
(set_attr "cc" "clobber")])
;; op0 (PSI) = sign_extend (op1:QI) * op2 (PSI), implemented by a call
;; to the libgcc routine __mulsqipsi3.
;; The insn only exists before reload and is always split (split
;; condition "&& 1"), so its output code is never used and is marked
;; gcc_unreachable.  Until the split, the clobbers of (reg:HI 26) and
;; (reg:DI 18) reserve r26...r27 and r18...r25.
;; The split moves the operands to the fixed registers of the library
;; call and copies the result back:
;;     r25        = op1           (QI multiplier)
;;     r24...r22  = op2           (PSI multiplicand)
;;     r20...r18  = product       (matches "*mulsqipsi3.libgcc")
;;     op0        = r20...r18
(define_insn_and_split "mulsqipsi3"
[(set (match_operand:PSI 0 "pseudo_register_operand" "=r")
(mult:PSI (sign_extend:PSI (match_operand:QI 1 "pseudo_register_operand" "r"))
(match_operand:PSI 2 "pseudo_register_or_const_int_operand" "rn")))
(clobber (reg:HI 26))
(clobber (reg:DI 18))]
"AVR_HAVE_MUL && !reload_completed"
{ gcc_unreachable(); }
"&& 1"
[(set (reg:QI 25)
(match_dup 1))
(set (reg:PSI 22)
(match_dup 2))
(set (reg:PSI 18)
(mult:PSI (sign_extend:PSI (reg:QI 25))
(reg:PSI 22)))
(set (match_dup 0)
(reg:PSI 18))])
;; op0 (PSI) = op1 (PSI) * op2 (PSI), implemented by a call to the
;; libgcc routine __mulpsi3.
;; Like "mulsqipsi3" this insn only exists before reload and is always
;; split, hence the unreachable output code.
;; The default split sequence is:
;;     r20...r18  = op1
;;     r24...r22  = op2
;;     r24...r22  = r24...r22 * r20...r18, clobbering r21, r25 and
;;                  r27:r26 (matches "*mulpsi3.libgcc")
;;     op0        = r24...r22
(define_insn_and_split "*mulpsi3"
[(set (match_operand:PSI 0 "pseudo_register_operand" "=r")
(mult:PSI (match_operand:PSI 1 "pseudo_register_operand" "r")
(match_operand:PSI 2 "pseudo_register_or_const_int_operand" "rn")))
(clobber (reg:HI 26))
(clobber (reg:DI 18))]
"AVR_HAVE_MUL && !reload_completed"
{ gcc_unreachable(); }
"&& 1"
[(set (reg:PSI 18)
(match_dup 1))
(set (reg:PSI 22)
(match_dup 2))
(parallel [(set (reg:PSI 22)
(mult:PSI (reg:PSI 22)
(reg:PSI 18)))
(clobber (reg:QI 21))
(clobber (reg:QI 25))
(clobber (reg:HI 26))])
(set (match_dup 0)
(reg:PSI 22))]
{
/* If operand 2 satisfies s8_operand at split time (e.g. a constant that
   was propagated into the insn after expansion), replace the default
   sequence by the QI x PSI multiply "mulsqipsi3".  */
if (s8_operand (operands[2], PSImode))
{
rtx reg = force_reg (QImode, gen_int_mode (INTVAL (operands[2]), QImode));
emit_insn (gen_mulsqipsi3 (operands[0], reg, operands[1]));
DONE;
}
})
;; The actual library call for the sign-extending QI x PSI multiply,
;; operating on hard registers only:
;;     r20...r18 = sign_extend (r25) * r24...r22
;; This is the insn produced by the split of "mulsqipsi3".  No clobbers
;; are listed besides the result register and the condition code.
(define_insn "*mulsqipsi3.libgcc"
[(set (reg:PSI 18)
(mult:PSI (sign_extend:PSI (reg:QI 25))
(reg:PSI 22)))]
"AVR_HAVE_MUL"
"%~call __mulsqipsi3"
[(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
;; The actual library call for the 24 x 24 multiply, operating on hard
;; registers only:
;;     r24...r22 = r24...r22 * r20...r18
;; r21, r25 and r27:r26 are clobbered by the call.  This is the insn
;; produced by the default split of "*mulpsi3".
(define_insn "*mulpsi3.libgcc"
[(set (reg:PSI 22)
(mult:PSI (reg:PSI 22)
(reg:PSI 18)))
(clobber (reg:QI 21))
(clobber (reg:QI 25))
(clobber (reg:HI 26))]
"AVR_HAVE_MUL"
"%~call __mulpsi3"
[(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 24-bit signed/unsigned division and modulo.
;; Notice that the libgcc implementation returns the quotient in R22
@ -3363,7 +3489,34 @@
(set_attr "adjust_len" "ashlsi")
(set_attr "cc" "none,set_n,clobber,clobber")])
(define_insn "ashlpsi3"
;; Expander for the 24-bit left shift.  With a hardware multiplier some
;; constant shift counts are implemented as a multiplication by
;; 1 << count; in every other case the preparation code falls through
;; and the shift pattern of the expander template (with a QImode
;; scratch) is emitted.
(define_expand "ashlpsi3"
[(parallel [(set (match_operand:PSI 0 "register_operand" "")
(ashift:PSI (match_operand:PSI 1 "register_operand" "")
(match_operand:QI 2 "nonmemory_operand" "")))
(clobber (scratch:QI))])]
""
{
if (AVR_HAVE_MUL
&& CONST_INT_P (operands[2]))
{
/* Shift counts 3...6: multiply by the QImode constant 1 << count
   (8...64, which is still positive as a signed 8-bit value) using
   the sign-extending QI x PSI multiply.  */
if (IN_RANGE (INTVAL (operands[2]), 3, 6))
{
rtx xoffset = force_reg (QImode, gen_int_mode (1 << INTVAL (operands[2]), QImode));
emit_insn (gen_mulsqipsi3 (operands[0], xoffset, operands[1]));
DONE;
}
/* Shift counts 9...22 except 16, and only when optimizing this insn
   for speed: multiply by the PSImode constant 1 << count using the
   full 24 x 24 multiply.  */
else if (optimize_insn_for_speed_p ()
&& INTVAL (operands[2]) != 16
&& IN_RANGE (INTVAL (operands[2]), 9, 22))
{
rtx xoffset = force_reg (PSImode, gen_int_mode (1 << INTVAL (operands[2]), PSImode));
emit_insn (gen_mulpsi3 (operands[0], operands[1], xoffset));
DONE;
}
}
})
(define_insn "*ashlpsi3"
[(set (match_operand:PSI 0 "register_operand" "=r,r,r,r")
(ashift:PSI (match_operand:PSI 1 "register_operand" "0,0,r,0")
(match_operand:QI 2 "nonmemory_operand" "r,P,O,n")))

View File

@ -1,3 +1,8 @@
2011-12-14 Georg-Johann Lay <avr@gjlay.de>
PR target/50931
* gcc.target/avr/torture/int24-mul.c: New.
2011-12-14 Dodji Seketeli <dodji@redhat.com>
PR c++/51476

View File

@ -0,0 +1,86 @@
/* { dg-do run } */
/* { dg-options "-w" } */
#include <stdlib.h>
/* Table of operands used by the tests below: zero, small positive and
   negative values, values around the 8-bit boundary and some larger
   patterns.  The table is declared with the __pgm qualifier
   (presumably: located in program memory -- confirm against the
   compiler version in use).  */
const __pgm __int24 vals[] =
{
0, 1, 2, 3, -1, -2, -3, 0xff, 0x100, 0x101,
0xffL * 0xff, 0xfffL * 0xfff, 0x101010L, 0xaaaaaaL
};
/* Test unsigned 24-bit multiplication with two variable operands.
   For 500 iterations the product c = a * b is computed in __uint24 and
   compared against the low 24 bits of the same product computed in
   unsigned long; abort() is called on the first mismatch.  */
void test_u (void)
{
unsigned int i;
unsigned long la, lb, lc;
__uint24 a, b, c;
/* Number of entries in vals[].  */
int S = sizeof (vals) / sizeof (*vals);
for (i = 0; i < 500; i++)
{
/* The first S*S iterations walk through all ordered pairs of table
   entries.  */
if (i < S*S)
{
a = vals[i / S];
b = vals[i % S];
}
else
{
/* Afterwards, alternately advance a (odd i) or b (even i) by a
   fixed increment, starting from the last table pair.  */
if (i & 1)
a += 0x7654321L;
else
b += 0x5fe453L;
}
c = a * b;
/* Reference result: multiply in unsigned long, keep the low 24 bits.  */
la = a;
lb = b;
lc = 0xffffff & (la * lb);
if (c != lc)
abort();
}
}
/* Abort unless the low 24 bits of A1 * B are equal to A2 * B.
   A1 is meant to be the unsigned long copy of the __uint24 value A2 and
   B the multiplier.  All arguments are parenthesized so that an
   expression passed as argument keeps its grouping inside the
   products.  */
#define TEST_N_U(A1,A2,B) \
do { \
if ((0xffffff & ((A1)*(B))) != (A2)*(B)) \
abort(); \
} while (0)
/* Test multiplication of an unsigned 24-bit value by literal constants.
   For 500 iterations the value a is multiplied by each of the constants
   listed below, once as __uint24 and once as unsigned long copy la;
   TEST_N_U aborts if the low 24 bits of the two products differ.  */
void test_nu (void)
{
unsigned long la;
unsigned int i;
/* Number of entries in vals[].  */
int S = sizeof (vals) / sizeof (*vals);
__uint24 a;
for (i = 0; i < 500; i++)
{
/* The first S iterations take a from the table; afterwards a is
   advanced by a fixed increment starting from the last table entry.  */
a = i < S
? vals[i % S]
: a + 0x7654321;
la = a;
TEST_N_U (la, a, 2);
TEST_N_U (la, a, 3);
TEST_N_U (la, a, 4);
TEST_N_U (la, a, 5);
TEST_N_U (la, a, 15);
TEST_N_U (la, a, 16);
TEST_N_U (la, a, 128);
TEST_N_U (la, a, 0x1000);
}
}
/* Run both tests.  Each test calls abort() on failure, so reaching
   exit (0) means that all checks passed.  */
int main (void)
{
test_u();
test_nu();
exit(0);
return 0;
}

View File

@ -1,3 +1,9 @@
2011-12-14 Georg-Johann Lay <avr@gjlay.de>
PR target/50931
* config/avr/t-avr (LIB1ASMSRC): Add _mulpsi3, _mulsqipsi3.
* config/avr/lib1funcs.S (__mulpsi3, __mulsqipsi3): New functions.
2011-12-11 Eric Botcazou <ebotcazou@adacore.com>
* config/sparc/sol2-unwind.h: Use #ifdef directive consistently.

View File

@ -465,6 +465,153 @@ ENDF __mulsi3
#endif /* __AVR_HAVE_MUL__ */
/*******************************************************
Multiplication 24 x 24
*******************************************************/
#if defined (L_mulpsi3)
;; A[0..2]: In: Multiplicand; Out: Product
#define A0 22
#define A1 A0+1
#define A2 A0+2
;; B[0..2]: In: Multiplier
#define B0 18
#define B1 B0+1
#define B2 B0+2
#if defined (__AVR_HAVE_MUL__)
;; C[0..2]: Expand Result
#define C0 22
#define C1 C0+1
#define C2 C0+2
;; R24:R22 *= R20:R18
;; Clobbers: r21, r25, r26, r27, __tmp_reg__
#define AA0 26
#define AA2 21
;; 24 x 24 = 24 bit multiplication using the hardware multiplier.
;; The product C[] lives in the same registers as the input A[]
;; (R24:R22), therefore the bytes of A[] that are still needed after the
;; 16 x 16 multiplication are saved to AA0 (R26, together with R27) and
;; AA2 (R21) first.
DEFUN __mulpsi3
wmov AA0, A0
mov AA2, A2
;; NOTE(review): __umulhisi3 is defined elsewhere in this file.
;; Presumably it computes R25:R22 = R27:R26 * R19:R18 as unsigned
;; 16 x 16 = 32 bit product and preserves R21 and R26 -- confirm.
XCALL __umulhisi3
;; Add the low bytes of the cross products (byte 2 of A) * (byte 0 of B)
;; and (byte 0 of A) * (byte 2 of B) to the top byte of the result.
;; The high bytes of these products lie above bit 23 and are dropped.
mul AA2, B0 $ add C2, r0
mul AA0, B2 $ add C2, r0
;; MUL has written R1:R0, so restore the zero register
clr __zero_reg__
ret
ENDF __mulpsi3
#undef AA2
#undef AA0
#undef C2
#undef C1
#undef C0
#else /* !HAVE_MUL */
;; C[0..2]: Expand Result
#define C0 0
#define C1 C0+1
#define C2 21
;; R24:R22 *= R20:R18
;; Clobbers: __tmp_reg__, R18, R19, R20, R21
;; 24 x 24 = 24 bit multiplication for devices without hardware
;; multiplier, done by shift-and-add.  The result is accumulated in
;; C[] = R21:R1:R0 and copied to the return registers at the end.
DEFUN __mulpsi3
;; C[] = 0
;; NOTE(review): only R0 and R21 are cleared here.  The middle byte of
;; C[] is R1, which is presumably the zero register and hence 0 on
;; entry -- confirm.
clr __tmp_reg__
clr C2
0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
LSR B2 $ ror B1 $ ror B0
;; If the N-th Bit of B[] was set...
brcc 1f
;; ...then add A[] * 2^N to the Result C[]
ADD C0,A0 $ adc C1,A1 $ adc C2,A2
1: ;; Multiply A[] by 2
LSL A0 $ rol A1 $ rol A2
;; Loop until B[] is 0
;; (subtracting 0 with borrow over all three bytes leaves the Z flag
;; set only if every byte of B[] is zero)
subi B0,0 $ sbci B1,0 $ sbci B2,0
brne 0b
;; Copy C[] to the return Register A[]
wmov A0, C0
mov A2, C2
;; R1 was used as middle byte of C[]: make it zero again
clr __zero_reg__
ret
ENDF __mulpsi3
#undef C2
#undef C1
#undef C0
#endif /* HAVE_MUL */
#undef B2
#undef B1
#undef B0
#undef A2
#undef A1
#undef A0
#endif /* L_mulpsi3 */
#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
;; A[0..2]: In: Multiplicand
#define A0 22
#define A1 A0+1
#define A2 A0+2
;; BB: In: Multiplier
#define BB 25
;; C[0..2]: Result
#define C0 18
#define C1 C0+1
#define C2 C0+2
;; C[] = A[] * sign_extend (BB)
;; Multiply the 24-bit value A[] (R24:R22) by the signed 8-bit value BB
;; (R25) and return the low 24 bits of the product in C[] (R20:R18).
;; The product is first formed with MUL, i.e. with BB taken as unsigned
;; value.  If BB is negative, that product is too big by A[] * 256,
;; which is subtracted at the end.
DEFUN __mulsqipsi3
;; Bytes 1 and 0 of C = (byte 0 of A) * BB
mul A0, BB
movw C0, r0
;; Byte 2 of C = low byte of (byte 2 of A) * BB
mul A2, BB
mov C2, r0
;; Add (byte 1 of A) * BB at byte offset 1
mul A1, BB
add C1, r0
adc C2, r1
;; MUL has written R1:R0, so restore the zero register
clr __zero_reg__
;; Return now unless bit 7 of BB is set, i.e. unless BB is negative
sbrs BB, 7
ret
;; One-extend BB
sub C1, A0
sbc C2, A1
ret
ENDF __mulsqipsi3
#undef C2
#undef C1
#undef C0
#undef BB
#undef A2
#undef A1
#undef A0
#endif /* L_mulsqipsi3 && HAVE_MUL */
/*******************************************************
Multiplication 64 x 64
*******************************************************/
@ -1342,7 +1489,7 @@ DEFUN __divdi3_moddi3
#endif /* SPEED_DIV */
0: ;; The Prologue
;; Save Z = 12 Registers: Y, 17...8
;; Save 12 Registers: Y, 17...8
;; No Frame needed (X = 0)
clr r26
clr r27

View File

@ -2,6 +2,7 @@ LIB1ASMSRC = avr/lib1funcs.S
LIB1ASMFUNCS = \
_mulqi3 \
_mulhi3 \
_mulpsi3 _mulsqipsi3 \
_mulhisi3 \
_umulhisi3 \
_usmulhisi3 \