divtab-sh4.c, [...]: New files.
2006-03-23 J"orn Rennecke <joern.rennecke@st.com> * config/sh/divtab-sh4.c, config/sh/divcost-analysis: New files. * config/sh/lib1funcs.asm (div_table): Add !__SH5__ variant. * config/sh/t-sh (LIB1ASMFUNCS): Add _div_table. * config/sh/sh.opt (mdiv=): Amend description. * config/sh/sh.h (TARGET_DIVIDE_CALL_DIV1): New macro. (TARGET_DIVIDE_CALL_FP, TARGET_DIVIDE_CALL_TABLE): Likewise. (sh_divide_strategy_e): Add new members SH_DIV_CALL_DIV1, SH_DIV_CALL_FP, SH_DIV_CALL_TABLE and SH_DIV_INTRINSIC. (OVERRIDE_OPTIONS): Also process sh_div_str for TARGET_SH1. Calculate sh_divsi3_libfunc using TARGET_DIVIDE_* macros. * config/sh/sh.md (udivsi3_i4_int, divsi3_i4_int): New patterns. (udivsi3, divsi3): Use them. Check TARGET_DIVIDE_CALL_TABLE / TARGET_DIVIDE_CALL_FP. From-SVN: r112331
This commit is contained in:
parent
a57aee2ab6
commit
b368d6b8df
@ -1,3 +1,19 @@
|
||||
2006-03-23 J"orn Rennecke <joern.rennecke@st.com>
|
||||
|
||||
* config/sh/divtab-sh4.c, config/sh/divcost-analysis: New files.
|
||||
* config/sh/lib1funcs.asm (div_table): Add !__SH5__ variant.
|
||||
* config/sh/t-sh (LIB1ASMFUNCS): Add _div_table.
|
||||
* config/sh/sh.opt (mdiv=): Amend description.
|
||||
* config/sh/sh.h (TARGET_DIVIDE_CALL_DIV1): New macro.
|
||||
(TARGET_DIVIDE_CALL_FP, TARGET_DIVIDE_CALL_TABLE): Likewise.
|
||||
(sh_divide_strategy_e): Add new members SH_DIV_CALL_DIV1,
|
||||
SH_DIV_CALL_FP, SH_DIV_CALL_TABLE and SH_DIV_INTRINSIC.
|
||||
(OVERRIDE_OPTIONS): Also process sh_div_str for TARGET_SH1.
|
||||
Calculate sh_divsi3_libfunc using TARGET_DIVIDE_* macros.
|
||||
* config/sh/sh.md (udivsi3_i4_int, divsi3_i4_int): New patterns.
|
||||
(udivsi3, divsi3): Use them. Check TARGET_DIVIDE_CALL_TABLE /
|
||||
TARGET_DIVIDE_CALL_FP.
|
||||
|
||||
2006-03-23 Maxim Kuvyrkov <mkuvyrkov@ispras.ru>
|
||||
|
||||
* haifa-sched.c (choose_ready): Fix type of the local variable.
|
||||
|
76
gcc/config/sh/divcost-analysis
Normal file
76
gcc/config/sh/divcost-analysis
Normal file
@ -0,0 +1,76 @@
|
||||
Analysis of cycle costs for SH4:
|
||||
|
||||
-> udiv_le128: 5
|
||||
-> udiv_ge64k: 6
|
||||
-> udiv udiv_25: 10
|
||||
-> pos_divisor: 3
|
||||
-> pos_result linear: 5
|
||||
-> pos_result - -: 5
|
||||
-> div_le128: 7
|
||||
-> div_ge64k: 9
|
||||
sdivsi3 -> udiv_25 13
|
||||
udiv_25 -> div_ge64k_end: 15
|
||||
div_ge64k_end -> rts: 13
|
||||
div_le128 -> div_le128_2: 2, r1 latency 3
|
||||
udiv_le128 -> div_le128_2: 2, r1 latency 3
|
||||
(u)div_le128 -> div_by_1: 9
|
||||
(u)div_le128 -> rts: 17
|
||||
div_by_1(_neg) -> rts: 4
|
||||
div_ge64k -> div_r8: 2
|
||||
div_ge64k -> div_ge64k_2: 3
|
||||
udiv_ge64k -> udiv_r8: 3
|
||||
udiv_ge64k -> div_ge64k_2: 3 + LS
|
||||
(u)div_ge64k -> div_ge64k_end: 13
|
||||
div_r8 -> div_r8_2: 2
|
||||
udiv_r8 -> div_r8_2: 2 + LS
|
||||
(u)div_r8 -> rts: 21
|
||||
|
||||
-> - + neg_result: 5
|
||||
-> + - neg_result: 5
|
||||
-> div_le128_neg: 7
|
||||
-> div_ge64k_neg: 9
|
||||
-> div_r8_neg: 11
|
||||
-> <64k div_ge64k_neg_end: 28
|
||||
-> >=64k div_ge64k_neg_end: 22
|
||||
div_ge64k_neg_end ft -> rts: 14
|
||||
div_r8_neg_end -> rts: 4
|
||||
div_r8_neg -> div_r8_neg_end: 18
|
||||
div_le128_neg -> div_by_1_neg: 4
|
||||
div_le128_neg -> rts 18
|
||||
|
||||
absolute divisor range:
|
||||
1 [2..128] [129..64K) [64K..|dividend|/256] >=64K,>|dividend/256|
|
||||
udiv 18 22 38 32 30
|
||||
sdiv pos: 20 24 41 35 32
|
||||
sdiv neg: 15 25 42 36 33
|
||||
|
||||
|
||||
fp-based:
|
||||
|
||||
unsigned: 42 + 3 + 3 (lingering ftrc latency + sts fpul,rx) at caller's site
|
||||
signed: 33 + 3 + 3 (lingering ftrc latency + sts fpul,rx) at caller's site
|
||||
|
||||
call-div1: divisor range:
|
||||
[1..64K) >= 64K
|
||||
unsigned: 63 58
|
||||
signed: 76 76
|
||||
|
||||
SFUNC_STATIC call overhead:
|
||||
mov.l 0f,r1
|
||||
bsrf r1
|
||||
|
||||
SFUNC_GOT call overhead - current:
|
||||
mov.l 0f,r1
|
||||
mova 0f,r0
|
||||
mov.l 1f,r2
|
||||
add r1,r0
|
||||
mov.l @(r0,r2),r0
|
||||
jmp @r0
|
||||
; 3 cycles worse than SFUNC_STATIC
|
||||
|
||||
SFUNC_GOT call overhead - improved assembler:
|
||||
mov.l 0f,r1
|
||||
mova 0f,r0
|
||||
mov.l @(r0,r1),r0
|
||||
jmp @r0
|
||||
; 2 cycles worse than SFUNC_STATIC
|
90
gcc/config/sh/divtab-sh4.c
Normal file
90
gcc/config/sh/divtab-sh4.c
Normal file
@ -0,0 +1,90 @@
|
||||
/* Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
|
||||
This file is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2, or (at your option) any
|
||||
later version.
|
||||
|
||||
In addition to the permissions in the GNU General Public License, the
|
||||
Free Software Foundation gives you unlimited permission to link the
|
||||
compiled version of this file into combinations with other programs,
|
||||
and to distribute those combinations without any restriction coming
|
||||
from the use of this file. (The General Public License restrictions
|
||||
do apply in other respects; for example, they cover modification of
|
||||
the file, and distribution when not linked into a combine
|
||||
executable.)
|
||||
|
||||
This file is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
/* Calculate division table for SH2..4 integer division
|
||||
Contributed by Joern Rennecke
|
||||
joern.rennecke@superh.com */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
/* Emit, as assembler source on stdout, the lookup tables used by the
   table-driven SH integer division routines:

   div_table_clz  one signed byte per divisor 0..127 (entry 0 is a dummy
		  for division by zero).  Each entry is N - 7, where N is
		  the number of values <= 128 in the sequence
		  i, 2i, 4i, ...; the division code uses it as a shift
		  count.
   div_table_ix   one signed byte per divisor 0..128 giving the byte
		  offset, relative to div_table_inv, of the normalized
		  inverse for that divisor.  Its entry 0 is shared with
		  the last (divisor 128) entry of div_table_clz.
   zero_l / div_table_inv
		  64 longwords holding the low 32 bits of
		  ceil (2^32 * 128 / i) for i = 64..127.  The
		  div_table_inv label is placed at the i == 96 entry so
		  that the signed byte offsets from div_table_ix
		  (-128 .. 124) cover all 64 entries.

   A trailing comment reports the largest rounding error of the stored
   inverses, both raw and scaled by i / 128.

   Takes no arguments and always returns 0.  */
int
main (void)
{
  /* 2^32.  Only the low 32 bits of each inverse are stored; the leading
     1 in bit 32 is implicit.  */
  const double two_pow_32 = 4. * (1 << 30);
  int i, j;
  double q, r, err, max_err = 0, max_s_err = 0;

  puts ("/* This table has been generated by divtab-sh4.c. */");
  puts ("\t.balign 4");
  puts ("LOCAL(div_table_clz):");
  /* Output some dummy number for 1/0.  */
  printf ("\t.byte\t%d\n", 0);
  for (i = 1; i <= 128; i++)
    {
      int n = 0;

      /* The entry for 128 doubles as entry 0 of the next table, so the
	 next table's label goes immediately in front of it.  */
      if (i == 128)
	puts ("/* Lookup table translating positive divisor to index into table of\n"
	      "normalized inverse. N.B. the '0' entry is also the last entry of the\n"
	      "previous table, and causes an unaligned access for division by zero. */\n"
	      "LOCAL(div_table_ix):");
      /* Count the members of i, 2i, 4i, ... that do not exceed 128.  */
      for (j = i; j <= 128; j += j)
	n++;
      printf ("\t.byte\t%d\n", n - 7);
    }
  for (i = 1; i <= 128; i++)
    {
      /* Normalize the divisor into the range 128..255 by doubling.  j is
	 then even, so j * 2 is the byte offset of longword entry j / 2
	 from a table starting at entry 0; subtracting 96 * 4 rebases it
	 to the entry for 96, where div_table_inv is placed.  */
      j = i;
      while (j < 128)
	j += j;
      printf ("\t.byte\t%d\n", j * 2 - 96 * 4);
    }
  puts ("/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */\n"
	".balign 4\n"
	"LOCAL(zero_l):");
  for (i = 64; i < 128; i++)
    {
      /* The division code addresses this table through the
	 div_table_inv label, so that is the name that must be
	 emitted here.  */
      if (i == 96)
	puts ("LOCAL(div_table_inv):");
      q = two_pow_32 * 128 / i;
      r = ceil (q);
      /* The value for 64 is actually differently scaled than it would
	 appear from this calculation.  The implicit part is %01, not 10.
	 Still, since the value in the table is 0 either way, this
	 doesn't matter here.  Still, the 1/64 entry is effectively a 1/128
	 entry.  */
      /* r is always >= 2^32, so converting it directly to unsigned would
	 be undefined behavior.  Reduce it modulo 2^32 first; r is
	 integral, so fmod is exact and yields precisely the low 32
	 bits.  */
      printf ("\t.long\t0x%X\n", (unsigned) fmod (r, two_pow_32));
      err = r - q;
      if (err > max_err)
	max_err = err;
      err = err * i / 128;
      if (err > max_s_err)
	max_s_err = err;
    }
  printf ("\t/* maximum error: %f scaled: %f*/\n", max_err, max_s_err);
  return 0;
}
|
@ -1,5 +1,5 @@
|
||||
/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
|
||||
2004, 2005
|
||||
2004, 2005, 2006
|
||||
Free Software Foundation, Inc.
|
||||
|
||||
This file is free software; you can redistribute it and/or modify it
|
||||
@ -3019,8 +3019,8 @@ GLOBAL(GCC_pop_shmedia_regs_nofpu):
|
||||
#endif /* __SH5__ == 32 */
|
||||
#endif /* L_push_pop_shmedia_regs */
|
||||
|
||||
#if __SH5__
|
||||
#ifdef L_div_table
|
||||
#if __SH5__
|
||||
#if defined(__pic__) && defined(__SHMEDIA__)
|
||||
.global GLOBAL(sdivsi3)
|
||||
FUNC(GLOBAL(sdivsi3))
|
||||
@ -3247,5 +3247,632 @@ GLOBAL(div_table):
|
||||
.word 17738
|
||||
.word 17136
|
||||
.word 16639
|
||||
|
||||
#elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
|
||||
/* This code uses shld, thus is not suitable for SH1 / SH2. */
|
||||
|
||||
/* Signed / unsigned division without use of FPU, optimized for SH4.
|
||||
Uses a lookup table for divisors in the range -128 .. +128, and
|
||||
div1 with case distinction for larger divisors in three more ranges.
|
||||
The code is lumped together with the table to allow the use of mova. */
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
#define L_LSB 0
|
||||
#define L_LSWMSB 1
|
||||
#define L_MSWLSB 2
|
||||
#else
|
||||
#define L_LSB 3
|
||||
#define L_LSWMSB 2
|
||||
#define L_MSWLSB 1
|
||||
#endif
|
||||
|
||||
.balign 4
|
||||
.global GLOBAL(udivsi3_i4i)
|
||||
FUNC(GLOBAL(udivsi3_i4i))
|
||||
GLOBAL(udivsi3_i4i):
|
||||
mov.w LOCAL(c128_w), r1
|
||||
div0u
|
||||
mov r4,r0
|
||||
shlr8 r0
|
||||
cmp/hi r1,r5
|
||||
extu.w r5,r1
|
||||
bf LOCAL(udiv_le128)
|
||||
cmp/eq r5,r1
|
||||
bf LOCAL(udiv_ge64k)
|
||||
shlr r0
|
||||
mov r5,r1
|
||||
shll16 r5
|
||||
mov.l r4,@-r15
|
||||
div1 r5,r0
|
||||
mov.l r1,@-r15
|
||||
div1 r5,r0
|
||||
div1 r5,r0
|
||||
bra LOCAL(udiv_25)
|
||||
div1 r5,r0
|
||||
|
||||
LOCAL(div_le128):
|
||||
mova LOCAL(div_table_ix),r0
|
||||
bra LOCAL(div_le128_2)
|
||||
mov.b @(r0,r5),r1
|
||||
LOCAL(udiv_le128):
|
||||
mov.l r4,@-r15
|
||||
mova LOCAL(div_table_ix),r0
|
||||
mov.b @(r0,r5),r1
|
||||
mov.l r5,@-r15
|
||||
LOCAL(div_le128_2):
|
||||
mova LOCAL(div_table_inv),r0
|
||||
mov.l @(r0,r1),r1
|
||||
mov r5,r0
|
||||
tst #0xfe,r0
|
||||
mova LOCAL(div_table_clz),r0
|
||||
dmulu.l r1,r4
|
||||
mov.b @(r0,r5),r1
|
||||
bt/s LOCAL(div_by_1)
|
||||
mov r4,r0
|
||||
mov.l @r15+,r5
|
||||
sts mach,r0
|
||||
/* clrt */
|
||||
addc r4,r0
|
||||
mov.l @r15+,r4
|
||||
rotcr r0
|
||||
rts
|
||||
shld r1,r0
|
||||
|
||||
LOCAL(div_by_1_neg):
|
||||
neg r4,r0
|
||||
LOCAL(div_by_1):
|
||||
mov.l @r15+,r5
|
||||
rts
|
||||
mov.l @r15+,r4
|
||||
|
||||
LOCAL(div_ge64k):
|
||||
bt/s LOCAL(div_r8)
|
||||
div0u
|
||||
shll8 r5
|
||||
bra LOCAL(div_ge64k_2)
|
||||
div1 r5,r0
|
||||
LOCAL(udiv_ge64k):
|
||||
cmp/hi r0,r5
|
||||
mov r5,r1
|
||||
bt LOCAL(udiv_r8)
|
||||
shll8 r5
|
||||
mov.l r4,@-r15
|
||||
div1 r5,r0
|
||||
mov.l r1,@-r15
|
||||
LOCAL(div_ge64k_2):
|
||||
div1 r5,r0
|
||||
mov.l LOCAL(zero_l),r1
|
||||
.rept 4
|
||||
div1 r5,r0
|
||||
.endr
|
||||
mov.l r1,@-r15
|
||||
div1 r5,r0
|
||||
mov.w LOCAL(m256_w),r1
|
||||
div1 r5,r0
|
||||
mov.b r0,@(L_LSWMSB,r15)
|
||||
xor r4,r0
|
||||
and r1,r0
|
||||
bra LOCAL(div_ge64k_end)
|
||||
xor r4,r0
|
||||
|
||||
LOCAL(div_r8):
|
||||
shll16 r4
|
||||
bra LOCAL(div_r8_2)
|
||||
shll8 r4
|
||||
LOCAL(udiv_r8):
|
||||
mov.l r4,@-r15
|
||||
shll16 r4
|
||||
clrt
|
||||
shll8 r4
|
||||
mov.l r5,@-r15
|
||||
LOCAL(div_r8_2):
|
||||
rotcl r4
|
||||
mov r0,r1
|
||||
div1 r5,r1
|
||||
mov r4,r0
|
||||
rotcl r0
|
||||
mov r5,r4
|
||||
div1 r5,r1
|
||||
.rept 5
|
||||
rotcl r0; div1 r5,r1
|
||||
.endr
|
||||
rotcl r0
|
||||
mov.l @r15+,r5
|
||||
div1 r4,r1
|
||||
mov.l @r15+,r4
|
||||
rts
|
||||
rotcl r0
|
||||
|
||||
ENDFUNC(GLOBAL(udivsi3_i4i))
|
||||
|
||||
.global GLOBAL(sdivsi3_i4i)
|
||||
FUNC(GLOBAL(sdivsi3_i4i))
|
||||
/* This is link-compatible with a GLOBAL(sdivsi3) call,
|
||||
but we effectively clobber only r1. */
|
||||
GLOBAL(sdivsi3_i4i):
|
||||
mov.l r4,@-r15
|
||||
cmp/pz r5
|
||||
mov.w LOCAL(c128_w), r1
|
||||
bt/s LOCAL(pos_divisor)
|
||||
cmp/pz r4
|
||||
mov.l r5,@-r15
|
||||
neg r5,r5
|
||||
bt/s LOCAL(neg_result)
|
||||
cmp/hi r1,r5
|
||||
neg r4,r4
|
||||
LOCAL(pos_result):
|
||||
extu.w r5,r0
|
||||
bf LOCAL(div_le128)
|
||||
cmp/eq r5,r0
|
||||
mov r4,r0
|
||||
shlr8 r0
|
||||
bf/s LOCAL(div_ge64k)
|
||||
cmp/hi r0,r5
|
||||
div0u
|
||||
shll16 r5
|
||||
div1 r5,r0
|
||||
div1 r5,r0
|
||||
div1 r5,r0
|
||||
LOCAL(udiv_25):
|
||||
mov.l LOCAL(zero_l),r1
|
||||
div1 r5,r0
|
||||
div1 r5,r0
|
||||
mov.l r1,@-r15
|
||||
.rept 3
|
||||
div1 r5,r0
|
||||
.endr
|
||||
mov.b r0,@(L_MSWLSB,r15)
|
||||
xtrct r4,r0
|
||||
swap.w r0,r0
|
||||
.rept 8
|
||||
div1 r5,r0
|
||||
.endr
|
||||
mov.b r0,@(L_LSWMSB,r15)
|
||||
LOCAL(div_ge64k_end):
|
||||
.rept 8
|
||||
div1 r5,r0
|
||||
.endr
|
||||
mov.l @r15+,r4 ! zero-extension and swap using LS unit.
|
||||
extu.b r0,r0
|
||||
mov.l @r15+,r5
|
||||
or r4,r0
|
||||
mov.l @r15+,r4
|
||||
rts
|
||||
rotcl r0
|
||||
|
||||
LOCAL(div_le128_neg):
|
||||
tst #0xfe,r0
|
||||
mova LOCAL(div_table_ix),r0
|
||||
mov.b @(r0,r5),r1
|
||||
mova LOCAL(div_table_inv),r0
|
||||
bt/s LOCAL(div_by_1_neg)
|
||||
mov.l @(r0,r1),r1
|
||||
mova LOCAL(div_table_clz),r0
|
||||
dmulu.l r1,r4
|
||||
mov.b @(r0,r5),r1
|
||||
mov.l @r15+,r5
|
||||
sts mach,r0
|
||||
/* clrt */
|
||||
addc r4,r0
|
||||
mov.l @r15+,r4
|
||||
rotcr r0
|
||||
shld r1,r0
|
||||
rts
|
||||
neg r0,r0
|
||||
|
||||
LOCAL(pos_divisor):
|
||||
mov.l r5,@-r15
|
||||
bt/s LOCAL(pos_result)
|
||||
cmp/hi r1,r5
|
||||
neg r4,r4
|
||||
LOCAL(neg_result):
|
||||
extu.w r5,r0
|
||||
bf LOCAL(div_le128_neg)
|
||||
cmp/eq r5,r0
|
||||
mov r4,r0
|
||||
shlr8 r0
|
||||
bf/s LOCAL(div_ge64k_neg)
|
||||
cmp/hi r0,r5
|
||||
div0u
|
||||
mov.l LOCAL(zero_l),r1
|
||||
shll16 r5
|
||||
div1 r5,r0
|
||||
mov.l r1,@-r15
|
||||
.rept 7
|
||||
div1 r5,r0
|
||||
.endr
|
||||
mov.b r0,@(L_MSWLSB,r15)
|
||||
xtrct r4,r0
|
||||
swap.w r0,r0
|
||||
.rept 8
|
||||
div1 r5,r0
|
||||
.endr
|
||||
mov.b r0,@(L_LSWMSB,r15)
|
||||
LOCAL(div_ge64k_neg_end):
|
||||
.rept 8
|
||||
div1 r5,r0
|
||||
.endr
|
||||
mov.l @r15+,r4 ! zero-extension and swap using LS unit.
|
||||
extu.b r0,r1
|
||||
mov.l @r15+,r5
|
||||
or r4,r1
|
||||
LOCAL(div_r8_neg_end):
|
||||
mov.l @r15+,r4
|
||||
rotcl r1
|
||||
rts
|
||||
neg r1,r0
|
||||
|
||||
LOCAL(div_ge64k_neg):
|
||||
bt/s LOCAL(div_r8_neg)
|
||||
div0u
|
||||
shll8 r5
|
||||
mov.l LOCAL(zero_l),r1
|
||||
.rept 6
|
||||
div1 r5,r0
|
||||
.endr
|
||||
mov.l r1,@-r15
|
||||
div1 r5,r0
|
||||
mov.w LOCAL(m256_w),r1
|
||||
div1 r5,r0
|
||||
mov.b r0,@(L_LSWMSB,r15)
|
||||
xor r4,r0
|
||||
and r1,r0
|
||||
bra LOCAL(div_ge64k_neg_end)
|
||||
xor r4,r0
|
||||
|
||||
LOCAL(c128_w):
|
||||
.word 128
|
||||
|
||||
LOCAL(div_r8_neg):
|
||||
clrt
|
||||
shll16 r4
|
||||
mov r4,r1
|
||||
shll8 r1
|
||||
mov r5,r4
|
||||
.rept 7
|
||||
rotcl r1; div1 r5,r0
|
||||
.endr
|
||||
mov.l @r15+,r5
|
||||
rotcl r1
|
||||
bra LOCAL(div_r8_neg_end)
|
||||
div1 r4,r0
|
||||
|
||||
LOCAL(m256_w):
|
||||
.word 0xff00
|
||||
/* This table has been generated by divtab-sh4.c. */
|
||||
.balign 4
|
||||
LOCAL(div_table_clz):
|
||||
.byte 0
|
||||
.byte 1
|
||||
.byte 0
|
||||
.byte -1
|
||||
.byte -1
|
||||
.byte -2
|
||||
.byte -2
|
||||
.byte -2
|
||||
.byte -2
|
||||
.byte -3
|
||||
.byte -3
|
||||
.byte -3
|
||||
.byte -3
|
||||
.byte -3
|
||||
.byte -3
|
||||
.byte -3
|
||||
.byte -3
|
||||
.byte -4
|
||||
.byte -4
|
||||
.byte -4
|
||||
.byte -4
|
||||
.byte -4
|
||||
.byte -4
|
||||
.byte -4
|
||||
.byte -4
|
||||
.byte -4
|
||||
.byte -4
|
||||
.byte -4
|
||||
.byte -4
|
||||
.byte -4
|
||||
.byte -4
|
||||
.byte -4
|
||||
.byte -4
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -5
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
.byte -6
|
||||
/* Lookup table translating positive divisor to index into table of
|
||||
normalized inverse. N.B. the '0' entry is also the last entry of the
|
||||
previous table, and causes an unaligned access for division by zero. */
|
||||
LOCAL(div_table_ix):
|
||||
.byte -6
|
||||
.byte -128
|
||||
.byte -128
|
||||
.byte 0
|
||||
.byte -128
|
||||
.byte -64
|
||||
.byte 0
|
||||
.byte 64
|
||||
.byte -128
|
||||
.byte -96
|
||||
.byte -64
|
||||
.byte -32
|
||||
.byte 0
|
||||
.byte 32
|
||||
.byte 64
|
||||
.byte 96
|
||||
.byte -128
|
||||
.byte -112
|
||||
.byte -96
|
||||
.byte -80
|
||||
.byte -64
|
||||
.byte -48
|
||||
.byte -32
|
||||
.byte -16
|
||||
.byte 0
|
||||
.byte 16
|
||||
.byte 32
|
||||
.byte 48
|
||||
.byte 64
|
||||
.byte 80
|
||||
.byte 96
|
||||
.byte 112
|
||||
.byte -128
|
||||
.byte -120
|
||||
.byte -112
|
||||
.byte -104
|
||||
.byte -96
|
||||
.byte -88
|
||||
.byte -80
|
||||
.byte -72
|
||||
.byte -64
|
||||
.byte -56
|
||||
.byte -48
|
||||
.byte -40
|
||||
.byte -32
|
||||
.byte -24
|
||||
.byte -16
|
||||
.byte -8
|
||||
.byte 0
|
||||
.byte 8
|
||||
.byte 16
|
||||
.byte 24
|
||||
.byte 32
|
||||
.byte 40
|
||||
.byte 48
|
||||
.byte 56
|
||||
.byte 64
|
||||
.byte 72
|
||||
.byte 80
|
||||
.byte 88
|
||||
.byte 96
|
||||
.byte 104
|
||||
.byte 112
|
||||
.byte 120
|
||||
.byte -128
|
||||
.byte -124
|
||||
.byte -120
|
||||
.byte -116
|
||||
.byte -112
|
||||
.byte -108
|
||||
.byte -104
|
||||
.byte -100
|
||||
.byte -96
|
||||
.byte -92
|
||||
.byte -88
|
||||
.byte -84
|
||||
.byte -80
|
||||
.byte -76
|
||||
.byte -72
|
||||
.byte -68
|
||||
.byte -64
|
||||
.byte -60
|
||||
.byte -56
|
||||
.byte -52
|
||||
.byte -48
|
||||
.byte -44
|
||||
.byte -40
|
||||
.byte -36
|
||||
.byte -32
|
||||
.byte -28
|
||||
.byte -24
|
||||
.byte -20
|
||||
.byte -16
|
||||
.byte -12
|
||||
.byte -8
|
||||
.byte -4
|
||||
.byte 0
|
||||
.byte 4
|
||||
.byte 8
|
||||
.byte 12
|
||||
.byte 16
|
||||
.byte 20
|
||||
.byte 24
|
||||
.byte 28
|
||||
.byte 32
|
||||
.byte 36
|
||||
.byte 40
|
||||
.byte 44
|
||||
.byte 48
|
||||
.byte 52
|
||||
.byte 56
|
||||
.byte 60
|
||||
.byte 64
|
||||
.byte 68
|
||||
.byte 72
|
||||
.byte 76
|
||||
.byte 80
|
||||
.byte 84
|
||||
.byte 88
|
||||
.byte 92
|
||||
.byte 96
|
||||
.byte 100
|
||||
.byte 104
|
||||
.byte 108
|
||||
.byte 112
|
||||
.byte 116
|
||||
.byte 120
|
||||
.byte 124
|
||||
.byte -128
|
||||
/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
|
||||
.balign 4
|
||||
LOCAL(zero_l):
|
||||
.long 0x0
|
||||
.long 0xF81F81F9
|
||||
.long 0xF07C1F08
|
||||
.long 0xE9131AC0
|
||||
.long 0xE1E1E1E2
|
||||
.long 0xDAE6076C
|
||||
.long 0xD41D41D5
|
||||
.long 0xCD856891
|
||||
.long 0xC71C71C8
|
||||
.long 0xC0E07039
|
||||
.long 0xBACF914D
|
||||
.long 0xB4E81B4F
|
||||
.long 0xAF286BCB
|
||||
.long 0xA98EF607
|
||||
.long 0xA41A41A5
|
||||
.long 0x9EC8E952
|
||||
.long 0x9999999A
|
||||
.long 0x948B0FCE
|
||||
.long 0x8F9C18FA
|
||||
.long 0x8ACB90F7
|
||||
.long 0x86186187
|
||||
.long 0x81818182
|
||||
.long 0x7D05F418
|
||||
.long 0x78A4C818
|
||||
.long 0x745D1746
|
||||
.long 0x702E05C1
|
||||
.long 0x6C16C16D
|
||||
.long 0x68168169
|
||||
.long 0x642C8591
|
||||
.long 0x60581606
|
||||
.long 0x5C9882BA
|
||||
.long 0x58ED2309
|
||||
LOCAL(div_table_inv):
|
||||
.long 0x55555556
|
||||
.long 0x51D07EAF
|
||||
.long 0x4E5E0A73
|
||||
.long 0x4AFD6A06
|
||||
.long 0x47AE147B
|
||||
.long 0x446F8657
|
||||
.long 0x41414142
|
||||
.long 0x3E22CBCF
|
||||
.long 0x3B13B13C
|
||||
.long 0x38138139
|
||||
.long 0x3521CFB3
|
||||
.long 0x323E34A3
|
||||
.long 0x2F684BDB
|
||||
.long 0x2C9FB4D9
|
||||
.long 0x29E4129F
|
||||
.long 0x27350B89
|
||||
.long 0x24924925
|
||||
.long 0x21FB7813
|
||||
.long 0x1F7047DD
|
||||
.long 0x1CF06ADB
|
||||
.long 0x1A7B9612
|
||||
.long 0x18118119
|
||||
.long 0x15B1E5F8
|
||||
.long 0x135C8114
|
||||
.long 0x11111112
|
||||
.long 0xECF56BF
|
||||
.long 0xC9714FC
|
||||
.long 0xA6810A7
|
||||
.long 0x8421085
|
||||
.long 0x624DD30
|
||||
.long 0x4104105
|
||||
.long 0x2040811
|
||||
/* maximum error: 0.987342 scaled: 0.921875*/
|
||||
|
||||
ENDFUNC(GLOBAL(sdivsi3_i4i))
|
||||
#endif /* SH3 / SH4 */
|
||||
|
||||
#endif /* L_div_table */
|
||||
#endif /* __SH5__ */
|
||||
|
@ -234,6 +234,9 @@ do { \
|
||||
#define TARGET_DIVIDE_INV20L (sh_div_strategy == SH_DIV_INV20L)
|
||||
#define TARGET_DIVIDE_INV_CALL (sh_div_strategy == SH_DIV_INV_CALL)
|
||||
#define TARGET_DIVIDE_INV_CALL2 (sh_div_strategy == SH_DIV_INV_CALL2)
|
||||
#define TARGET_DIVIDE_CALL_DIV1 (sh_div_strategy == SH_DIV_CALL_DIV1)
|
||||
#define TARGET_DIVIDE_CALL_FP (sh_div_strategy == SH_DIV_CALL_FP)
|
||||
#define TARGET_DIVIDE_CALL_TABLE (sh_div_strategy == SH_DIV_CALL_TABLE)
|
||||
|
||||
#define SELECT_SH1 (MASK_SH1)
|
||||
#define SELECT_SH2 (MASK_SH2 | SELECT_SH1)
|
||||
@ -467,7 +470,7 @@ do { \
|
||||
sh_div_str = SH_DIV_STR_FOR_SIZE ; \
|
||||
} \
|
||||
/* We can't meaningfully test TARGET_SHMEDIA here, because -m options \
|
||||
haven't been parsed yet, hence we';d read only the default. \
|
||||
haven't been parsed yet, hence we'd read only the default. \
|
||||
sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so \
|
||||
it's OK to always set flag_branch_target_load_optimize. */ \
|
||||
if (LEVEL > 1) \
|
||||
@ -492,16 +495,24 @@ do { \
|
||||
extern int assembler_dialect;
|
||||
|
||||
enum sh_divide_strategy_e {
|
||||
/* SH5 strategies. */
|
||||
SH_DIV_CALL,
|
||||
SH_DIV_CALL2,
|
||||
SH_DIV_FP,
|
||||
SH_DIV_FP, /* We could do this also for SH4. */
|
||||
SH_DIV_INV,
|
||||
SH_DIV_INV_MINLAT,
|
||||
SH_DIV_INV20U,
|
||||
SH_DIV_INV20L,
|
||||
SH_DIV_INV_CALL,
|
||||
SH_DIV_INV_CALL2,
|
||||
SH_DIV_INV_FP
|
||||
SH_DIV_INV_FP,
|
||||
/* SH1 .. SH4 strategies. Because of the small number of registers
|
||||
available, the compiler uses knowledge of the actual set of registers
|
||||
being clobbered by the different functions called. */
|
||||
SH_DIV_CALL_DIV1, /* No FPU, medium size, highest latency. */
|
||||
SH_DIV_CALL_FP, /* FPU needed, small size, high latency. */
|
||||
SH_DIV_CALL_TABLE, /* No FPU, large size, medium latency. */
|
||||
SH_DIV_INTRINSIC
|
||||
};
|
||||
|
||||
extern enum sh_divide_strategy_e sh_div_strategy;
|
||||
@ -611,17 +622,46 @@ do { \
|
||||
targetm.asm_out.aligned_op.di = NULL; \
|
||||
targetm.asm_out.unaligned_op.di = NULL; \
|
||||
} \
|
||||
if (TARGET_SH1) \
|
||||
{ \
|
||||
if (! strcmp (sh_div_str, "call-div1")) \
|
||||
sh_div_strategy = SH_DIV_CALL_DIV1; \
|
||||
else if (! strcmp (sh_div_str, "call-fp") \
|
||||
&& (TARGET_FPU_DOUBLE \
|
||||
|| (TARGET_HARD_SH4 && TARGET_SH2E) \
|
||||
|| (TARGET_SHCOMPACT && TARGET_FPU_ANY))) \
|
||||
sh_div_strategy = SH_DIV_CALL_FP; \
|
||||
else if (! strcmp (sh_div_str, "call-table") && TARGET_SH3) \
|
||||
sh_div_strategy = SH_DIV_CALL_TABLE; \
|
||||
else \
|
||||
/* Pick one that makes most sense for the target in general. \
|
||||
It is not much good to use different functions depending \
|
||||
on -Os, since then we'll end up with two different functions \
|
||||
when some of the code is compiled for size, and some for \
|
||||
speed. */ \
|
||||
\
|
||||
/* SH4 tends to emphasize speed. */ \
|
||||
if (TARGET_HARD_SH4) \
|
||||
sh_div_strategy = SH_DIV_CALL_TABLE; \
|
||||
/* These have their own way of doing things. */ \
|
||||
else if (TARGET_SH2A) \
|
||||
sh_div_strategy = SH_DIV_INTRINSIC; \
|
||||
/* ??? Should we use the integer SHmedia function instead? */ \
|
||||
else if (TARGET_SHCOMPACT && TARGET_FPU_ANY) \
|
||||
sh_div_strategy = SH_DIV_CALL_FP; \
|
||||
/* SH1 .. SH3 cores often go into small-footprint systems, so \
|
||||
default to the smallest implementation available. */ \
|
||||
else \
|
||||
sh_div_strategy = SH_DIV_CALL_DIV1; \
|
||||
} \
|
||||
if (sh_divsi3_libfunc[0]) \
|
||||
; /* User supplied - leave it alone. */ \
|
||||
else if (TARGET_HARD_SH4 && TARGET_SH2E) \
|
||||
else if (TARGET_DIVIDE_CALL_FP) \
|
||||
sh_divsi3_libfunc = "__sdivsi3_i4"; \
|
||||
else if (TARGET_DIVIDE_CALL_TABLE) \
|
||||
sh_divsi3_libfunc = "__sdivsi3_i4i"; \
|
||||
else if (TARGET_SH5) \
|
||||
{ \
|
||||
if (TARGET_FPU_ANY && TARGET_SH1) \
|
||||
sh_divsi3_libfunc = "__sdivsi3_i4"; \
|
||||
else \
|
||||
sh_divsi3_libfunc = "__sdivsi3_1"; \
|
||||
} \
|
||||
sh_divsi3_libfunc = "__sdivsi3_1"; \
|
||||
else \
|
||||
sh_divsi3_libfunc = "__sdivsi3"; \
|
||||
if (TARGET_FMOVD) \
|
||||
|
@ -1739,6 +1739,19 @@
|
||||
[(set_attr "type" "sfunc")
|
||||
(set_attr "needs_delay_slot" "yes")])
|
||||
|
||||
;; Unsigned SImode division done by calling a library routine whose
;; address is held in operand 1 (the udivsi3 expander passes the address
;; of __udivsi3_i4i when TARGET_DIVIDE_CALL_TABLE is set).
;; The dividend is read from r4 and the divisor from r5; the quotient is
;; delivered in operand 0.  Apart from the result, only T, r1 and PR are
;; declared as clobbered, so the called routine has to preserve every
;; other register.
;; NOTE(review): the "z" constraint presumably pins operand 0 to r0 --
;; confirm against the SH constraint definitions.
(define_insn "udivsi3_i4_int"
|
||||
[(set (match_operand:SI 0 "register_operand" "=z")
|
||||
(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
|
||||
(clobber (reg:SI T_REG))
|
||||
(clobber (reg:SI R1_REG))
|
||||
(clobber (reg:SI PR_REG))
|
||||
(use (match_operand:SI 1 "arith_reg_operand" "r"))]
|
||||
"TARGET_SH1"
|
||||
"jsr @%1%#"
|
||||
[(set_attr "type" "sfunc")
|
||||
(set_attr "needs_delay_slot" "yes")])
|
||||
|
||||
|
||||
(define_expand "udivsi3"
|
||||
[(set (match_dup 3) (symbol_ref:SI "__udivsi3"))
|
||||
(set (reg:SI R4_REG) (match_operand:SI 1 "general_operand" ""))
|
||||
@ -1757,7 +1770,12 @@
|
||||
|
||||
operands[3] = gen_reg_rtx (Pmode);
|
||||
/* Emit the move of the address to a pseudo outside of the libcall. */
|
||||
if (TARGET_HARD_SH4 && TARGET_SH2E)
|
||||
if (TARGET_DIVIDE_CALL_TABLE)
|
||||
{
|
||||
function_symbol (operands[3], \"__udivsi3_i4i\", SFUNC_GOT);
|
||||
last = gen_udivsi3_i4_int (operands[0], operands[3]);
|
||||
}
|
||||
else if (TARGET_DIVIDE_CALL_FP)
|
||||
{
|
||||
function_symbol (operands[3], \"__udivsi3_i4\", SFUNC_STATIC);
|
||||
if (TARGET_FPU_SINGLE)
|
||||
@ -1975,6 +1993,18 @@
|
||||
[(set_attr "type" "sfunc")
|
||||
(set_attr "needs_delay_slot" "yes")])
|
||||
|
||||
;; Signed SImode division done by calling a library routine whose
;; address is held in operand 1 (the divsi3 expander passes the address
;; of sh_divsi3_libfunc when TARGET_DIVIDE_CALL_TABLE is set).
;; The dividend is read from r4 and the divisor from r5; the quotient is
;; delivered in operand 0.  Apart from the result, only T, PR and r1 are
;; declared as clobbered, so the called routine has to preserve every
;; other register.
;; NOTE(review): the "z" constraint presumably pins operand 0 to r0 --
;; confirm against the SH constraint definitions.
(define_insn "divsi3_i4_int"
|
||||
[(set (match_operand:SI 0 "register_operand" "=z")
|
||||
(div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
|
||||
(clobber (reg:SI T_REG))
|
||||
(clobber (reg:SI PR_REG))
|
||||
(clobber (reg:SI R1_REG))
|
||||
(use (match_operand:SI 1 "arith_reg_operand" "r"))]
|
||||
"TARGET_SH1"
|
||||
"jsr @%1%#"
|
||||
[(set_attr "type" "sfunc")
|
||||
(set_attr "needs_delay_slot" "yes")])
|
||||
|
||||
(define_expand "divsi3"
|
||||
[(set (match_dup 3) (symbol_ref:SI "__sdivsi3"))
|
||||
(set (reg:SI R4_REG) (match_operand:SI 1 "general_operand" ""))
|
||||
@ -1995,7 +2025,12 @@
|
||||
|
||||
operands[3] = gen_reg_rtx (Pmode);
|
||||
/* Emit the move of the address to a pseudo outside of the libcall. */
|
||||
if (TARGET_HARD_SH4 && TARGET_SH2E)
|
||||
if (TARGET_DIVIDE_CALL_TABLE)
|
||||
{
|
||||
function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT);
|
||||
last = gen_divsi3_i4_int (operands[0], operands[3]);
|
||||
}
|
||||
else if (TARGET_DIVIDE_CALL_FP)
|
||||
{
|
||||
function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC);
|
||||
if (TARGET_FPU_SINGLE)
|
||||
|
@ -1,6 +1,6 @@
|
||||
; Options for the SH port of the compiler.
|
||||
|
||||
; Copyright (C) 2005 Free Software Foundation, Inc.
|
||||
; Copyright (C) 2005, 2006 Free Software Foundation, Inc.
|
||||
;
|
||||
; This file is part of GCC.
|
||||
;
|
||||
@ -158,7 +158,7 @@ Align doubles at 64-bit boundaries
|
||||
|
||||
mdiv=
|
||||
Target RejectNegative Joined Var(sh_div_str) Init("")
|
||||
Division strategy, one of: call, call2, fp, inv, inv:minlat, inv20u, inv20l, inv:call, inv:call2, inv:fp
|
||||
Division strategy, one of: call, call2, fp, inv, inv:minlat, inv20u, inv20l, inv:call, inv:call2, inv:fp, call-div1, call-fp, call-table
|
||||
|
||||
mdivsi3_libfunc=
|
||||
Target RejectNegative Joined Var(sh_divsi3_libfunc) Init("")
|
||||
|
@ -5,6 +5,7 @@ sh-c.o: $(srcdir)/config/sh/sh-c.c \
|
||||
LIB1ASMSRC = sh/lib1funcs.asm
|
||||
LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movmem \
|
||||
_movmem_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
|
||||
_div_table \
|
||||
$(LIB1ASMFUNCS_CACHE)
|
||||
|
||||
# We want fine grained libraries, so use the new code to build the
|
||||
|
Loading…
Reference in New Issue
Block a user