gcc/libgcc/config/sh/lib1funcs-Os-4-200.S


/* Copyright (C) 2006-2017 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Moderately space-optimized libgcc routines for the Renesas SH /
STMicroelectronics ST40 CPUs.
Contributed by Jörn Rennecke joern.rennecke@st.com. */
#include "lib1funcs.h"
#ifdef L_udivsi3_i4i
/* 88 bytes; sh4-200 cycle counts:
divisor >= 2G: 11 cycles
dividend < 2G: 48 cycles
dividend >= 2G: divisor != 1: 54 cycles
dividend >= 2G, divisor == 1: 22 cycles */
#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
!! args in r4 and r5, result in r0, clobber r1
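/* A rough C model of the approach used below, assuming IEEE-754 double
   arithmetic, a truncating double-to-integer conversion and a nonzero
   divisor (the name udiv_i4i_model is illustrative only):

     unsigned int
     udiv_i4i_model (unsigned int n, unsigned int d)
     {
       if (d >= 0x80000000u)            // LOCAL(huge_divisor): quotient is 0 or 1
         return n >= d;
       if (n >= 0x80000000u && d == 1)  // LOCAL(div_by_1): the result n itself
         return n;                      // would not survive the signed ftrc
       // The assembly obtains (double) n by converting the bit pattern as a
       // signed value (all that `float' offers) and adding 2^32 -- the
       // `.double 4294967296' constant below -- when the sign bit was set.
       return (unsigned int) ((double) n / (double) d);   // fdiv, then ftrc
     }

   Both operands are exact in double precision, so truncating the rounded
   quotient yields the exact integer quotient.  */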
.global GLOBAL(udivsi3_i4i)
FUNC(GLOBAL(udivsi3_i4i))
GLOBAL(udivsi3_i4i):
mova L1,r0
cmp/pz r5
sts fpscr,r1
lds.l @r0+,fpscr
sts.l fpul,@-r15
bf LOCAL(huge_divisor)
mov.l r1,@-r15
lds r4,fpul
cmp/pz r4
#ifdef FMOVD_WORKS
fmov.d dr0,@-r15
float fpul,dr0
fmov.d dr2,@-r15
bt LOCAL(dividend_adjusted)
mov #1,r1
fmov.d @r0,dr2
cmp/eq r1,r5
bt LOCAL(div_by_1)
fadd dr2,dr0
LOCAL(dividend_adjusted):
lds r5,fpul
float fpul,dr2
fdiv dr2,dr0
LOCAL(div_by_1):
fmov.d @r15+,dr2
ftrc dr0,fpul
fmov.d @r15+,dr0
#else /* !FMOVD_WORKS */
fmov.s DR01,@-r15
mov #1,r1
fmov.s DR00,@-r15
float fpul,dr0
fmov.s DR21,@-r15
bt/s LOCAL(dividend_adjusted)
fmov.s DR20,@-r15
cmp/eq r1,r5
bt LOCAL(div_by_1)
fmov.s @r0+,DR20
fmov.s @r0,DR21
fadd dr2,dr0
LOCAL(dividend_adjusted):
lds r5,fpul
float fpul,dr2
fdiv dr2,dr0
LOCAL(div_by_1):
fmov.s @r15+,DR20
fmov.s @r15+,DR21
ftrc dr0,fpul
fmov.s @r15+,DR00
fmov.s @r15+,DR01
#endif /* !FMOVD_WORKS */
lds.l @r15+,fpscr
sts fpul,r0
rts
lds.l @r15+,fpul
#ifdef FMOVD_WORKS
.p2align 3 ! make double below 8 byte aligned.
#endif
LOCAL(huge_divisor):
lds r1,fpscr
add #4,r15
cmp/hs r5,r4
rts
movt r0
.p2align 2
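/* Constant pool: the first word is an FPSCR image selecting double
   precision -- PR alone (bit 19, 0x80000), or PR|SZ (0x180000) when paired
   fmov.d transfers are used -- and the double that follows is 2^32, added
   above to the converted dividend when its sign bit was set.  */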
L1:
#ifndef FMOVD_WORKS
.long 0x80000
#else
.long 0x180000
#endif
.double 4294967296
ENDFUNC(GLOBAL(udivsi3_i4i))
#elif !defined (__sh1__) /* !__SH_FPU_DOUBLE__ */
#if 0
/* With 36 bytes, the following would probably be the most compact
implementation, but with 139 cycles on an sh4-200, it is extremely slow. */
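/* For reference, a C model of the one-bit-per-iteration division that the
   div0u/div1/rotcl sequence below performs (written in restoring form for
   clarity; div1 itself is a non-restoring step, the function name is
   illustrative, and a nonzero divisor is assumed):

     unsigned int
     udiv_one_bit_per_step (unsigned int n, unsigned int d)
     {
       unsigned long long r = 0;   // wide partial remainder: no overflow even
       unsigned int q = 0;         // when the divisor has its top bit set
       int i;
       for (i = 31; i >= 0; i--)
         {
           r = (r << 1) | ((n >> i) & 1);   // bring down the next dividend bit
           q <<= 1;
           if (r >= d)                      // trial subtraction succeeds
             {
               r -= d;
               q |= 1;                      // the bit that rotcl shifts into r0
             }
         }
       return q;
     }
*/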
GLOBAL(udivsi3_i4i):
mov.l r2,@-r15
mov #0,r1
div0u
mov r1,r2
mov.l r3,@-r15
mov r1,r3
sett
mov r4,r0
LOCAL(loop):
rotcr r2
;
bt/s LOCAL(end)
cmp/gt r2,r3
rotcl r0
bra LOCAL(loop)
div1 r5,r1
LOCAL(end):
rotcl r0
mov.l @r15+,r3
rts
mov.l @r15+,r2
#endif /* 0 */
/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
sh4-200 run times:
udiv small divisor: 55 cycles
udiv large divisor: 52 cycles
sdiv small divisor, positive result: 59 cycles
sdiv large divisor, positive result: 56 cycles
sdiv small divisor, negative result: 65 cycles (*)
sdiv large divisor, negative result: 62 cycles (*)
(*): r2 is restored in the rts delay slot and has a lingering latency
of two more cycles. */
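/* The `extu.w r5,r0 / cmp/eq r5,r0' pair below tests whether the divisor
   fits in 16 bits.  If it does, the divisor is shifted into the upper half
   of r5 and the whole division runs as div1 steps on r4, with xtrct/swap.w
   recombining the 16-bit halves; conceptually this works because a 16-bit
   divisor permits base-2^16 long division, as in this illustrative C model
   (not a transcription of the code; nonzero divisor below 0x10000 assumed):

     unsigned int
     udiv_small_divisor_model (unsigned int n, unsigned int d)
     {
       unsigned int hi = n >> 16, lo = n & 0xffff;
       unsigned int q_hi = hi / d;
       unsigned int r = hi % d;                   // r < d < 0x10000
       unsigned int q_lo = ((r << 16) | lo) / d;  // fits in 16 bits
       return (q_hi << 16) | q_lo;
     }
*/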
.balign 4
.global GLOBAL(udivsi3_i4i)
FUNC(GLOBAL(udivsi3_i4i))
FUNC(GLOBAL(sdivsi3_i4i))
GLOBAL(udivsi3_i4i):
sts pr,r1
mov.l r4,@-r15
extu.w r5,r0
cmp/eq r5,r0
swap.w r4,r0
shlr16 r4
bf/s LOCAL(large_divisor)
div0u
mov.l r5,@-r15
shll16 r5
LOCAL(sdiv_small_divisor):
div1 r5,r4
bsr LOCAL(div6)
div1 r5,r4
div1 r5,r4
bsr LOCAL(div6)
div1 r5,r4
xtrct r4,r0
xtrct r0,r4
bsr LOCAL(div7)
swap.w r4,r4
div1 r5,r4
bsr LOCAL(div7)
div1 r5,r4
xtrct r4,r0
mov.l @r15+,r5
swap.w r0,r0
mov.l @r15+,r4
jmp @r1
rotcl r0
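/* Shared helpers: div7 performs seven div1 steps and div6 six (the last one
   in the rts delay slot); divx3 performs three rotcl/div1 pairs, gathering
   quotient bits into r0.  */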
LOCAL(div7):
div1 r5,r4
LOCAL(div6):
div1 r5,r4; div1 r5,r4; div1 r5,r4
div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
LOCAL(divx3):
rotcl r0
div1 r5,r4
rotcl r0
div1 r5,r4
rotcl r0
rts
div1 r5,r4
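/* Divisor does not fit in 16 bits, so the quotient fits in 16 bits: sixteen
   rotcl/div1 steps suffice, with the upper half of the dividend as the
   initial partial remainder in r4 and the lower half fed in bit by bit from
   r0, where the quotient accumulates.  */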
LOCAL(large_divisor):
mov.l r5,@-r15
LOCAL(sdiv_large_divisor):
xor r4,r0
.rept 4
rotcl r0
bsr LOCAL(divx3)
div1 r5,r4
.endr
mov.l @r15+,r5
mov.l @r15+,r4
jmp @r1
rotcl r0
ENDFUNC(GLOBAL(udivsi3_i4i))
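/* sdivsi3_i4i below reduces signed division to the unsigned code above: it
   makes both operands non-negative and, when exactly one of them was
   negative, points the return register r1 at LOCAL(negate_result), parking
   the caller's r2 in macl and keeping the real return address in r2.  A
   rough C model of the reduction (sdiv_model and udiv_model are illustrative
   names; two's-complement wraparound is assumed for the INT_MIN corner):

     int
     sdiv_model (int n, int d)
     {
       unsigned int un = n < 0 ? 0u - (unsigned int) n : (unsigned int) n;
       unsigned int ud = d < 0 ? 0u - (unsigned int) d : (unsigned int) d;
       unsigned int q = udiv_model (un, ud);     // the unsigned routine above
       return (n < 0) != (d < 0) ? (int) (0u - q) : (int) q;
     }
*/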
.global GLOBAL(sdivsi3_i4i)
GLOBAL(sdivsi3_i4i):
mov.l r4,@-r15
cmp/pz r5
mov.l r5,@-r15
bt/s LOCAL(pos_divisor)
cmp/pz r4
neg r5,r5
extu.w r5,r0
bt/s LOCAL(neg_result)
cmp/eq r5,r0
neg r4,r4
LOCAL(pos_result):
swap.w r4,r0
bra LOCAL(sdiv_check_divisor)
sts pr,r1
LOCAL(pos_divisor):
extu.w r5,r0
bt/s LOCAL(pos_result)
cmp/eq r5,r0
neg r4,r4
LOCAL(neg_result):
mova LOCAL(negate_result),r0
;
mov r0,r1
swap.w r4,r0
lds r2,macl
sts pr,r2
LOCAL(sdiv_check_divisor):
shlr16 r4
bf/s LOCAL(sdiv_large_divisor)
div0u
bra LOCAL(sdiv_small_divisor)
shll16 r5
.balign 4
LOCAL(negate_result):
neg r0,r0
jmp @r2
sts macl,r2
ENDFUNC(GLOBAL(sdivsi3_i4i))
#endif /* !__SH_FPU_DOUBLE__ */
#endif /* L_udivsi3_i4i */
#ifdef L_sdivsi3_i4i
#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
/* 48 bytes, 45 cycles on sh4-200 */
!! args in r4 and r5, result in r0, clobber r1
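/* A one-line C model of this routine, assuming IEEE double arithmetic
   (sdiv_i4i_model is an illustrative name): 32-bit operands convert to
   double exactly, truncating the rounded quotient recovers the integer
   quotient, and ftrc truncates toward zero just like the C cast
   (INT_MIN / -1 overflows in either form):

     int
     sdiv_i4i_model (int n, int d)
     {
       return (int) ((double) n / (double) d);
     }
*/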
.global GLOBAL(sdivsi3_i4i)
FUNC(GLOBAL(sdivsi3_i4i))
GLOBAL(sdivsi3_i4i):
sts.l fpscr,@-r15
sts fpul,r1
mova L1,r0
lds.l @r0+,fpscr
lds r4,fpul
#ifdef FMOVD_WORKS
fmov.d dr0,@-r15
float fpul,dr0
lds r5,fpul
fmov.d dr2,@-r15
#else
fmov.s DR01,@-r15
fmov.s DR00,@-r15
float fpul,dr0
lds r5,fpul
fmov.s DR21,@-r15
fmov.s DR20,@-r15
#endif
float fpul,dr2
fdiv dr2,dr0
#ifdef FMOVD_WORKS
fmov.d @r15+,dr2
#else
fmov.s @r15+,DR20
fmov.s @r15+,DR21
#endif
ftrc dr0,fpul
#ifdef FMOVD_WORKS
fmov.d @r15+,dr0
#else
fmov.s @r15+,DR00
fmov.s @r15+,DR01
#endif
lds.l @r15+,fpscr
sts fpul,r0
rts
lds r1,fpul
.p2align 2
L1:
#ifndef FMOVD_WORKS
.long 0x80000
#else
.long 0x180000
#endif
ENDFUNC(GLOBAL(sdivsi3_i4i))
#endif /* __SH_FPU_DOUBLE__ */
#endif /* L_sdivsi3_i4i */