Adjust decimal point of signed accum mode to GCC default.

libgcc/
	Adjust decimal point of signed accum mode to GCC default.

	PR target/54222
	* config/avr/t-avr (LIB1ASMFUNCS): Add _fractsfsq _fractsfusq,
	_divqq_helper.
	* config/avr/lib1funcs-fixed.S (__fractqqsf, __fracthqsf)
	(__fractsasf, __fractsfha, __fractusqsf, __fractsfsa)
	(__mulha3, __mulsa3)
	(__divqq3, __divha3, __divsa3): Adjust to new position of
	decimal point of signed accum types.
	(__mulusa3_round): New function.
	(__mulusa3): Use it.
	(__divqq_helper): New function.
	(__udivuqq3): Use it.

gcc/
	Adjust decimal point of signed accum mode to GCC default.

	PR target/54222
	* config/avr/avr-modes.def (HA, SA, DA): Remove mode adjustments.
	(TA): Move decimal point one bit to the right.
	* config/avr/avr.c (avr_out_fract): Rewrite.

From-SVN: r193721
Georg-Johann Lay 2012-11-22 10:00:13 +00:00 committed by Georg-Johann Lay
6 changed files with 500 additions and 417 deletions

gcc/ChangeLog

@ -1,3 +1,12 @@
2012-11-22 Georg-Johann Lay <avr@gjlay.de>
Adjust decimal point of signed accum mode to GCC default.
PR target/54222
* config/avr/avr-modes.def (HA, SA, DA): Remove mode adjustments.
(TA): Move decimal point one bit to the right.
* config/avr/avr.c (avr_out_fract): Rewrite.
2012-11-21 Matthias Klose <doko@ubuntu.com>
* config/alpha/t-linux: New file; define MULTIARCH_DIRNAME.

gcc/config/avr/avr-modes.def

@ -1,26 +1,13 @@
FRACTIONAL_INT_MODE (PSI, 24, 3);
/* On 8 bit machines it requires fewer instructions for fixed point
routines if the decimal place is on a byte boundary which is not
the default for signed accum types. */
ADJUST_IBIT (HA, 7);
ADJUST_FBIT (HA, 8);
ADJUST_IBIT (SA, 15);
ADJUST_FBIT (SA, 16);
ADJUST_IBIT (DA, 31);
ADJUST_FBIT (DA, 32);
/* Make TA and UTA 64 bits wide.
128-bit wide modes would be insane on an 8-bit machine.
This needs special treatment in avr.c and avr-lib.h. */
ADJUST_BYTESIZE (TA, 8);
ADJUST_ALIGNMENT (TA, 1);
ADJUST_IBIT (TA, 15);
ADJUST_FBIT (TA, 48);
ADJUST_IBIT (TA, 16);
ADJUST_FBIT (TA, 47);
ADJUST_BYTESIZE (UTA, 8);
ADJUST_ALIGNMENT (UTA, 1);
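
For reference, the signed accum layouts that result from this change, written as a small sketch (an assumption for illustration, using the TR 18037 <stdfix.h> macros; not part of the commit):

    #include <stdfix.h>

    /* On avr: HA = short _Accum, SA = _Accum, DA = long _Accum.  */
    _Static_assert (SACCUM_IBIT == 8  && SACCUM_FBIT == 7,  "HA is s8.7,   was s7.8");
    _Static_assert (ACCUM_IBIT  == 16 && ACCUM_FBIT  == 15, "SA is s16.15, was s15.16");
    _Static_assert (LACCUM_IBIT == 32 && LACCUM_FBIT == 31, "DA is s32.31, was s31.32");
    /* TA = long long _Accum, kept at 64 bits on avr; its point also moves
       one bit to the right, giving 16 integral and 47 fractional bits.  */
    _Static_assert (LLACCUM_IBIT == 16 && LLACCUM_FBIT == 47, "TA is s16.47");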

gcc/config/avr/avr.c

@ -6974,6 +6974,332 @@ avr_out_addto_sp (rtx *op, int *plen)
}
/* Outputs instructions needed for fixed point type conversion.
This includes converting between any fixed point type, as well
as converting to any integer type. Conversion between integer
types is not supported.
Converting signed fractional types requires a bit shift if converting
to or from any unsigned fractional type because the decimal place is
shifted by 1 bit. When the destination is a signed fractional, the sign
is stored in either the carry or T bit. */
const char*
avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen)
{
size_t i;
rtx xop[6];
RTX_CODE shift = UNKNOWN;
bool sign_in_carry = false;
bool msb_in_carry = false;
bool lsb_in_carry = false;
const char *code_ashift = "lsl %0";
#define MAY_CLOBBER(RR) \
/* Shorthand used below. */ \
((sign_bytes \
&& IN_RANGE (RR, dest.regno_msb - sign_bytes + 1, dest.regno_msb)) \
|| (reg_unused_after (insn, all_regs_rtx[RR]) \
&& !IN_RANGE (RR, dest.regno, dest.regno_msb)))
struct
{
/* bytes : Length of operand in bytes.
ibyte : Length of integral part in bytes.
fbyte, fbit : Length of fractional part in bytes, bits. */
bool sbit;
unsigned fbit, bytes, ibyte, fbyte;
unsigned regno, regno_msb;
} dest, src, *val[2] = { &dest, &src };
if (plen)
*plen = 0;
/* Step 0: Determine information on source and destination operand we
====== will need in the remainder. */
for (i = 0; i < sizeof (val) / sizeof (*val); i++)
{
enum machine_mode mode;
xop[i] = operands[i];
mode = GET_MODE (xop[i]);
val[i]->bytes = GET_MODE_SIZE (mode);
val[i]->regno = REGNO (xop[i]);
val[i]->regno_msb = REGNO (xop[i]) + val[i]->bytes - 1;
if (SCALAR_INT_MODE_P (mode))
{
val[i]->sbit = intsigned;
val[i]->fbit = 0;
}
else if (ALL_SCALAR_FIXED_POINT_MODE_P (mode))
{
val[i]->sbit = SIGNED_SCALAR_FIXED_POINT_MODE_P (mode);
val[i]->fbit = GET_MODE_FBIT (mode);
}
else
fatal_insn ("unsupported fixed-point conversion", insn);
val[i]->fbyte = (1 + val[i]->fbit) / BITS_PER_UNIT;
val[i]->ibyte = val[i]->bytes - val[i]->fbyte;
}
// Byte offset of the decimal point taking into account different place
// of the decimal point in input and output and different register numbers
// of input and output.
int offset = dest.regno - src.regno + dest.fbyte - src.fbyte;
// Number of destination bytes that will come from sign / zero extension.
int sign_bytes = (dest.ibyte - src.ibyte) * (dest.ibyte > src.ibyte);
// Number of bytes at the low end to be filled with zeros.
int zero_bytes = (dest.fbyte - src.fbyte) * (dest.fbyte > src.fbyte);
// Do we have a 16-bit register that is cleared?
rtx clrw = NULL_RTX;
bool sign_extend = src.sbit && sign_bytes;
if (0 == dest.fbit % 8 && 7 == src.fbit % 8)
shift = ASHIFT;
else if (7 == dest.fbit % 8 && 0 == src.fbit % 8)
shift = ASHIFTRT;
else if (dest.fbit % 8 == src.fbit % 8)
shift = UNKNOWN;
else
gcc_unreachable();
/* Step 1: Clear bytes at the low end and copy payload bits from source
====== to destination. */
int step = offset < 0 ? 1 : -1;
unsigned d0 = offset < 0 ? dest.regno : dest.regno_msb;
// We cleared at least that number of registers.
int clr_n = 0;
for (; d0 >= dest.regno && d0 <= dest.regno_msb; d0 += step)
{
// Next regno of destination is needed for MOVW
unsigned d1 = d0 + step;
// Current and next regno of source
unsigned s0 = d0 - offset;
unsigned s1 = s0 + step;
// Must current resp. next regno be CLRed? This applies to the low
// bytes of the destination that have no associated source bytes.
bool clr0 = s0 < src.regno;
bool clr1 = s1 < src.regno && d1 >= dest.regno;
// First gather what code to emit (if any) and additional step to
// apply if a MOVW is in use. xop[2] is destination rtx and xop[3]
// is the source rtx for the current loop iteration.
const char *code = NULL;
int stepw = 0;
if (clr0)
{
if (AVR_HAVE_MOVW && clr1 && clrw)
{
xop[2] = all_regs_rtx[d0 & ~1];
xop[3] = clrw;
code = "movw %2,%3";
stepw = step;
}
else
{
xop[2] = all_regs_rtx[d0];
code = "clr %2";
if (++clr_n >= 2
&& !clrw
&& d0 % 2 == (step > 0))
{
clrw = all_regs_rtx[d0 & ~1];
}
}
}
else if (offset && s0 <= src.regno_msb)
{
int movw = AVR_HAVE_MOVW && offset % 2 == 0
&& d0 % 2 == (offset > 0)
&& d1 <= dest.regno_msb && d1 >= dest.regno
&& s1 <= src.regno_msb && s1 >= src.regno;
xop[2] = all_regs_rtx[d0 & ~movw];
xop[3] = all_regs_rtx[s0 & ~movw];
code = movw ? "movw %2,%3" : "mov %2,%3";
stepw = step * movw;
}
if (code)
{
if (sign_extend && shift != ASHIFT && !sign_in_carry
&& (d0 == src.regno_msb || d0 + stepw == src.regno_msb))
{
/* We are going to override the sign bit. If we sign-extend,
store the sign in the Carry flag. This is not needed if
the destination will be ASHIFT-ed in the remainder because
the ASHIFT will set Carry without an extra instruction. */
avr_asm_len ("lsl %0", &all_regs_rtx[src.regno_msb], plen, 1);
sign_in_carry = true;
}
unsigned src_msb = dest.regno_msb - sign_bytes - offset + 1;
if (!sign_extend && shift == ASHIFTRT && !msb_in_carry
&& src.ibyte > dest.ibyte
&& (d0 == src_msb || d0 + stepw == src_msb))
{
/* We are going to override the MSB. If we shift right,
store the MSB in the Carry flag. This is only needed if
we don't sign-extend because with sign-extension the MSB
(the sign) will be produced by the sign extension. */
avr_asm_len ("lsr %0", &all_regs_rtx[src_msb], plen, 1);
msb_in_carry = true;
}
unsigned src_lsb = dest.regno - offset -1;
if (shift == ASHIFT && src.fbyte > dest.fbyte && !lsb_in_carry
&& (d0 == src_lsb || d0 + stepw == src_lsb))
{
/* We are going to override the new LSB; store it into carry. */
avr_asm_len ("lsl %0", &all_regs_rtx[src_lsb], plen, 1);
code_ashift = "rol %0";
lsb_in_carry = true;
}
avr_asm_len (code, xop, plen, 1);
d0 += stepw;
}
}
/* Step 2: Shift destination left by 1 bit position. This might be needed
====== for signed input and unsigned output. */
if (shift == ASHIFT && src.fbyte > dest.fbyte && !lsb_in_carry)
{
unsigned s0 = dest.regno - offset -1;
if (MAY_CLOBBER (s0))
avr_asm_len ("lsl %0", &all_regs_rtx[s0], plen, 1);
else
avr_asm_len ("mov __tmp_reg__,%0" CR_TAB
"lsl __tmp_reg__", &all_regs_rtx[s0], plen, 2);
code_ashift = "rol %0";
lsb_in_carry = true;
}
if (shift == ASHIFT)
{
for (d0 = dest.regno + zero_bytes;
d0 <= dest.regno_msb - sign_bytes; d0++)
{
avr_asm_len (code_ashift, &all_regs_rtx[d0], plen, 1);
code_ashift = "rol %0";
}
lsb_in_carry = false;
sign_in_carry = true;
}
/* Step 4a: Store MSB in carry if we don't already have it or will produce
======= it in sign-extension below. */
if (!sign_extend && shift == ASHIFTRT && !msb_in_carry
&& src.ibyte > dest.ibyte)
{
unsigned s0 = dest.regno_msb - sign_bytes - offset + 1;
if (MAY_CLOBBER (s0))
avr_asm_len ("lsr %0", &all_regs_rtx[s0], plen, 1);
else
avr_asm_len ("mov __tmp_reg__,%0" CR_TAB
"lsr __tmp_reg__", &all_regs_rtx[s0], plen, 2);
msb_in_carry = true;
}
/* Step 3: Sign-extend or zero-extend the destination as needed.
====== */
if (sign_extend && !sign_in_carry)
{
unsigned s0 = src.regno_msb;
if (MAY_CLOBBER (s0))
avr_asm_len ("lsl %0", &all_regs_rtx[s0], plen, 1);
else
avr_asm_len ("mov __tmp_reg__,%0" CR_TAB
"lsl __tmp_reg__", &all_regs_rtx[s0], plen, 2);
sign_in_carry = true;
}
gcc_assert (sign_in_carry + msb_in_carry + lsb_in_carry <= 1);
unsigned copies = 0;
rtx movw = sign_extend ? NULL_RTX : clrw;
for (d0 = dest.regno_msb - sign_bytes + 1; d0 <= dest.regno_msb; d0++)
{
if (AVR_HAVE_MOVW && movw
&& d0 % 2 == 0 && d0 + 1 <= dest.regno_msb)
{
xop[2] = all_regs_rtx[d0];
xop[3] = movw;
avr_asm_len ("movw %2,%3", xop, plen, 1);
d0++;
}
else
{
avr_asm_len (sign_extend ? "sbc %0,%0" : "clr %0",
&all_regs_rtx[d0], plen, 1);
if (++copies >= 2 && !movw && d0 % 2 == 1)
movw = all_regs_rtx[d0-1];
}
} /* for */
/* Step 4: Right shift the destination. This might be needed for
====== conversions from unsigned to signed. */
if (shift == ASHIFTRT)
{
const char *code_ashiftrt = "lsr %0";
if (sign_extend || msb_in_carry)
code_ashiftrt = "ror %0";
if (src.sbit && src.ibyte == dest.ibyte)
code_ashiftrt = "asr %0";
for (d0 = dest.regno_msb - sign_bytes;
d0 >= dest.regno + zero_bytes - 1 && d0 >= dest.regno; d0--)
{
avr_asm_len (code_ashiftrt, &all_regs_rtx[d0], plen, 1);
code_ashiftrt = "ror %0";
}
}
#undef MAY_CLOBBER
return "";
}
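
For illustration (an editorial sketch, not generated output), conversions like the following are the kind that get expanded through avr_out_fract; with this patch the signed accum operands carry their decimal point at the GCC default position:

    #include <stdfix.h>

    _Accum       widen  (short _Accum x) { return x; }        /* HA -> SA widening      */
    short _Accum narrow (_Accum x)       { return x; }        /* SA -> HA narrowing     */
    int          to_int (_Accum x)       { return (int) x; }  /* fixed-point -> integer */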
/* Create RTL split patterns for byte sized rotate expressions. This
produces a series of move instructions and considers overlap situations.
Overlapping non-HImode operands need a scratch register. */
@ -7123,348 +7449,6 @@ avr_rotate_bytes (rtx operands[])
}
/* Outputs instructions needed for fixed point type conversion.
This includes converting between any fixed point type, as well
as converting to any integer type. Conversion between integer
types is not supported.
The number of instructions generated depends on the types
being converted and the registers assigned to them.
The number of instructions required to complete the conversion
is least if the registers for source and destination are overlapping
and are aligned at the decimal place as actual movement of data is
completely avoided. In some cases, the conversion may already be
complete without any instructions needed.
When converting to signed types from signed types, sign extension
is implemented.
Converting signed fractional types requires a bit shift if converting
to or from any unsigned fractional type because the decimal place is
shifted by 1 bit. When the destination is a signed fractional, the sign
is stored in either the carry or T bit. */
const char*
avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen)
{
int i;
bool sbit[2];
/* ilen: Length of integral part (in bytes)
flen: Length of fractional part (in bytes)
tlen: Length of operand (in bytes)
blen: Length of operand (in bits) */
int ilen[2], flen[2], tlen[2], blen[2];
int rdest, rsource, offset;
int start, end, dir;
bool sign_in_T = false, sign_in_Carry = false, sign_done = false;
bool widening_sign_extend = false;
int clrword = -1, lastclr = 0, clr = 0;
rtx xop[6];
const int dest = 0;
const int src = 1;
xop[dest] = operands[dest];
xop[src] = operands[src];
if (plen)
*plen = 0;
/* Determine format (integer and fractional parts)
of types needing conversion. */
for (i = 0; i < 2; i++)
{
enum machine_mode mode = GET_MODE (xop[i]);
tlen[i] = GET_MODE_SIZE (mode);
blen[i] = GET_MODE_BITSIZE (mode);
if (SCALAR_INT_MODE_P (mode))
{
sbit[i] = intsigned;
ilen[i] = GET_MODE_SIZE (mode);
flen[i] = 0;
}
else if (ALL_SCALAR_FIXED_POINT_MODE_P (mode))
{
sbit[i] = SIGNED_SCALAR_FIXED_POINT_MODE_P (mode);
ilen[i] = (GET_MODE_IBIT (mode) + 1) / 8;
flen[i] = (GET_MODE_FBIT (mode) + 1) / 8;
}
else
fatal_insn ("unsupported fixed-point conversion", insn);
}
/* Perform sign extension if source and dest are both signed,
and there are more integer parts in dest than in source. */
widening_sign_extend = sbit[dest] && sbit[src] && ilen[dest] > ilen[src];
rdest = REGNO (xop[dest]);
rsource = REGNO (xop[src]);
offset = flen[src] - flen[dest];
/* Position of MSB resp. sign bit. */
xop[2] = GEN_INT (blen[dest] - 1);
xop[3] = GEN_INT (blen[src] - 1);
/* Store the sign bit if the destination is a signed fract and the source
has a sign in the integer part. */
if (sbit[dest] && ilen[dest] == 0 && sbit[src] && ilen[src] > 0)
{
/* To avoid using BST and BLD if the source and destination registers
overlap or the source is unused after, we can use LSL to store the
sign bit in carry since we don't need the integral part of the source.
Restoring the sign from carry saves one BLD instruction below. */
if (reg_unused_after (insn, xop[src])
|| (rdest < rsource + tlen[src]
&& rdest + tlen[dest] > rsource))
{
avr_asm_len ("lsl %T1%t3", xop, plen, 1);
sign_in_Carry = true;
}
else
{
avr_asm_len ("bst %T1%T3", xop, plen, 1);
sign_in_T = true;
}
}
/* Pick the correct direction to shift bytes. */
if (rdest < rsource + offset)
{
dir = 1;
start = 0;
end = tlen[dest];
}
else
{
dir = -1;
start = tlen[dest] - 1;
end = -1;
}
/* Perform conversion by moving registers into place, clearing
destination registers that do not overlap with any source. */
for (i = start; i != end; i += dir)
{
int destloc = rdest + i;
int sourceloc = rsource + i + offset;
/* Source register location is outside range of source register,
so clear this byte in the dest. */
if (sourceloc < rsource
|| sourceloc >= rsource + tlen[src])
{
if (AVR_HAVE_MOVW
&& i + dir != end
&& (sourceloc + dir < rsource
|| sourceloc + dir >= rsource + tlen[src])
&& ((dir == 1 && !(destloc % 2) && !(sourceloc % 2))
|| (dir == -1 && (destloc % 2) && (sourceloc % 2)))
&& clrword != -1)
{
/* Use already cleared word to clear two bytes at a time. */
int even_i = i & ~1;
int even_clrword = clrword & ~1;
xop[4] = GEN_INT (8 * even_i);
xop[5] = GEN_INT (8 * even_clrword);
avr_asm_len ("movw %T0%t4,%T0%t5", xop, plen, 1);
i += dir;
}
else
{
if (i == tlen[dest] - 1
&& widening_sign_extend
&& blen[src] - 1 - 8 * offset < 0)
{
/* The SBRC below that sign-extends would come
up with a negative bit number because the sign
bit is out of reach. Also avoid some early-clobber
situations because of premature CLR. */
if (reg_unused_after (insn, xop[src]))
avr_asm_len ("lsl %T1%t3" CR_TAB
"sbc %T0%t2,%T0%t2", xop, plen, 2);
else
avr_asm_len ("mov __tmp_reg__,%T1%t3" CR_TAB
"lsl __tmp_reg__" CR_TAB
"sbc %T0%t2,%T0%t2", xop, plen, 3);
sign_done = true;
continue;
}
/* Do not clear the register if it is going to get
sign extended with a MOV later. */
if (sbit[dest] && sbit[src]
&& i != tlen[dest] - 1
&& i >= flen[dest])
{
continue;
}
xop[4] = GEN_INT (8 * i);
avr_asm_len ("clr %T0%t4", xop, plen, 1);
/* If the last byte was cleared too, we have a cleared
word we can MOVW to clear two bytes at a time. */
if (lastclr)
clrword = i;
clr = 1;
}
}
else if (destloc == sourceloc)
{
/* Source byte is already in destination: Nothing needed. */
continue;
}
else
{
/* Registers do not line up and source register location
is within range: Perform move, shifting with MOV or MOVW. */
if (AVR_HAVE_MOVW
&& i + dir != end
&& sourceloc + dir >= rsource
&& sourceloc + dir < rsource + tlen[src]
&& ((dir == 1 && !(destloc % 2) && !(sourceloc % 2))
|| (dir == -1 && (destloc % 2) && (sourceloc % 2))))
{
int even_i = i & ~1;
int even_i_plus_offset = (i + offset) & ~1;
xop[4] = GEN_INT (8 * even_i);
xop[5] = GEN_INT (8 * even_i_plus_offset);
avr_asm_len ("movw %T0%t4,%T1%t5", xop, plen, 1);
i += dir;
}
else
{
xop[4] = GEN_INT (8 * i);
xop[5] = GEN_INT (8 * (i + offset));
avr_asm_len ("mov %T0%t4,%T1%t5", xop, plen, 1);
}
}
lastclr = clr;
clr = 0;
}
/* Perform sign extension if source and dest are both signed,
and there are more integer parts in dest than in source. */
if (widening_sign_extend)
{
if (!sign_done)
{
xop[4] = GEN_INT (blen[src] - 1 - 8 * offset);
/* Register was cleared above, so can become 0xff and extended.
Note: Instead of the CLR/SBRC/COM the sign extension could
be performed after the LSL below by means of a SBC if only
one byte has to be shifted left. */
avr_asm_len ("sbrc %T0%T4" CR_TAB
"com %T0%t2", xop, plen, 2);
}
/* Sign extend additional bytes by MOV and MOVW. */
start = tlen[dest] - 2;
end = flen[dest] + ilen[src] - 1;
for (i = start; i != end; i--)
{
if (AVR_HAVE_MOVW && i != start && i-1 != end)
{
i--;
xop[4] = GEN_INT (8 * i);
xop[5] = GEN_INT (8 * (tlen[dest] - 2));
avr_asm_len ("movw %T0%t4,%T0%t5", xop, plen, 1);
}
else
{
xop[4] = GEN_INT (8 * i);
xop[5] = GEN_INT (8 * (tlen[dest] - 1));
avr_asm_len ("mov %T0%t4,%T0%t5", xop, plen, 1);
}
}
}
/* If destination is a signed fract, and the source was not, a shift
by 1 bit is needed. Also restore sign from carry or T. */
if (sbit[dest] && !ilen[dest] && (!sbit[src] || ilen[src]))
{
/* We have flen[src] non-zero fractional bytes to shift.
Because of the right shift, handle one byte more so that the
LSB won't be lost. */
int nonzero = flen[src] + 1;
/* If the LSB is in the T flag and there are no fractional
bits, the high byte is zero and no shift needed. */
if (flen[src] == 0 && sign_in_T)
nonzero = 0;
start = flen[dest] - 1;
end = start - nonzero;
for (i = start; i > end && i >= 0; i--)
{
xop[4] = GEN_INT (8 * i);
if (i == start && !sign_in_Carry)
avr_asm_len ("lsr %T0%t4", xop, plen, 1);
else
avr_asm_len ("ror %T0%t4", xop, plen, 1);
}
if (sign_in_T)
{
avr_asm_len ("bld %T0%T2", xop, plen, 1);
}
}
else if (sbit[src] && !ilen[src] && (!sbit[dest] || ilen[dest]))
{
/* If source was a signed fract and dest was not, shift 1 bit
other way. */
start = flen[dest] - flen[src];
if (start < 0)
start = 0;
for (i = start; i < flen[dest]; i++)
{
xop[4] = GEN_INT (8 * i);
if (i == start)
avr_asm_len ("lsl %T0%t4", xop, plen, 1);
else
avr_asm_len ("rol %T0%t4", xop, plen, 1);
}
}
return "";
}
/* Modifies the length assigned to instruction INSN
LEN is the initially computed length of the insn. */

libgcc/ChangeLog

@ -1,3 +1,21 @@
2012-11-22 Georg-Johann Lay <avr@gjlay.de>
Adjust decimal point of signed accum mode to GCC default.
PR target/54222
* config/avr/t-avr (LIB1ASMFUNCS): Add _fractsfsq _fractsfusq,
_divqq_helper.
* config/avr/lib1funcs-fixed.S (__fractqqsf, __fracthqsf)
(__fractsasf, __fractsfha, __fractusqsf, __fractsfsa)
(__mulha3, __mulsa3)
(__divqq3, __divha3, __divsa3): Adjust to new position of
decimal point of signed accum types.
(__mulusa3_round): New function.
(__mulusa3): Use it.
(__divqq_helper): New function.
(__udivuqq3): Use it.
2012-11-20 Jakub Jelinek <jakub@redhat.com>
PR bootstrap/55370

libgcc/config/avr/lib1funcs-fixed.S

@ -43,8 +43,8 @@ DEFUN __fractqqsf
;; Move in place for SA -> SF conversion
clr r22
mov r23, r24
lsl r23
;; Sign-extend
lsl r24
sbc r24, r24
mov r25, r24
XJMP __fractsasf
@ -67,9 +67,8 @@ ENDF __fractuqqsf
DEFUN __fracthqsf
;; Move in place for SA -> SF conversion
wmov 22, 24
lsl r22
rol r23
;; Sign-extend
lsl r25
sbc r24, r24
mov r25, r24
XJMP __fractsasf
@ -140,11 +139,13 @@ ENDF __fractusqsf
#if defined (L_fractsasf)
DEFUN __fractsasf
XCALL __floatsisf
;; Divide non-zero results by 2^16 to move the
;; Divide non-zero results by 2^15 to move the
;; decimal point into place
cpse r25, __zero_reg__
subi r25, exp_hi (16)
ret
tst r25
breq 0f
subi r24, exp_lo (15)
sbci r25, exp_hi (15)
0: ret
ENDF __fractsasf
#endif /* L_fractsasf */
@ -186,8 +187,9 @@ ENDF __fractsfuqq
#if defined (L_fractsfha)
DEFUN __fractsfha
;; Multiply with 2^24 to get a HA result in r25:r24
subi r25, exp_hi (-24)
;; Multiply with 2^{16+7} to get a HA result in r25:r24
subi r24, exp_lo (-23)
sbci r25, exp_hi (-23)
XJMP __fixsfsi
ENDF __fractsfha
#endif /* L_fractsfha */
@ -201,8 +203,7 @@ ENDF __fractsfuha
#endif /* L_fractsfuha */
#if defined (L_fractsfhq)
DEFUN __fractsfsq
ENDF __fractsfsq
FALIAS __fractsfsq
DEFUN __fractsfhq
;; Multiply with 2^{16+15} to get a HQ result in r25:r24
@ -214,8 +215,7 @@ ENDF __fractsfhq
#endif /* L_fractsfhq */
#if defined (L_fractsfuhq)
DEFUN __fractsfusq
ENDF __fractsfusq
FALIAS __fractsfusq
DEFUN __fractsfuhq
;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
@ -227,8 +227,9 @@ ENDF __fractsfuhq
#if defined (L_fractsfsa)
DEFUN __fractsfsa
;; Multiply with 2^16 to get a SA result in r25:r22
subi r25, exp_hi (-16)
;; Multiply with 2^15 to get a SA result in r25:r22
subi r24, exp_lo (-15)
sbci r25, exp_hi (-15)
XJMP __fixsfsi
ENDF __fractsfsa
#endif /* L_fractsfsa */
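
Stated as plain C (a sketch that assumes SA values are handled as raw int32_t with 15 fractional bits; saturation and rounding details are not modelled), the float conversions now scale by 2^15 instead of 2^16:

    #include <stdint.h>

    float   sa_to_float (int32_t raw) { return (float) raw / 32768.0f; }   /* cf. __fractsasf            */
    int32_t float_to_sa (float x)     { return (int32_t) (x * 32768.0f); } /* cf. __fractsfsa, truncates */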
@ -325,6 +326,9 @@ ENDF __muluhq3
;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulha3
XCALL __mulhisi3
lsl r22
rol r23
rol r24
XJMP __muluha3_round
ENDF __mulha3
#endif /* L_mulha3 */
@ -359,6 +363,9 @@ ENDF __muluha3_round
Fixed Multiplication 16.16 x 16.16
*******************************************************/
;; Bits outside the result (below LSB), used in the signed version
#define GUARD __tmp_reg__
#if defined (__AVR_HAVE_MUL__)
;; Multiplier
@ -381,9 +388,16 @@ ENDF __muluha3_round
#if defined (L_mulusa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
;;; Clobbers: __tmp_reg__
;;; Rounding: -0.5 LSB < error <= 0.5 LSB
DEFUN __mulusa3
DEFUN __mulusa3
set
;; Fallthru
ENDF __mulusa3
;;; Round for last digit iff T = 1
;;; Return guard bits in GUARD (__tmp_reg__).
;;; Rounding, T = 0: -1.0 LSB < error <= 0 LSB
;;; Rounding, T = 1: -0.5 LSB < error <= 0.5 LSB
DEFUN __mulusa3_round
;; Some of the MUL instructions have LSBs outside the result.
;; Don't ignore these LSBs in order to tame rounding error.
;; Use C2/C3 for these LSBs.
@ -395,9 +409,12 @@ DEFUN __mulusa3
mul A1, B0 $ add C3, r0 $ adc C0, r1
mul A0, B1 $ add C3, r0 $ adc C0, r1 $ rol C1
;; Round
;; Round if T = 1. Store guarding bits outside the result for rounding
;; and left-shift by the signed version (function below).
brtc 0f
sbrc C3, 7
adiw C0, 1
0: push C3
;; The following MULs don't have LSBs outside the result.
;; C2/C3 is the high part.
@ -420,25 +437,42 @@ DEFUN __mulusa3
mul A2, B3 $ add C3, r0
mul A3, B2 $ add C3, r0
;; Guard bits used in the signed version below.
pop GUARD
clr __zero_reg__
ret
ENDF __mulusa3
ENDF __mulusa3_round
#endif /* L_mulusa3 */
#if defined (L_mulsa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
;;; Clobbers: __tmp_reg__
;;; Clobbers: __tmp_reg__, T
;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulsa3
XCALL __mulusa3
clt
XCALL __mulusa3_round
;; A posteriori sign extension of the operands
tst B3
brpl 1f
brpl 1f
sub C2, A0
sbc C3, A1
1: sbrs A3, 7
ret
rjmp 2f
sub C2, B0
sbc C3, B1
2:
;; Shift 1 bit left to adjust for 15 fractional bits
lsl GUARD
rol C0
rol C1
rol C2
rol C3
;; Round last digit
lsl GUARD
adc C0, __zero_reg__
adc C1, __zero_reg__
adc C2, __zero_reg__
adc C3, __zero_reg__
ret
ENDF __mulsa3
#endif /* L_mulsa3 */
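
A rough C model of the s16.15 multiply that __mulsa3 now performs, including the final rounding step (an approximation for illustration only; the assembly keeps just a few guard bits, which is where the stated error bounds come from):

    #include <stdint.h>

    /* Raw 32-bit operands and result carry 15 fractional bits (SA).  */
    static int32_t mulsa_model (int32_t a, int32_t b)
    {
      int64_t prod = (int64_t) a * b;    /* 30 fractional bits         */
      prod += (int64_t) 1 << 14;         /* round to nearest           */
      return (int32_t) (prod >> 15);     /* back to 15 fractional bits */
    }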
@ -492,27 +526,56 @@ ENDF __mulsa3
DEFUN __mulsa3
push B0
push B1
bst B3, 7
XCALL __mulusa3
;; A survived in 31:30:27:26
rcall 1f
pop AA1
pop AA0
bst AA3, 7
1: brtc 9f
;; 1-extend A/B
push B3
clt
XCALL __mulusa3_round
pop r30
;; sign-extend B
bst r30, 7
brtc 1f
;; A1, A0 survived in R27:R26
sub C2, AA0
sbc C3, AA1
9: ret
1:
pop AA1 ;; B1
pop AA0 ;; B0
;; sign-extend A. A3 survived in R31
bst AA3, 7
brtc 2f
sub C2, AA0
sbc C3, AA1
2:
;; Shift 1 bit left to adjust for 15 fractional bits
lsl GUARD
rol C0
rol C1
rol C2
rol C3
;; Round last digit
lsl GUARD
adc C0, __zero_reg__
adc C1, __zero_reg__
adc C2, __zero_reg__
adc C3, __zero_reg__
ret
ENDF __mulsa3
#endif /* L_mulsa3 */
#if defined (L_mulusa3)
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs and __mulsua
;;; Clobbers: ABI, called by optabs
;;; Rounding: -1 LSB <= error <= 1 LSB
;;; Does not clobber T and A[] survives in 26, 27, 30, 31
DEFUN __mulusa3
DEFUN __mulusa3
set
;; Fallthru
ENDF __mulusa3
;;; A[] survives in 26, 27, 30, 31
;;; Also used by __mulsa3 with T = 0
;;; Round if T = 1
;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
DEFUN __mulusa3_round
push CC2
push CC3
; clear result
@ -560,21 +623,26 @@ DEFUN __mulusa3
sbci B0, 0
brne 5b
;; Move result into place and round
;; Save guard bits and set carry for rounding
push B3
lsl B3
;; Move result into place
wmov C2, CC2
wmov C0, CC0
clr __zero_reg__
brtc 6f
;; Round iff T = 1
adc C0, __zero_reg__
adc C1, __zero_reg__
adc C2, __zero_reg__
adc C3, __zero_reg__
6:
pop GUARD
;; Epilogue
pop CC3
pop CC2
ret
ENDF __mulusa3
ENDF __mulusa3_round
#endif /* L_mulusa3 */
#undef A0
@ -600,6 +668,8 @@ ENDF __mulusa3
#endif /* __AVR_HAVE_MUL__ */
#undef GUARD
/*******************************************************
Fractional Division 8 / 8
*******************************************************/
@ -607,30 +677,38 @@ ENDF __mulusa3
#define r_divd r25 /* dividend */
#define r_quo r24 /* quotient */
#define r_div r22 /* divisor */
#define r_sign __tmp_reg__
#if defined (L_divqq3)
DEFUN __divqq3
mov r0, r_divd
eor r0, r_div
mov r_sign, r_divd
eor r_sign, r_div
sbrc r_div, 7
neg r_div
sbrc r_divd, 7
neg r_divd
cp r_divd, r_div
breq __divqq3_minus1 ; if equal return -1
XCALL __udivuqq3
XCALL __divqq_helper
lsr r_quo
sbrc r0, 7 ; negate result if needed
sbrc r_sign, 7 ; negate result if needed
neg r_quo
ret
__divqq3_minus1:
ldi r_quo, 0x80
ret
ENDF __divqq3
#endif /* defined (L_divqq3) */
#endif /* L_divqq3 */
#if defined (L_udivuqq3)
DEFUN __udivuqq3
cp r_divd, r_div
brsh 0f
XJMP __divqq_helper
;; Result is out of [0, 1) ==> Return 1 - eps.
0: ldi r_quo, 0xff
ret
ENDF __udivuqq3
#endif /* L_udivuqq3 */
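
Equivalently (a sketch for illustration, not part of libgcc), the new __udivuqq3 behaves like an unsigned Q0.8 division that saturates any result at or above 1.0:

    #include <stdint.h>

    static uint8_t udivuqq_model (uint8_t dividend, uint8_t divisor)
    {
      if (dividend >= divisor)
        return 0xff;                     /* 1.0 - 2^-8, i.e. "1 - eps"            */
      return (uint8_t) (((uint16_t) dividend << 8) / divisor);
                                         /* done bit by bit in __divqq_helper     */
    }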
#if defined (L_divqq_helper)
DEFUN __divqq_helper
clr r_quo ; clear quotient
inc __zero_reg__ ; init loop counter, used per shift
__udivuqq3_loop:
@ -649,12 +727,13 @@ __udivuqq3_cont:
com r_quo ; complement result
; because C flag was complemented in loop
ret
ENDF __udivuqq3
#endif /* defined (L_udivuqq3) */
ENDF __divqq_helper
#endif /* L_divqq_helper */
#undef r_divd
#undef r_quo
#undef r_div
#undef r_sign
/*******************************************************
@ -746,6 +825,8 @@ DEFUN __divha3
NEG2 r_divdL
2:
XCALL __udivuha3
lsr r_quoH ; adjust to 7 fractional bits
ror r_quoL
sbrs r0, 7 ; negate result if needed
ret
NEG2 r_quoL
@ -806,6 +887,10 @@ DEFUN __divsa3
NEG4 r_arg1L
2:
XCALL __udivusa3
lsr r_quoHH ; adjust to 15 fractional bits
ror r_quoHL
ror r_quoH
ror r_quoL
sbrs r0, 7 ; negate result if needed
ret
;; negate r_quoL
@ -1024,8 +1109,8 @@ DEFUN __usadd_8
XCALL __adddi3
brcs 0f
ret
;; A[] = 0xffffffff
0: XJMP __sbc_8
0: ;; A[] = 0xffffffff
XJMP __sbc_8
ENDF __usadd_8
#endif /* L_usadd_8 */
@ -1038,8 +1123,8 @@ DEFUN __ussub_8
XCALL __subdi3
brcs 0f
ret
;; A[] = 0
0: XJMP __clr_8
0: ;; A[] = 0
XJMP __clr_8
ENDF __ussub_8
#endif /* L_ussub_8 */
@ -1049,9 +1134,9 @@ FALIAS __ssaddda3
FALIAS __ssadddq3
DEFUN __ssadd_8
;; A = (B >= 0) ? INT64_MAX : INT64_MIN
XCALL __adddi3
brvc 0f
;; A = (B >= 0) ? INT64_MAX : INT64_MIN
cpi B7, 0x80
XCALL __sbc_8
subi A7, 0x80
@ -1067,7 +1152,7 @@ FALIAS __sssubdq3
DEFUN __sssub_8
XCALL __subdi3
brvc 0f
;; A = (B < 0) ? INT64_MAX : INT64_MIN
;; A = (B < 0) ? INT64_MAX : INT64_MIN
ldi A7, 0x7f
cp A7, B7
XCALL __sbc_8
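
The saturation helpers touched above follow the usual overflow-check pattern; a rough C model (an assumption, not taken from this file) of the signed 64-bit addition case:

    #include <stdint.h>

    static int64_t ssadd8_model (int64_t a, int64_t b)    /* cf. __ssadd_8 */
    {
      int64_t sum;
      if (__builtin_add_overflow (a, b, &sum))    /* signed overflow?                */
        return b >= 0 ? INT64_MAX : INT64_MIN;    /* matches the __ssadd_8 comment   */
      return sum;
    }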

libgcc/config/avr/t-avr

@ -64,12 +64,12 @@ LIB1ASMFUNCS += \
\
_fractsfqq _fractsfuqq \
_fractsfhq _fractsfuhq _fractsfha _fractsfuha \
_fractsfsa _fractsfusa \
_fractsfsq _fractsfusq _fractsfsa _fractsfusa \
_mulqq3 \
_mulhq3 _muluhq3 \
_mulha3 _muluha3 _muluha3_round \
_mulsa3 _mulusa3 \
_divqq3 _udivuqq3 \
_divqq3 _udivuqq3 _divqq_helper \
_divhq3 _udivuhq3 \
_divha3 _udivuha3 \
_divsa3 _udivusa3 \