re PR target/57516 ([avr] Incorrect fixed-point rounding result in the overflow case)
gcc/ PR target/57516 * config/avr/avr-fixed.md (round<mode>3_const): Turn expander to insn. * config/avr/avr.md (adjust_len): Add `round'. * config/avr/avr-protos.h (avr_out_round): New prototype. (avr_out_plus): Add `out_label' argument. * config/avr/avr.c (avr_out_plus_1): Add `out_label' argument. (avr_out_plus): Pass down `out_label' to avr_out_plus_1. Handle the case where `insn' is just a pattern. (avr_out_bitop): Handle the case where `insn' is just a pattern. (avr_out_round): New function. (avr_adjust_insn_length): Handle ADJUST_LEN_ROUND. libgcc/ PR target/57516 * config/avr/lib1funcs-fixed.S (__roundqq3, __rounduqq3) (__round_s2_const, __round_u2_const) (__round_s4_const, __round_u4_const, __round_x8): Saturate result if addition result cannot be represented. gcc/testsuite/ PR target/57516 * gcc.target/avr/torture/builtins-4-roundfx.c (test2hr, test2k): Adjust to corrected rounding. From-SVN: r201051
This commit is contained in:
parent
1545921d63
commit
0237179843
@ -1,3 +1,17 @@
|
||||
2013-07-19 Georg-Johann Lay <avr@gjlay.de>
|
||||
|
||||
PR target/57516
|
||||
* config/avr/avr-fixed.md (round<mode>3_const): Turn expander to insn.
|
||||
* config/avr/avr.md (adjust_len): Add `round'.
|
||||
* config/avr/avr-protos.h (avr_out_round): New prototype.
|
||||
(avr_out_plus): Add `out_label' argument.
|
||||
* config/avr/avr.c (avr_out_plus_1): Add `out_label' argument.
|
||||
(avr_out_plus): Pass down `out_label' to avr_out_plus_1.
|
||||
Handle the case where `insn' is just a pattern.
|
||||
(avr_out_bitop): Handle the case where `insn' is just a pattern.
|
||||
(avr_out_round): New function.
|
||||
(avr_adjust_insn_length): Handle ADJUST_LEN_ROUND.
|
||||
|
||||
2013-07-18 David Holsgrove <david.holsgrove@xilinx.com>
|
||||
|
||||
* config/microblaze/microblaze.c (microblaze_expand_prologue):
|
||||
|
@ -447,49 +447,18 @@
|
||||
;; "roundqq3_const" "rounduqq3_const"
|
||||
;; "roundhq3_const" "rounduhq3_const" "roundha3_const" "rounduha3_const"
|
||||
;; "roundsq3_const" "roundusq3_const" "roundsa3_const" "roundusa3_const"
|
||||
(define_expand "round<mode>3_const"
|
||||
[(parallel [(match_operand:ALL124QA 0 "register_operand" "")
|
||||
(match_operand:ALL124QA 1 "register_operand" "")
|
||||
(match_operand:HI 2 "const_int_operand" "")])]
|
||||
(define_insn "round<mode>3_const"
|
||||
[(set (match_operand:ALL124QA 0 "register_operand" "=d")
|
||||
(unspec:ALL124QA [(match_operand:ALL124QA 1 "register_operand" "0")
|
||||
(match_operand:HI 2 "const_int_operand" "n")
|
||||
(const_int 0)]
|
||||
UNSPEC_ROUND))]
|
||||
""
|
||||
{
|
||||
// The rounding point RP is $2. The smallest fractional
|
||||
// bit that is not cleared by the rounding is 2^(-RP).
|
||||
|
||||
enum machine_mode imode = int_mode_for_mode (<MODE>mode);
|
||||
int fbit = (int) GET_MODE_FBIT (<MODE>mode);
|
||||
|
||||
// Add-Saturate 1/2 * 2^(-RP)
|
||||
|
||||
double_int i_add = double_int_zero.set_bit (fbit-1 - INTVAL (operands[2]));
|
||||
rtx x_add = const_fixed_from_double_int (i_add, <MODE>mode);
|
||||
|
||||
if (SIGNED_FIXED_POINT_MODE_P (<MODE>mode))
|
||||
emit_move_insn (operands[0],
|
||||
gen_rtx_SS_PLUS (<MODE>mode, operands[1], x_add));
|
||||
else
|
||||
emit_move_insn (operands[0],
|
||||
gen_rtx_US_PLUS (<MODE>mode, operands[1], x_add));
|
||||
|
||||
// Keep all bits from RP and higher: ... 2^(-RP)
|
||||
// Clear all bits from RP+1 and lower: 2^(-RP-1) ...
|
||||
// Rounding point ^^^^^^^
|
||||
// Added above ^^^^^^^^^
|
||||
|
||||
rtx xreg = simplify_gen_subreg (imode, operands[0], <MODE>mode, 0);
|
||||
rtx xmask = immed_double_int_const (-i_add - i_add, imode);
|
||||
|
||||
if (SImode == imode)
|
||||
emit_insn (gen_andsi3 (xreg, xreg, xmask));
|
||||
else if (HImode == imode)
|
||||
emit_insn (gen_andhi3 (xreg, xreg, xmask));
|
||||
else if (QImode == imode)
|
||||
emit_insn (gen_andqi3 (xreg, xreg, xmask));
|
||||
else
|
||||
gcc_unreachable();
|
||||
|
||||
DONE;
|
||||
})
|
||||
return avr_out_round (insn, operands);
|
||||
}
|
||||
[(set_attr "cc" "clobber")
|
||||
(set_attr "adjust_len" "round")])
|
||||
|
||||
|
||||
;; "*roundqq3.libgcc" "*rounduqq3.libgcc"
|
||||
|
@ -86,7 +86,8 @@ extern int avr_starting_frame_offset (void);
|
||||
extern void avr_output_addr_vec_elt (FILE *stream, int value);
|
||||
extern const char *avr_out_sbxx_branch (rtx insn, rtx operands[]);
|
||||
extern const char* avr_out_bitop (rtx, rtx*, int*);
|
||||
extern const char* avr_out_plus (rtx, rtx*, int* =NULL, int* =NULL);
|
||||
extern const char* avr_out_plus (rtx, rtx*, int* =NULL, int* =NULL, bool =true);
|
||||
extern const char* avr_out_round (rtx, rtx*, int* =NULL);
|
||||
extern const char* avr_out_addto_sp (rtx*, int*);
|
||||
extern const char* avr_out_xload (rtx, rtx*, int*);
|
||||
extern const char* avr_out_movmem (rtx, rtx*, int*);
|
||||
|
@ -6232,11 +6232,14 @@ lshrsi3_out (rtx insn, rtx operands[], int *len)
|
||||
the subtrahend in the original insn, provided it is a compile time constant.
|
||||
In all other cases, SIGN is 0.
|
||||
|
||||
Return "". */
|
||||
If OUT_LABEL is true, print the final 0: label which is needed for
|
||||
saturated addition / subtraction. The only case where OUT_LABEL = false
|
||||
is useful is for saturated addition / subtraction performed during
|
||||
fixed-point rounding, cf. `avr_out_round'. */
|
||||
|
||||
static void
|
||||
avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc,
|
||||
enum rtx_code code_sat = UNKNOWN, int sign = 0)
|
||||
enum rtx_code code_sat, int sign, bool out_label)
|
||||
{
|
||||
/* MODE of the operation. */
|
||||
enum machine_mode mode = GET_MODE (xop[0]);
|
||||
@ -6675,7 +6678,8 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc,
|
||||
"mov %r0+5,%0", xop, plen, 4);
|
||||
}
|
||||
|
||||
avr_asm_len ("0:", op, plen, 0);
|
||||
if (out_label)
|
||||
avr_asm_len ("0:", op, plen, 0);
|
||||
}
|
||||
|
||||
|
||||
@ -6713,8 +6717,8 @@ avr_out_plus_symbol (rtx *xop, enum rtx_code code, int *plen, int *pcc)
|
||||
|
||||
/* Prepare operands of addition/subtraction to be used with avr_out_plus_1.
|
||||
|
||||
INSN is a single_set insn with a binary operation as SET_SRC that is
|
||||
one of: PLUS, SS_PLUS, US_PLUS, MINUS, SS_MINUS, US_MINUS.
|
||||
INSN is a single_set insn or an insn pattern with a binary operation as
|
||||
SET_SRC that is one of: PLUS, SS_PLUS, US_PLUS, MINUS, SS_MINUS, US_MINUS.
|
||||
|
||||
XOP are the operands of INSN. In the case of 64-bit operations with
|
||||
constant XOP[] has just one element: The summand/subtrahend in XOP[0].
|
||||
@ -6729,19 +6733,22 @@ avr_out_plus_symbol (rtx *xop, enum rtx_code code, int *plen, int *pcc)
|
||||
|
||||
PLEN and PCC default to NULL.
|
||||
|
||||
OUT_LABEL defaults to TRUE. For a description, see AVR_OUT_PLUS_1.
|
||||
|
||||
Return "" */
|
||||
|
||||
const char*
|
||||
avr_out_plus (rtx insn, rtx *xop, int *plen, int *pcc)
|
||||
avr_out_plus (rtx insn, rtx *xop, int *plen, int *pcc, bool out_label)
|
||||
{
|
||||
int cc_plus, cc_minus, cc_dummy;
|
||||
int len_plus, len_minus;
|
||||
rtx op[4];
|
||||
rtx xdest = SET_DEST (single_set (insn));
|
||||
rtx xpattern = INSN_P (insn) ? single_set (insn) : insn;
|
||||
rtx xdest = SET_DEST (xpattern);
|
||||
enum machine_mode mode = GET_MODE (xdest);
|
||||
enum machine_mode imode = int_mode_for_mode (mode);
|
||||
int n_bytes = GET_MODE_SIZE (mode);
|
||||
enum rtx_code code_sat = GET_CODE (SET_SRC (single_set (insn)));
|
||||
enum rtx_code code_sat = GET_CODE (SET_SRC (xpattern));
|
||||
enum rtx_code code
|
||||
= (PLUS == code_sat || SS_PLUS == code_sat || US_PLUS == code_sat
|
||||
? PLUS : MINUS);
|
||||
@ -6756,7 +6763,7 @@ avr_out_plus (rtx insn, rtx *xop, int *plen, int *pcc)
|
||||
|
||||
if (n_bytes <= 4 && REG_P (xop[2]))
|
||||
{
|
||||
avr_out_plus_1 (xop, plen, code, pcc, code_sat);
|
||||
avr_out_plus_1 (xop, plen, code, pcc, code_sat, 0, out_label);
|
||||
return "";
|
||||
}
|
||||
|
||||
@ -6783,7 +6790,8 @@ avr_out_plus (rtx insn, rtx *xop, int *plen, int *pcc)
|
||||
/* Saturations and 64-bit operations don't have a clobber operand.
|
||||
For the other cases, the caller will provide a proper XOP[3]. */
|
||||
|
||||
op[3] = PARALLEL == GET_CODE (PATTERN (insn)) ? xop[3] : NULL_RTX;
|
||||
xpattern = INSN_P (insn) ? PATTERN (insn) : insn;
|
||||
op[3] = PARALLEL == GET_CODE (xpattern) ? xop[3] : NULL_RTX;
|
||||
|
||||
/* Saturation will need the sign of the original operand. */
|
||||
|
||||
@ -6798,8 +6806,8 @@ avr_out_plus (rtx insn, rtx *xop, int *plen, int *pcc)
|
||||
|
||||
/* Work out the shortest sequence. */
|
||||
|
||||
avr_out_plus_1 (op, &len_minus, MINUS, &cc_plus, code_sat, sign);
|
||||
avr_out_plus_1 (op, &len_plus, PLUS, &cc_minus, code_sat, sign);
|
||||
avr_out_plus_1 (op, &len_minus, MINUS, &cc_plus, code_sat, sign, out_label);
|
||||
avr_out_plus_1 (op, &len_plus, PLUS, &cc_minus, code_sat, sign, out_label);
|
||||
|
||||
if (plen)
|
||||
{
|
||||
@ -6807,9 +6815,9 @@ avr_out_plus (rtx insn, rtx *xop, int *plen, int *pcc)
|
||||
*pcc = (len_minus <= len_plus) ? cc_minus : cc_plus;
|
||||
}
|
||||
else if (len_minus <= len_plus)
|
||||
avr_out_plus_1 (op, NULL, MINUS, pcc, code_sat, sign);
|
||||
avr_out_plus_1 (op, NULL, MINUS, pcc, code_sat, sign, out_label);
|
||||
else
|
||||
avr_out_plus_1 (op, NULL, PLUS, pcc, code_sat, sign);
|
||||
avr_out_plus_1 (op, NULL, PLUS, pcc, code_sat, sign, out_label);
|
||||
|
||||
return "";
|
||||
}
|
||||
@ -6823,13 +6831,15 @@ avr_out_plus (rtx insn, rtx *xop, int *plen, int *pcc)
|
||||
and return "". If PLEN == NULL, print assembler instructions to perform the
|
||||
operation; otherwise, set *PLEN to the length of the instruction sequence
|
||||
(in words) printed with PLEN == NULL. XOP[3] is either an 8-bit clobber
|
||||
register or SCRATCH if no clobber register is needed for the operation. */
|
||||
register or SCRATCH if no clobber register is needed for the operation.
|
||||
INSN is an INSN_P or a pattern of an insn. */
|
||||
|
||||
const char*
|
||||
avr_out_bitop (rtx insn, rtx *xop, int *plen)
|
||||
{
|
||||
/* CODE and MODE of the operation. */
|
||||
enum rtx_code code = GET_CODE (SET_SRC (single_set (insn)));
|
||||
rtx xpattern = INSN_P (insn) ? single_set (insn) : insn;
|
||||
enum rtx_code code = GET_CODE (SET_SRC (xpattern));
|
||||
enum machine_mode mode = GET_MODE (xop[0]);
|
||||
|
||||
/* Number of bytes to operate on. */
|
||||
@ -7332,6 +7342,67 @@ avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen)
|
||||
}
|
||||
|
||||
|
||||
/* Output fixed-point rounding. XOP[0] = XOP[1] is the operand to round.
|
||||
XOP[2] is the rounding point, a CONST_INT. The function prints the
|
||||
instruction sequence if PLEN = NULL and computes the length in words
|
||||
of the sequence if PLEN != NULL. Most of this function deals with
|
||||
preparing operands for calls to `avr_out_plus' and `avr_out_bitop'. */
|
||||
|
||||
const char*
|
||||
avr_out_round (rtx insn ATTRIBUTE_UNUSED, rtx *xop, int *plen)
|
||||
{
|
||||
enum machine_mode mode = GET_MODE (xop[0]);
|
||||
enum machine_mode imode = int_mode_for_mode (mode);
|
||||
// The smallest fractional bit not cleared by the rounding is 2^(-RP).
|
||||
int fbit = (int) GET_MODE_FBIT (mode);
|
||||
double_int i_add = double_int_zero.set_bit (fbit-1 - INTVAL (xop[2]));
|
||||
// Lengths of PLUS and AND parts.
|
||||
int len_add = 0, *plen_add = plen ? &len_add : NULL;
|
||||
int len_and = 0, *plen_and = plen ? &len_and : NULL;
|
||||
|
||||
// Add-Saturate 1/2 * 2^(-RP). Don't print the label "0:" when printing
|
||||
// the saturated addition so that we can emit the "rjmp 1f" before the
|
||||
// "0:" below.
|
||||
|
||||
rtx xadd = const_fixed_from_double_int (i_add, mode);
|
||||
rtx xpattern, xsrc, op[4];
|
||||
|
||||
xsrc = SIGNED_FIXED_POINT_MODE_P (mode)
|
||||
? gen_rtx_SS_PLUS (mode, xop[1], xadd)
|
||||
: gen_rtx_US_PLUS (mode, xop[1], xadd);
|
||||
xpattern = gen_rtx_SET (VOIDmode, xop[0], xsrc);
|
||||
|
||||
op[0] = xop[0];
|
||||
op[1] = xop[1];
|
||||
op[2] = xadd;
|
||||
avr_out_plus (xpattern, op, plen_add, NULL, false /* Don't print "0:" */);
|
||||
|
||||
avr_asm_len ("rjmp 1f" CR_TAB
|
||||
"0:", NULL, plen_add, 1);
|
||||
|
||||
// Keep all bits from RP and higher: ... 2^(-RP)
|
||||
// Clear all bits from RP+1 and lower: 2^(-RP-1) ...
|
||||
// Rounding point ^^^^^^^
|
||||
// Added above ^^^^^^^^^
|
||||
rtx xreg = simplify_gen_subreg (imode, xop[0], mode, 0);
|
||||
rtx xmask = immed_double_int_const (-i_add - i_add, imode);
|
||||
|
||||
xpattern = gen_rtx_SET (VOIDmode, xreg, gen_rtx_AND (imode, xreg, xmask));
|
||||
|
||||
op[0] = xreg;
|
||||
op[1] = xreg;
|
||||
op[2] = xmask;
|
||||
op[3] = gen_rtx_SCRATCH (QImode);
|
||||
avr_out_bitop (xpattern, op, plen_and);
|
||||
avr_asm_len ("1:", NULL, plen, 0);
|
||||
|
||||
if (plen)
|
||||
*plen = len_add + len_and;
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
/* Create RTL split patterns for byte sized rotate expressions. This
|
||||
produces a series of move instructions and considers overlap situations.
|
||||
Overlapping non-HImode operands need a scratch register. */
|
||||
@ -7540,6 +7611,7 @@ avr_adjust_insn_length (rtx insn, int len)
|
||||
|
||||
case ADJUST_LEN_SFRACT: avr_out_fract (insn, op, true, &len); break;
|
||||
case ADJUST_LEN_UFRACT: avr_out_fract (insn, op, false, &len); break;
|
||||
case ADJUST_LEN_ROUND: avr_out_round (insn, op, &len); break;
|
||||
|
||||
case ADJUST_LEN_TSTHI: avr_out_tsthi (insn, op, &len); break;
|
||||
case ADJUST_LEN_TSTPSI: avr_out_tstpsi (insn, op, &len); break;
|
||||
|
@ -140,7 +140,7 @@
|
||||
"out_bitop, plus, addto_sp,
|
||||
tsthi, tstpsi, tstsi, compare, compare64, call,
|
||||
mov8, mov16, mov24, mov32, reload_in16, reload_in24, reload_in32,
|
||||
ufract, sfract,
|
||||
ufract, sfract, round,
|
||||
xload, lpm, movmem,
|
||||
ashlqi, ashrqi, lshrqi,
|
||||
ashlhi, ashrhi, lshrhi,
|
||||
|
@ -1,3 +1,9 @@
|
||||
2013-07-19 Georg-Johann Lay <avr@gjlay.de>
|
||||
|
||||
PR target/57516
|
||||
* gcc.target/avr/torture/builtins-4-roundfx.c (test2hr, test2k):
|
||||
Adjust to corrected rounding.
|
||||
|
||||
2013-07-19 Georg-Johann Lay <avr@gjlay.de>
|
||||
|
||||
* lib/target-supports.exp (check_effective_target_cilkplus): New proc.
|
||||
|
@ -72,11 +72,11 @@ DEFTEST1 (long long accum, llk)
|
||||
|
||||
static void test2hr (void)
|
||||
{
|
||||
TEST2 (hr, 1, 0x7f, 0x40);
|
||||
TEST2 (hr, 2, 0x7f, 0b1100000);
|
||||
TEST2 (hr, 3, 0x7f, 0b1110000);
|
||||
TEST2 (hr, 4, 0x7f, 0b1111000);
|
||||
|
||||
TEST2 (hr, 1, 0x7f, 0x7f);
|
||||
TEST2 (hr, 2, 0x70, 0x7f);
|
||||
TEST2 (hr, 3, 0x78, 0x7f);
|
||||
TEST2 (hr, 4, 0x7f, 0x7f);
|
||||
|
||||
TEST2 (uhr, 1, 0x7f, 0x80);
|
||||
TEST2 (uhr, 2, 0x7f, 0x80);
|
||||
TEST2 (uhr, 3, 0x7f, 0x80);
|
||||
@ -85,10 +85,13 @@ static void test2hr (void)
|
||||
|
||||
void test2k (void)
|
||||
{
|
||||
TEST2 (k, 1, 0x7fffffff, 0x7fff8000 | 0b100000000000000);
|
||||
TEST2 (k, 2, 0x7fffffff, 0x7fff8000 | 0b110000000000000);
|
||||
TEST2 (k, 3, 0x7fffffff, 0x7fff8000 | 0b111000000000000);
|
||||
TEST2 (k, 4, 0x7fffffff, 0x7fff8000 | 0b111100000000000);
|
||||
TEST2 (k, 1, 0x7fffff00, 0x7fffffff);
|
||||
TEST2 (k, 2, 0x7ffffff0, 0x7fffffff);
|
||||
TEST2 (k, 2, 0x7ffff000, 0x7fffffff);
|
||||
TEST2 (k, 3, 0x7ffff000, 0x7ffff000);
|
||||
TEST2 (k, 3, 0x7ffff800, 0x7fffffff);
|
||||
TEST2 (k, 3, 0x7ffff7ff, 0x7ffff000);
|
||||
TEST2 (k, 4, 0x7ffff7ff, 0x7ffff800);
|
||||
|
||||
TEST2 (uk, 1, 0x7fffffff, 1ul << 31);
|
||||
TEST2 (uk, 2, 0x7fffffff, 1ul << 31);
|
||||
|
@ -1,3 +1,11 @@
|
||||
2013-07-19 Georg-Johann Lay <avr@gjlay.de>
|
||||
|
||||
PR target/57516
|
||||
* config/avr/lib1funcs-fixed.S (__roundqq3, __rounduqq3)
|
||||
(__round_s2_const, __round_u2_const)
|
||||
(__round_s4_const, __round_u4_const, __round_x8):
|
||||
Saturate result if addition result cannot be represented.
|
||||
|
||||
2013-07-15 Matthias Klose <doko@ubuntu.com>
|
||||
|
||||
* libgcc2.c: Don't include <limits.h>.
|
||||
|
@ -1464,12 +1464,13 @@ DEFUN __roundqq3
|
||||
;; Add-Saturate 2^{-RP-1}
|
||||
add A0, C0
|
||||
brvc 0f
|
||||
ldi A0, 0x7f
|
||||
ldi C0, 0x7f
|
||||
rjmp 9f
|
||||
0: ;; Mask out bits beyond RP
|
||||
lsl C0
|
||||
neg C0
|
||||
and C0, A0
|
||||
mov C1, __tmp_reg__
|
||||
9: mov C1, __tmp_reg__
|
||||
ret
|
||||
ENDF __roundqq3
|
||||
#endif /* L_roundqq3 */
|
||||
@ -1488,12 +1489,13 @@ DEFUN __rounduqq3
|
||||
;; Add-Saturate 2^{-RP-1}
|
||||
add A0, C0
|
||||
brcc 0f
|
||||
ldi A0, 0xff
|
||||
ldi C0, 0xff
|
||||
rjmp 9f
|
||||
0: ;; Mask out bits beyond RP
|
||||
lsl C0
|
||||
neg C0
|
||||
and C0, A0
|
||||
mov C1, __tmp_reg__
|
||||
9: mov C1, __tmp_reg__
|
||||
ret
|
||||
ENDF __rounduqq3
|
||||
#endif /* L_rounduqq3 */
|
||||
@ -1565,16 +1567,17 @@ ENDF __rounduha3
|
||||
|
||||
DEFUN __round_s2_const
|
||||
brvc 2f
|
||||
ldi A1, 0x7f
|
||||
ldi C1, 0x7f
|
||||
rjmp 1f
|
||||
;; FALLTHRU (Barrier)
|
||||
ENDF __round_s2_const
|
||||
|
||||
DEFUN __round_u2_const
|
||||
brcc 2f
|
||||
ldi A1, 0xff
|
||||
ldi C1, 0xff
|
||||
1:
|
||||
ldi A0, 0xff
|
||||
ldi C0, 0xff
|
||||
rjmp 9f
|
||||
2:
|
||||
;; Saturation is performed now.
|
||||
;; Currently, we have C[] = 2^{-RP-1}
|
||||
@ -1586,7 +1589,7 @@ DEFUN __round_u2_const
|
||||
;; Clear the bits beyond the rounding point.
|
||||
and C0, A0
|
||||
and C1, A1
|
||||
ret
|
||||
9: ret
|
||||
ENDF __round_u2_const
|
||||
|
||||
#endif /* L_round_2_const */
|
||||
@ -1681,18 +1684,19 @@ ENDF __roundusa3
|
||||
|
||||
DEFUN __round_s4_const
|
||||
brvc 2f
|
||||
ldi A3, 0x7f
|
||||
ldi C3, 0x7f
|
||||
rjmp 1f
|
||||
;; FALLTHRU (Barrier)
|
||||
ENDF __round_s4_const
|
||||
|
||||
DEFUN __round_u4_const
|
||||
brcc 2f
|
||||
ldi A3, 0xff
|
||||
ldi C3, 0xff
|
||||
1:
|
||||
ldi A2, 0xff
|
||||
ldi A1, 0xff
|
||||
ldi A0, 0xff
|
||||
ldi C2, 0xff
|
||||
ldi C1, 0xff
|
||||
ldi C0, 0xff
|
||||
rjmp 9f
|
||||
2:
|
||||
;; Saturation is performed now.
|
||||
;; Currently, we have C[] = 2^{-RP-1}
|
||||
@ -1707,7 +1711,7 @@ DEFUN __round_u4_const
|
||||
and C1, A1
|
||||
and C2, A2
|
||||
and C3, A3
|
||||
ret
|
||||
9: ret
|
||||
ENDF __round_u4_const
|
||||
|
||||
#endif /* L_round_4_const */
|
||||
@ -1847,12 +1851,13 @@ DEFUN __round_x8
|
||||
1: ;; Unsigned
|
||||
brcc 3f
|
||||
;; Unsigned overflow: A[] = 0xff...
|
||||
2: ldi A7, 0xff
|
||||
ldi A6, 0xff
|
||||
wmov A0, A6
|
||||
wmov A2, A6
|
||||
wmov A4, A6
|
||||
bld A7, 7
|
||||
2: ldi C7, 0xff
|
||||
ldi C6, 0xff
|
||||
wmov C0, C6
|
||||
wmov C2, C6
|
||||
wmov C4, C6
|
||||
bld C7, 7
|
||||
rjmp 9f
|
||||
3:
|
||||
;; C[] = -C[] - C[]
|
||||
push A0
|
||||
@ -1869,7 +1874,7 @@ DEFUN __round_x8
|
||||
and C5, A5
|
||||
and C6, A6
|
||||
and C7, A7
|
||||
;; Epilogue
|
||||
9: ;; Epilogue
|
||||
pop r29
|
||||
pop r28
|
||||
pop r17
|
||||
|
Loading…
Reference in New Issue
Block a user