From c17f08e1a828b0d08b13c72a0a327cd51ddfb500 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Fri, 29 Dec 2000 03:51:01 -0800
Subject: [PATCH] alpha.c (alpha_expand_block_move): Initialize src_align and
 dst_align in bits.

	* config/alpha/alpha.c (alpha_expand_block_move): Initialize
	src_align and dst_align in bits.  Do unaligned quadword loads
	if possible for BWX too.
	(alpha_expand_block_clear): Initialize align in bits.  Track
	small leading offsets into a larger alignment.  Play games with
	stq_u for large 4-byte aligned blocks.  Use load/mask/store
	for appropriately aligned heads and tails.

From-SVN: r38532
---
 gcc/ChangeLog            |  10 ++
 gcc/config/alpha/alpha.c | 231 ++++++++++++++++++++++++++++++++-------
 2 files changed, 202 insertions(+), 39 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 63a9049ad00..476801bbe45 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2000-12-29  Richard Henderson
+
+	* config/alpha/alpha.c (alpha_expand_block_move): Initialize
+	src_align and dst_align in bits.  Do unaligned quadword loads
+	if possible for BWX too.
+	(alpha_expand_block_clear): Initialize align in bits.  Track
+	small leading offsets into a larger alignment.  Play games with
+	stq_u for large 4-byte aligned blocks.  Use load/mask/store
+	for appropriately aligned heads and tails.
+
 2000-12-29  Alexandre Oliva
 
 	* function.c (assign_parms): Convert arguments passed by
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c
index a363be83067..cfebe6a9f09 100644
--- a/gcc/config/alpha/alpha.c
+++ b/gcc/config/alpha/alpha.c
@@ -2833,18 +2833,18 @@ alpha_expand_block_move (operands)
   rtx bytes_rtx = operands[2];
   rtx align_rtx = operands[3];
   HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
-  unsigned HOST_WIDE_INT bytes = orig_bytes;
-  unsigned HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
-  unsigned HOST_WIDE_INT dst_align = src_align;
+  HOST_WIDE_INT bytes = orig_bytes;
+  HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
+  HOST_WIDE_INT dst_align = src_align;
   rtx orig_src = operands[1];
   rtx orig_dst = operands[0];
   rtx data_regs[2 * MAX_MOVE_WORDS + 16];
   rtx tmp;
-  unsigned int i, words, ofs, nregs = 0;
+  int i, words, ofs, nregs = 0;
 
   if (orig_bytes <= 0)
     return 1;
-  else if (bytes > MAX_MOVE_WORDS * BITS_PER_UNIT)
+  else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
     return 0;
 
   /* Look for additional alignment information from recorded register info.  */
@@ -2920,6 +2920,7 @@
 	  /* No appropriate mode; fall back on memory.  */
 	  orig_src = change_address (orig_src, GET_MODE (orig_src),
 				     copy_addr_to_reg (XEXP (orig_src, 0)));
+	  src_align = GET_MODE_BITSIZE (GET_MODE (XEXP (tmp, 0)));
 	}
 
   ofs = 0;
@@ -2959,7 +2960,7 @@
       ofs += words * 4;
     }
 
-  if (bytes >= 16)
+  if (bytes >= 8)
     {
       words = bytes / 8;
 
@@ -2974,14 +2975,6 @@
       ofs += words * 8;
     }
 
-  if (! TARGET_BWX && bytes >= 8)
-    {
-      data_regs[nregs++] = tmp = gen_reg_rtx (DImode);
-      alpha_expand_unaligned_load (tmp, orig_src, 8, ofs, 0);
-      bytes -= 8;
-      ofs += 8;
-    }
-
   if (! TARGET_BWX && bytes >= 4)
     {
       data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
@@ -3004,7 +2997,6 @@
 		ofs += 2;
 	      } while (bytes >= 2);
 	    }
-
 	  else if (! TARGET_BWX)
 	    {
 	      data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
@@ -3082,7 +3074,7 @@
 	 up by recognizing extra alignment information.  */
       orig_dst = change_address (orig_dst, GET_MODE (orig_dst),
 				 copy_addr_to_reg (XEXP (orig_dst, 0)));
-      dst_align = GET_MODE_SIZE (GET_MODE (tmp));
+      dst_align = GET_MODE_BITSIZE (GET_MODE (XEXP (tmp, 0)));
     }
 
   /* Write out the data in whatever chunks reading the source allowed.  */
@@ -3202,15 +3194,16 @@
   rtx bytes_rtx = operands[1];
   rtx align_rtx = operands[2];
   HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
-  unsigned HOST_WIDE_INT bytes = orig_bytes;
-  unsigned HOST_WIDE_INT align = INTVAL (align_rtx);
+  HOST_WIDE_INT bytes = orig_bytes;
+  HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
+  HOST_WIDE_INT alignofs = 0;
   rtx orig_dst = operands[0];
   rtx tmp;
-  unsigned HOST_WIDE_INT i, words, ofs = 0;
+  int i, words, ofs = 0;
 
   if (orig_bytes <= 0)
     return 1;
-  if (bytes > MAX_MOVE_WORDS*8)
+  if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
     return 0;
 
   /* Look for stricter alignment.  */
@@ -3221,20 +3214,19 @@
 	   && GET_CODE (XEXP (tmp, 0)) == REG
 	   && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
     {
-      unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
-      unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
+      HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
+      int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
 
       if (a > align)
 	{
-	  if (a >= 64 && c % 8 == 0)
-	    align = 64;
-	  else if (a >= 32 && c % 4 == 0)
-	    align = 32;
-	  else if (a >= 16 && c % 2 == 0)
-	    align = 16;
+	  if (a >= 64)
+	    align = a, alignofs = 8 - c % 8;
+	  else if (a >= 32)
+	    align = a, alignofs = 4 - c % 4;
+	  else if (a >= 16)
+	    align = a, alignofs = 2 - c % 2;
 	}
     }
-
   else if (GET_CODE (tmp) == ADDRESSOF)
     {
       enum machine_mode mode;
@@ -3249,10 +3241,93 @@
       /* No appropriate mode; fall back on memory.  */
       orig_dst = change_address (orig_dst, GET_MODE (orig_dst),
 				 copy_addr_to_reg (tmp));
-      align = GET_MODE_SIZE (GET_MODE (XEXP (tmp, 0)));
+      align = GET_MODE_BITSIZE (GET_MODE (XEXP (tmp, 0)));
     }
 
-  /* Handle a block of contiguous words first.  */
+  /* Handle an unaligned prefix first.  */
+
+  if (alignofs > 0)
+    {
+#if HOST_BITS_PER_WIDE_INT >= 64
+      /* Given that alignofs is bounded by align, the only time BWX could
+	 generate three stores is for a 7 byte fill.  Prefer two individual
+	 stores over a load/mask/store sequence.  */
+      if ((!TARGET_BWX || alignofs == 7)
+	  && align >= 32
+	  && !(alignofs == 4 && bytes >= 4))
+	{
+	  enum machine_mode mode = (align >= 64 ? DImode : SImode);
+	  int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
+	  rtx mem, tmp;
+	  HOST_WIDE_INT mask;
+
+	  mem = change_address (orig_dst, mode,
+				plus_constant (XEXP (orig_dst, 0),
+					       ofs - inv_alignofs));
+	  MEM_ALIAS_SET (mem) = 0;
+
+	  mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8));
+	  if (bytes < alignofs)
+	    {
+	      mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8);
+	      ofs += bytes;
+	      bytes = 0;
+	    }
+	  else
+	    {
+	      bytes -= alignofs;
+	      ofs += alignofs;
+	    }
+	  alignofs = 0;
+
+	  tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
+			      NULL_RTX, 1, OPTAB_WIDEN);
+
+	  emit_move_insn (mem, tmp);
+	}
+#endif
+
+      if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
+	{
+	  emit_move_insn (change_address (orig_dst, QImode,
+					  plus_constant (XEXP (orig_dst, 0),
+							 ofs)),
+			  const0_rtx);
+	  bytes -= 1;
+	  ofs += 1;
+	  alignofs -= 1;
+	}
+      if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
+	{
+	  emit_move_insn (change_address (orig_dst, HImode,
+					  plus_constant (XEXP (orig_dst, 0),
+							 ofs)),
+			  const0_rtx);
+	  bytes -= 2;
+	  ofs += 2;
+	  alignofs -= 2;
+	}
+      if (alignofs == 4 && bytes >= 4)
+	{
+	  emit_move_insn (change_address (orig_dst, SImode,
+					  plus_constant (XEXP (orig_dst, 0),
+							 ofs)),
+			  const0_rtx);
+	  bytes -= 4;
+	  ofs += 4;
+	  alignofs = 0;
+	}
+
+      /* If we've not used the extra lead alignment information by now,
+	 we won't be able to.  Downgrade align to match what's left over.  */
+      if (alignofs > 0)
+	{
+	  alignofs = alignofs & -alignofs;
+	  align = MIN (align, alignofs * BITS_PER_UNIT);
+	}
+    }
+
+  /* Handle a block of contiguous long-words.  */
 
   if (align >= 64 && bytes >= 8)
     {
@@ -3268,7 +3343,42 @@
       ofs += words * 8;
     }
 
-  if (align >= 16 && bytes >= 4)
+  /* If the block is large and appropriately aligned, emit a single
+     store followed by a sequence of stq_u insns.  */
+
+  if (align >= 32 && bytes > 16)
+    {
+      emit_move_insn (change_address (orig_dst, SImode,
+				      plus_constant (XEXP (orig_dst, 0), ofs)),
+		      const0_rtx);
+      bytes -= 4;
+      ofs += 4;
+
+      words = bytes / 8;
+      for (i = 0; i < words; ++i)
+	{
+	  rtx mem;
+	  mem = change_address (orig_dst, DImode,
+				gen_rtx_AND (DImode,
+					     plus_constant (XEXP (orig_dst, 0),
							    ofs + i*8),
+					     GEN_INT (-8)));
+	  MEM_ALIAS_SET (mem) = 0;
+	  emit_move_insn (mem, const0_rtx);
+	}
+
+      /* Depending on the alignment, the first stq_u may have overlapped
+	 with the initial stl, which means that the last stq_u didn't
+	 write as much as it would appear.  Leave those questionable bytes
+	 unaccounted for.  */
+      bytes -= words * 8 - 4;
+      ofs += words * 8 - 4;
+    }
+
+  /* Handle a smaller block of aligned words.  */
+
+  if ((align >= 64 && bytes == 4)
+      || (align == 32 && bytes >= 4))
     {
       words = bytes / 4;
 
@@ -3282,7 +3392,9 @@
       ofs += words * 4;
     }
 
-  if (bytes >= 16)
+  /* An unaligned block uses stq_u stores for as many as possible.  */
+
+  if (bytes >= 8)
     {
       words = bytes / 8;
 
@@ -3292,15 +3404,56 @@
       ofs += words * 8;
     }
 
-  /* Next clean up any trailing pieces.  We know from the contiguous
-     block move that there are no aligned SImode or DImode hunks left.  */
+  /* Next clean up any trailing pieces.  */
 
-  if (! TARGET_BWX && bytes >= 8)
+#if HOST_BITS_PER_WIDE_INT >= 64
+  /* Count the number of bits in BYTES for which aligned stores could
+     be emitted.  */
+  words = 0;
+  for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
+    if (bytes & i)
+      words += 1;
+
+  /* If we have appropriate alignment (and it wouldn't take too many
+     instructions otherwise), mask out the bytes we need.  */
+  if (TARGET_BWX ? words > 2 : bytes > 0)
     {
-      alpha_expand_unaligned_store (orig_dst, const0_rtx, 8, ofs);
-      bytes -= 8;
-      ofs += 8;
+      if (align >= 64)
+	{
+	  rtx mem, tmp;
+	  HOST_WIDE_INT mask;
+
+	  mem = change_address (orig_dst, DImode,
+				plus_constant (XEXP (orig_dst, 0), ofs));
+	  MEM_ALIAS_SET (mem) = 0;
+
+	  mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
+
+	  tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
+			      NULL_RTX, 1, OPTAB_WIDEN);
+
+	  emit_move_insn (mem, tmp);
+	  return 1;
+	}
+      else if (align >= 32 && bytes < 4)
+	{
+	  rtx mem, tmp;
+	  HOST_WIDE_INT mask;
+
+	  mem = change_address (orig_dst, SImode,
+				plus_constant (XEXP (orig_dst, 0), ofs));
+	  MEM_ALIAS_SET (mem) = 0;
+
+	  mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
+
+	  tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
+			      NULL_RTX, 1, OPTAB_WIDEN);
+
+	  emit_move_insn (mem, tmp);
+	  return 1;
+	}
     }
+#endif
 
   if (!TARGET_BWX && bytes >= 4)
     {
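
The load/mask/store sequences above read the whole aligned word that
contains a partial head or tail, AND it with a constant whose zero bytes
cover exactly the region being cleared, and store the word back.  What
follows is a minimal host-side sketch of that masking step, not GCC code:
clear_bytes_in_word is a hypothetical helper, and the little-endian byte
order assumed matches Alpha.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Zero LEN bytes starting at byte POS within one aligned 64-bit word,
   using a single load/AND/store instead of LEN separate stores.  */
static void
clear_bytes_in_word (uint64_t *word, int pos, int len)
{
  assert (pos >= 0 && len > 0 && pos + len <= 8);

  /* Keep the low POS bytes, as in the patch's head mask
     ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8))...  */
  uint64_t keep = ~(~(uint64_t) 0 << (pos * 8));

  /* ...and keep any bytes past the cleared region, as in the patch's
     mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8).  */
  if (pos + len < 8)
    keep |= ~(uint64_t) 0 << ((pos + len) * 8);

  *word &= keep;
}

int
main (void)
{
  uint64_t w = 0x1122334455667788ULL;
  clear_bytes_in_word (&w, 2, 3);   /* clear bytes 2..4 */
  printf ("%016llx\n", (unsigned long long) w);   /* 1122330000007788 */
  return 0;
}

Pre-BWX Alphas have no byte or word stores at all, so a partial head or
tail has to be written this way regardless; the patch extends the trick
to BWX targets whenever individual stores would cost more instructions
(the words > 2 test, and the alignofs == 7 case, where BWX would need
three stores for a 4+2+1 fill).  On Alpha the AND with such a
byte-granular constant is cheap, since the zapnot family of instructions
can implement it directly.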