diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 68224f94618..ac8b40dae68 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2016-03-14 Bernd Schmidt + + PR target/70083 + * lra-lives.c (process_bb_lives): Also update biggest mode for hard + regs. + (lra_create_live_ranges_1): Initialize hard register biggest_mode to + VOIDmode. + * lra-constraints.c (split_reg): For hard regs, try to find the + biggest single-register mode used in the function. + 2016-03-14 Richard Biener PR tree-optimization/56365 diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index b070218145a..171ed657c85 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -4972,6 +4972,7 @@ split_reg (bool before_p, int original_regno, rtx_insn *insn, rtx_insn *restore, *save; bool after_p; bool call_save_p; + machine_mode mode; if (original_regno < FIRST_PSEUDO_REGISTER) { @@ -4979,24 +4980,32 @@ split_reg (bool before_p, int original_regno, rtx_insn *insn, hard_regno = original_regno; call_save_p = false; nregs = 1; + mode = lra_reg_info[hard_regno].biggest_mode; + machine_mode reg_rtx_mode = GET_MODE (regno_reg_rtx[hard_regno]); + if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (reg_rtx_mode)) + { + original_reg = regno_reg_rtx[hard_regno]; + mode = reg_rtx_mode; + } + else + original_reg = gen_rtx_REG (mode, hard_regno); } else { + mode = PSEUDO_REGNO_MODE (original_regno); hard_regno = reg_renumber[original_regno]; - nregs = hard_regno_nregs[hard_regno][PSEUDO_REGNO_MODE (original_regno)]; + nregs = hard_regno_nregs[hard_regno][mode]; rclass = lra_get_allocno_class (original_regno); original_reg = regno_reg_rtx[original_regno]; call_save_p = need_for_call_save_p (original_regno); } - original_reg = regno_reg_rtx[original_regno]; lra_assert (hard_regno >= 0); if (lra_dump_file != NULL) fprintf (lra_dump_file, " ((((((((((((((((((((((((((((((((((((((((((((((((\n"); + if (call_save_p) { - machine_mode mode = GET_MODE (original_reg); - mode = HARD_REGNO_CALLER_SAVE_MODE 
(hard_regno, hard_regno_nregs[hard_regno][mode], mode); @@ -5004,8 +5013,7 @@ split_reg (bool before_p, int original_regno, rtx_insn *insn, } else { - rclass = choose_split_class (rclass, hard_regno, - GET_MODE (original_reg)); + rclass = choose_split_class (rclass, hard_regno, mode); if (rclass == NO_REGS) { if (lra_dump_file != NULL) @@ -5023,8 +5031,7 @@ split_reg (bool before_p, int original_regno, rtx_insn *insn, } return false; } - new_reg = lra_create_new_reg (GET_MODE (original_reg), original_reg, - rclass, "split"); + new_reg = lra_create_new_reg (mode, original_reg, rclass, "split"); reg_renumber[REGNO (new_reg)] = hard_regno; } save = emit_spill_move (true, new_reg, original_reg); diff --git a/gcc/lra-lives.c b/gcc/lra-lives.c index 67dda47df2a..8811198cfd3 100644 --- a/gcc/lra-lives.c +++ b/gcc/lra-lives.c @@ -700,12 +700,13 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) /* Update max ref width and hard reg usage. */ for (reg = curr_id->regs; reg != NULL; reg = reg->next) - if (reg->regno >= FIRST_PSEUDO_REGISTER - && (GET_MODE_SIZE (reg->biggest_mode) - > GET_MODE_SIZE (lra_reg_info[reg->regno].biggest_mode))) - lra_reg_info[reg->regno].biggest_mode = reg->biggest_mode; - else if (reg->regno < FIRST_PSEUDO_REGISTER) - lra_hard_reg_usage[reg->regno] += freq; + { + if (GET_MODE_SIZE (reg->biggest_mode) + > GET_MODE_SIZE (lra_reg_info[reg->regno].biggest_mode)) + lra_reg_info[reg->regno].biggest_mode = reg->biggest_mode; + if (reg->regno < FIRST_PSEUDO_REGISTER) + lra_hard_reg_usage[reg->regno] += freq; + } call_p = CALL_P (curr_insn); src_regno = (set != NULL_RTX && REG_P (SET_SRC (set)) @@ -1208,7 +1209,7 @@ lra_create_live_ranges_1 (bool all_p, bool dead_insn_p) conservative because of recent transformation. Here in this file we recalculate it again as it costs practically nothing. 
*/ - if (regno_reg_rtx[i] != NULL_RTX) + if (i >= FIRST_PSEUDO_REGISTER && regno_reg_rtx[i] != NULL_RTX) lra_reg_info[i].biggest_mode = GET_MODE (regno_reg_rtx[i]); else lra_reg_info[i].biggest_mode = VOIDmode; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 54e063eb3c3..b2fdd0b2383 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2016-03-14 Bernd Schmidt + + PR target/70083 + * gcc.dg/torture/pr70083.c: New test. + * gcc.target/i386/pr70083.c: New test. + 2016-03-14 Richard Biener PR tree-optimization/56365 diff --git a/gcc/testsuite/gcc.dg/torture/pr70083.c b/gcc/testsuite/gcc.dg/torture/pr70083.c new file mode 100644 index 00000000000..7f047a5b044 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr70083.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-Wno-psabi" } */ + +typedef short v16hi __attribute__ ((vector_size (32))); +typedef int v8si __attribute__ ((vector_size (32))); +typedef long long v4di __attribute__ ((vector_size (32))); + +int +foo(int u32_0, int u64_0, int u64_1, v16hi v32u16_0, v8si v32u32_0, v4di v32u64_0, v16hi v32u16_1, v8si v32u32_1, v4di v32u64_1) +{ + v32u32_1 %= (v8si) v32u16_1 | 1; + v32u64_1[1] |= ((1)); + v32u16_0 /= (v16hi){~u64_1, 1, 0xb56c, 0xd279, 0x26b6, 0x74d9, 0xf764, 0, 0, -v32u16_1[6]} | 1; + v32u16_1 ^= (v16hi){0xc98d, 1, 0x8c71, u32_0, 0x5366, 0, ~v32u64_1[1]} & 31; + v32u32_0 -= (v8si)~v32u64_1; + v32u32_1[2] |= 0x1f; + v32u16_0 %= (v16hi){2, 0xffff, u32_0, 1, v32u64_0[1], u32_0 }; + v32u32_1 /= (v8si){0x1e7390, v32u16_0[12], ~v32u16_1[2], -u64_0}; + return v32u16_0[4] + v32u16_0[5] + v32u32_0[5] + v32u32_1[6] + v32u64_1[3]; +} diff --git a/gcc/testsuite/gcc.target/i386/pr70083.c b/gcc/testsuite/gcc.target/i386/pr70083.c new file mode 100644 index 00000000000..cfce62047b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr70083.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-Wno-psabi -O2 -fno-dce -fschedule-insns 
-fno-sched-critical-path-heuristic -mavx512dq --param=max-cse-insns=1" } */ + +typedef short v16hi __attribute__ ((vector_size (32))); +typedef int v8si __attribute__ ((vector_size (32))); +typedef long long v4di __attribute__ ((vector_size (32))); + +int +foo(int u32_0, int u64_0, int u64_1, v16hi v32u16_0, v8si v32u32_0, v4di v32u64_0, v16hi v32u16_1, v8si v32u32_1, v4di v32u64_1) +{ + v32u32_1 %= (v8si) v32u16_1 | 1; + v32u64_1[1] |= ((1)); + v32u16_0 /= (v16hi){~u64_1, 1, 0xb56c, 0xd279, 0x26b6, 0x74d9, 0xf764, 0, 0, -v32u16_1[6]} | 1; + v32u16_1 ^= (v16hi){0xc98d, 1, 0x8c71, u32_0, 0x5366, 0, ~v32u64_1[1]} & 31; + v32u32_0 -= (v8si)~v32u64_1; + v32u32_1[2] |= 0x1f; + v32u16_0 %= (v16hi){2, 0xffff, u32_0, 1, v32u64_0[1], u32_0 }; + v32u32_1 /= (v8si){0x1e7390, v32u16_0[12], ~v32u16_1[2], -u64_0}; + return v32u16_0[4] + v32u16_0[5] + v32u32_0[5] + v32u32_1[6] + v32u64_1[3]; +}