Repeat processing all basic blocks for vzeroupper optimization.
gcc/ 2010-12-30 H.J. Lu <hongjiu.lu@intel.com> PR target/46519 * config/i386/i386.c (block_info_def): Remove referenced, count and rescanned. (move_or_delete_vzeroupper_2): Updated. (move_or_delete_vzeroupper_1): Rewritten to avoid recursive call. (rescan_move_or_delete_vzeroupper): Removed. (move_or_delete_vzeroupper): Repeat processing all basic blocks until no basic block state is changed to used at exit. gcc/testsuite/ 2010-12-30 H.J. Lu <hongjiu.lu@intel.com> PR target/46519 * gfortran.dg/pr46519-2.f90: New. From-SVN: r168342
This commit is contained in:
parent
ecdee6655c
commit
310a21aa31
@ -1,3 +1,14 @@
|
||||
2010-12-30 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR target/46519
|
||||
* config/i386/i386.c (block_info_def): Remove referenced, count
|
||||
and rescanned.
|
||||
(move_or_delete_vzeroupper_2): Updated.
|
||||
(move_or_delete_vzeroupper_1): Rewritten to avoid recursive call.
|
||||
(rescan_move_or_delete_vzeroupper): Removed.
|
||||
(move_or_delete_vzeroupper): Repeat processing all basic blocks
|
||||
until no basic block state is changed to used at exit.
|
||||
|
||||
2010-12-30 Paul Koning <ni1d@arrl.net>
|
||||
|
||||
* config/pdp11/pdp11.md (movmemhi, movmemhi1): Correct
|
||||
|
@ -68,14 +68,8 @@ typedef struct block_info_def
|
||||
{
|
||||
/* State of the upper 128bits of any AVX registers at exit. */
|
||||
enum upper_128bits_state state;
|
||||
/* If the upper 128bits of any AVX registers are referenced. */
|
||||
enum upper_128bits_state referenced;
|
||||
/* Number of vzerouppers in this block. */
|
||||
unsigned int count;
|
||||
/* TRUE if block has been processed. */
|
||||
bool processed;
|
||||
/* TRUE if block has been rescanned. */
|
||||
bool rescanned;
|
||||
} *block_info;
|
||||
|
||||
#define BLOCK_INFO(B) ((block_info) (B)->aux)
|
||||
@ -127,8 +121,6 @@ move_or_delete_vzeroupper_2 (basic_block bb,
|
||||
rtx vzeroupper_insn = NULL_RTX;
|
||||
rtx pat;
|
||||
int avx256;
|
||||
enum upper_128bits_state referenced = BLOCK_INFO (bb)->referenced;
|
||||
int count = BLOCK_INFO (bb)->count;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
|
||||
@ -191,24 +183,20 @@ move_or_delete_vzeroupper_2 (basic_block bb,
|
||||
/* Delete pending vzeroupper insertion. */
|
||||
if (vzeroupper_insn)
|
||||
{
|
||||
count--;
|
||||
delete_insn (vzeroupper_insn);
|
||||
vzeroupper_insn = NULL_RTX;
|
||||
}
|
||||
}
|
||||
else if (state != used && referenced != unused)
|
||||
else if (state != used)
|
||||
{
|
||||
/* No need to call note_stores if the upper 128bits of
|
||||
AVX registers are never referenced. */
|
||||
note_stores (pat, check_avx256_stores, &state);
|
||||
if (state == used)
|
||||
referenced = used;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Process vzeroupper intrinsic. */
|
||||
count++;
|
||||
avx256 = INTVAL (XVECEXP (pat, 0, 0));
|
||||
|
||||
if (state == unused)
|
||||
@ -226,7 +214,6 @@ move_or_delete_vzeroupper_2 (basic_block bb,
|
||||
fprintf (dump_file, "Delete redundant vzeroupper:\n");
|
||||
print_rtl_single (dump_file, insn);
|
||||
}
|
||||
count--;
|
||||
delete_insn (insn);
|
||||
}
|
||||
else
|
||||
@ -246,7 +233,6 @@ move_or_delete_vzeroupper_2 (basic_block bb,
|
||||
fprintf (dump_file, "Delete callee pass vzeroupper:\n");
|
||||
print_rtl_single (dump_file, insn);
|
||||
}
|
||||
count--;
|
||||
delete_insn (insn);
|
||||
}
|
||||
else
|
||||
@ -256,30 +242,22 @@ move_or_delete_vzeroupper_2 (basic_block bb,
|
||||
|
||||
BLOCK_INFO (bb)->state = state;
|
||||
|
||||
if (BLOCK_INFO (bb)->referenced == unknown)
|
||||
{
|
||||
/* The upper 128bits of AVX registers are never referenced if
|
||||
REFERENCED isn't updated. */
|
||||
if (referenced == unknown)
|
||||
referenced = unused;
|
||||
BLOCK_INFO (bb)->referenced = referenced;
|
||||
BLOCK_INFO (bb)->count = count;
|
||||
}
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " [bb %i] exit: upper 128bits: %d\n",
|
||||
bb->index, state);
|
||||
}
|
||||
|
||||
/* Helper function for move_or_delete_vzeroupper. Process vzeroupper
|
||||
in BLOCK and its predecessor blocks recursively. */
|
||||
in BLOCK and check its predecessor blocks. Treat UNKNOWN state
|
||||
as UNUSED if UNKNOWN_IS_UNUSED is true. */
|
||||
|
||||
static void
|
||||
move_or_delete_vzeroupper_1 (basic_block block)
|
||||
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
|
||||
{
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
enum upper_128bits_state state;
|
||||
enum upper_128bits_state state, old_state, new_state;
|
||||
bool seen_unknown;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " Process [bb %i]: status: %d\n",
|
||||
@ -288,83 +266,42 @@ move_or_delete_vzeroupper_1 (basic_block block)
|
||||
if (BLOCK_INFO (block)->processed)
|
||||
return;
|
||||
|
||||
BLOCK_INFO (block)->processed = true;
|
||||
state = unused;
|
||||
|
||||
state = unknown;
|
||||
|
||||
/* Process all predecessor edges of this block. */
|
||||
/* Check all predecessor edges of this block. */
|
||||
seen_unknown = false;
|
||||
FOR_EACH_EDGE (e, ei, block->preds)
|
||||
{
|
||||
if (e->src == block)
|
||||
continue;
|
||||
move_or_delete_vzeroupper_1 (e->src);
|
||||
switch (BLOCK_INFO (e->src)->state)
|
||||
{
|
||||
case unknown:
|
||||
if (state == unused)
|
||||
state = unknown;
|
||||
if (!unknown_is_unused)
|
||||
seen_unknown = true;
|
||||
case unused:
|
||||
break;
|
||||
case used:
|
||||
state = used;
|
||||
break;
|
||||
case unused:
|
||||
break;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
/* If state of any predecessor edges is unknown, we need to rescan. */
|
||||
if (state == unknown)
|
||||
cfun->machine->rescan_vzeroupper_p = 1;
|
||||
if (seen_unknown)
|
||||
state = unknown;
|
||||
|
||||
/* Process this block. */
|
||||
done:
|
||||
old_state = BLOCK_INFO (block)->state;
|
||||
move_or_delete_vzeroupper_2 (block, state);
|
||||
}
|
||||
new_state = BLOCK_INFO (block)->state;
|
||||
|
||||
/* Helper function for move_or_delete_vzeroupper. Rescan vzeroupper
|
||||
in BLOCK and its predecessor blocks recursively. */
|
||||
if (state != unknown || new_state == used)
|
||||
BLOCK_INFO (block)->processed = true;
|
||||
|
||||
static void
|
||||
rescan_move_or_delete_vzeroupper (basic_block block)
|
||||
{
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
enum upper_128bits_state state;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " Rescan [bb %i]: status: %d\n",
|
||||
block->index, BLOCK_INFO (block)->rescanned);
|
||||
|
||||
if (BLOCK_INFO (block)->rescanned)
|
||||
return;
|
||||
|
||||
BLOCK_INFO (block)->rescanned = true;
|
||||
|
||||
state = unused;
|
||||
|
||||
/* Rescan all predecessor edges of this block. */
|
||||
FOR_EACH_EDGE (e, ei, block->preds)
|
||||
{
|
||||
if (e->src == block)
|
||||
continue;
|
||||
rescan_move_or_delete_vzeroupper (e->src);
|
||||
/* For rescan, UNKNOWN state is treated as UNUSED. */
|
||||
if (BLOCK_INFO (e->src)->state == used)
|
||||
state = used;
|
||||
}
|
||||
|
||||
/* Rescan this block only if there are vzerouppers or the upper
|
||||
128bits of AVX registers are referenced. */
|
||||
if (BLOCK_INFO (block)->count == 0
|
||||
&& (state == used || BLOCK_INFO (block)->referenced != used))
|
||||
{
|
||||
if (state == used)
|
||||
BLOCK_INFO (block)->state = state;
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " [bb %i] exit: upper 128bits: %d\n",
|
||||
block->index, BLOCK_INFO (block)->state);
|
||||
}
|
||||
else
|
||||
move_or_delete_vzeroupper_2 (block, state);
|
||||
/* Need to rescan if the upper 128bits of AVX registers are changed
|
||||
to USED at exit. */
|
||||
if (new_state != old_state && new_state == used)
|
||||
cfun->machine->rescan_vzeroupper_p = 1;
|
||||
}
|
||||
|
||||
/* Go through the instruction stream looking for vzeroupper. Delete
|
||||
@ -377,7 +314,7 @@ move_or_delete_vzeroupper (void)
|
||||
edge e;
|
||||
edge_iterator ei;
|
||||
basic_block bb;
|
||||
unsigned int count = 0;
|
||||
unsigned int count;
|
||||
|
||||
/* Set up block info for each basic block. */
|
||||
alloc_aux_for_blocks (sizeof (struct block_info_def));
|
||||
@ -392,28 +329,30 @@ move_or_delete_vzeroupper (void)
|
||||
cfun->machine->caller_pass_avx256_p
|
||||
? used : unused);
|
||||
BLOCK_INFO (e->dest)->processed = true;
|
||||
BLOCK_INFO (e->dest)->rescanned = true;
|
||||
}
|
||||
|
||||
/* Process all basic blocks. */
|
||||
count = 0;
|
||||
do
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Process all basic blocks: trip %d\n",
|
||||
count);
|
||||
cfun->machine->rescan_vzeroupper_p = 0;
|
||||
FOR_EACH_BB (bb)
|
||||
move_or_delete_vzeroupper_1 (bb, false);
|
||||
}
|
||||
while (cfun->machine->rescan_vzeroupper_p && count++ < 20);
|
||||
|
||||
/* FIXME: Is 20 big enough? */
|
||||
if (count >= 20)
|
||||
gcc_unreachable ();
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Process all basic blocks\n");
|
||||
|
||||
FOR_EACH_BB (bb)
|
||||
{
|
||||
move_or_delete_vzeroupper_1 (bb);
|
||||
count += BLOCK_INFO (bb)->count;
|
||||
}
|
||||
|
||||
/* Rescan all basic blocks if needed. */
|
||||
if (count && cfun->machine->rescan_vzeroupper_p)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Rescan all basic blocks\n");
|
||||
|
||||
FOR_EACH_BB (bb)
|
||||
rescan_move_or_delete_vzeroupper (bb);
|
||||
}
|
||||
move_or_delete_vzeroupper_1 (bb, true);
|
||||
|
||||
free_aux_for_blocks ();
|
||||
}
|
||||
|
@ -1,3 +1,8 @@
|
||||
2010-12-30 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR target/46519
|
||||
* gfortran.dg/pr46519-2.f90: New.
|
||||
|
||||
2010-12-30 Janus Weil <janus@gcc.gnu.org>
|
||||
|
||||
PR fortran/47085
|
||||
|
31
gcc/testsuite/gfortran.dg/pr46519-2.f90
Normal file
31
gcc/testsuite/gfortran.dg/pr46519-2.f90
Normal file
@ -0,0 +1,31 @@
|
||||
! { dg-do compile { target i?86-*-* x86_64-*-* } }
|
||||
! { dg-options "-O3 -mavx -mvzeroupper -mtune=generic -dp" }
|
||||
|
||||
SUBROUTINE func(kts, kte, qrz, qiz, rho)
|
||||
IMPLICIT NONE
|
||||
INTEGER, INTENT(IN) :: kts, kte
|
||||
REAL, DIMENSION(kts:kte), INTENT(INOUT) :: qrz, qiz, rho
|
||||
INTEGER :: k
|
||||
REAL, DIMENSION(kts:kte) :: praci, vtiold
|
||||
REAL :: fluxout
|
||||
INTEGER :: min_q, max_q, var
|
||||
do k=kts,kte
|
||||
praci(k)=0.0
|
||||
enddo
|
||||
min_q=kte
|
||||
max_q=kts-1
|
||||
DO var=1,20
|
||||
do k=max_q,min_q,-1
|
||||
fluxout=rho(k)*qrz(k)
|
||||
enddo
|
||||
qrz(min_q-1)=qrz(min_q-1)+fluxout
|
||||
ENDDO
|
||||
DO var=1,20
|
||||
do k=kts,kte-1
|
||||
vtiold(k)= (rho(k))**0.16
|
||||
enddo
|
||||
ENDDO
|
||||
STOP
|
||||
END SUBROUTINE func
|
||||
|
||||
! { dg-final { scan-assembler "avx_vzeroupper" } }
|
Loading…
x
Reference in New Issue
Block a user