Repeat processing all basic blocks for vzeroupper optimization.

gcc/

2010-12-30  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/46519
	* config/i386/i386.c (block_info_def): Remove referenced, count
	and rescanned.
	(move_or_delete_vzeroupper_2): Updated.
	(move_or_delete_vzeroupper_1): Rewritten to avoid recursive call.
	(rescan_move_or_delete_vzeroupper): Removed.
	(move_or_delete_vzeroupper): Repeat processing all basic blocks
	until no basic block state is changed to used at exit.

gcc/testsuite/

2010-12-30  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/46519
	* gfortran.dg/pr46519-2.f90: New.

From-SVN: r168342
This commit is contained in:
H.J. Lu 2010-12-30 13:12:02 +00:00 committed by H.J. Lu
parent ecdee6655c
commit 310a21aa31
4 changed files with 89 additions and 103 deletions

View File

@ -1,3 +1,14 @@
2010-12-30 H.J. Lu <hongjiu.lu@intel.com>
PR target/46519
* config/i386/i386.c (block_info_def): Remove referenced, count
and rescanned.
(move_or_delete_vzeroupper_2): Updated.
(move_or_delete_vzeroupper_1): Rewritten to avoid recursive call.
(rescan_move_or_delete_vzeroupper): Removed.
(move_or_delete_vzeroupper): Repeat processing all basic blocks
until no basic block state is changed to used at exit.
2010-12-30 Paul Koning <ni1d@arrl.net>
* config/pdp11/pdp11.md (movmemhi, movmemhi1): Correct

View File

@ -68,14 +68,8 @@ typedef struct block_info_def
{
/* State of the upper 128bits of any AVX registers at exit. */
enum upper_128bits_state state;
/* If the upper 128bits of any AVX registers are referenced. */
enum upper_128bits_state referenced;
/* Number of vzerouppers in this block. */
unsigned int count;
/* TRUE if block has been processed. */
bool processed;
/* TRUE if block has been rescanned. */
bool rescanned;
} *block_info;
#define BLOCK_INFO(B) ((block_info) (B)->aux)
@ -127,8 +121,6 @@ move_or_delete_vzeroupper_2 (basic_block bb,
rtx vzeroupper_insn = NULL_RTX;
rtx pat;
int avx256;
enum upper_128bits_state referenced = BLOCK_INFO (bb)->referenced;
int count = BLOCK_INFO (bb)->count;
if (dump_file)
fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
@ -191,24 +183,20 @@ move_or_delete_vzeroupper_2 (basic_block bb,
/* Delete pending vzeroupper insertion. */
if (vzeroupper_insn)
{
count--;
delete_insn (vzeroupper_insn);
vzeroupper_insn = NULL_RTX;
}
}
else if (state != used && referenced != unused)
else if (state != used)
{
/* No need to call note_stores if the upper 128bits of
AVX registers are never referenced. */
note_stores (pat, check_avx256_stores, &state);
if (state == used)
referenced = used;
}
continue;
}
/* Process vzeroupper intrinsic. */
count++;
avx256 = INTVAL (XVECEXP (pat, 0, 0));
if (state == unused)
@ -226,7 +214,6 @@ move_or_delete_vzeroupper_2 (basic_block bb,
fprintf (dump_file, "Delete redundant vzeroupper:\n");
print_rtl_single (dump_file, insn);
}
count--;
delete_insn (insn);
}
else
@ -246,7 +233,6 @@ move_or_delete_vzeroupper_2 (basic_block bb,
fprintf (dump_file, "Delete callee pass vzeroupper:\n");
print_rtl_single (dump_file, insn);
}
count--;
delete_insn (insn);
}
else
@ -256,30 +242,22 @@ move_or_delete_vzeroupper_2 (basic_block bb,
BLOCK_INFO (bb)->state = state;
if (BLOCK_INFO (bb)->referenced == unknown)
{
/* The upper 128bits of AVX registers are never referenced if
REFERENCED isn't updated. */
if (referenced == unknown)
referenced = unused;
BLOCK_INFO (bb)->referenced = referenced;
BLOCK_INFO (bb)->count = count;
}
if (dump_file)
fprintf (dump_file, " [bb %i] exit: upper 128bits: %d\n",
bb->index, state);
}
/* Helper function for move_or_delete_vzeroupper. Process vzeroupper
in BLOCK and its predecessor blocks recursively. */
in BLOCK and check its predecessor blocks. Treat UNKNOWN state
as USED if UNKNOWN_IS_UNUSED is true. */
static void
move_or_delete_vzeroupper_1 (basic_block block)
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
edge e;
edge_iterator ei;
enum upper_128bits_state state;
enum upper_128bits_state state, old_state, new_state;
bool seen_unknown;
if (dump_file)
fprintf (dump_file, " Process [bb %i]: status: %d\n",
@ -288,83 +266,42 @@ move_or_delete_vzeroupper_1 (basic_block block)
if (BLOCK_INFO (block)->processed)
return;
BLOCK_INFO (block)->processed = true;
state = unused;
state = unknown;
/* Process all predecessor edges of this block. */
/* Check all predecessor edges of this block. */
seen_unknown = false;
FOR_EACH_EDGE (e, ei, block->preds)
{
if (e->src == block)
continue;
move_or_delete_vzeroupper_1 (e->src);
switch (BLOCK_INFO (e->src)->state)
{
case unknown:
if (state == unused)
state = unknown;
if (!unknown_is_unused)
seen_unknown = true;
case unused:
break;
case used:
state = used;
break;
case unused:
break;
goto done;
}
}
/* If state of any predecessor edges is unknown, we need to rescan. */
if (state == unknown)
cfun->machine->rescan_vzeroupper_p = 1;
if (seen_unknown)
state = unknown;
/* Process this block. */
done:
old_state = BLOCK_INFO (block)->state;
move_or_delete_vzeroupper_2 (block, state);
}
new_state = BLOCK_INFO (block)->state;
/* Helper function for move_or_delete_vzeroupper. Rescan vzeroupper
in BLOCK and its predecessor blocks recursively. */
if (state != unknown || new_state == used)
BLOCK_INFO (block)->processed = true;
static void
rescan_move_or_delete_vzeroupper (basic_block block)
{
edge e;
edge_iterator ei;
enum upper_128bits_state state;
if (dump_file)
fprintf (dump_file, " Rescan [bb %i]: status: %d\n",
block->index, BLOCK_INFO (block)->rescanned);
if (BLOCK_INFO (block)->rescanned)
return;
BLOCK_INFO (block)->rescanned = true;
state = unused;
/* Rescan all predecessor edges of this block. */
FOR_EACH_EDGE (e, ei, block->preds)
{
if (e->src == block)
continue;
rescan_move_or_delete_vzeroupper (e->src);
/* For rescan, UKKNOWN state is treated as UNUSED. */
if (BLOCK_INFO (e->src)->state == used)
state = used;
}
/* Rescan this block only if there are vzerouppers or the upper
128bits of AVX registers are referenced. */
if (BLOCK_INFO (block)->count == 0
&& (state == used || BLOCK_INFO (block)->referenced != used))
{
if (state == used)
BLOCK_INFO (block)->state = state;
if (dump_file)
fprintf (dump_file, " [bb %i] exit: upper 128bits: %d\n",
block->index, BLOCK_INFO (block)->state);
}
else
move_or_delete_vzeroupper_2 (block, state);
/* Need to rescan if the upper 128bits of AVX registers are changed
to USED at exit. */
if (new_state != old_state && new_state == used)
cfun->machine->rescan_vzeroupper_p = 1;
}
/* Go through the instruction stream looking for vzeroupper. Delete
@ -377,7 +314,7 @@ move_or_delete_vzeroupper (void)
edge e;
edge_iterator ei;
basic_block bb;
unsigned int count = 0;
unsigned int count;
/* Set up block info for each basic block. */
alloc_aux_for_blocks (sizeof (struct block_info_def));
@ -392,28 +329,30 @@ move_or_delete_vzeroupper (void)
cfun->machine->caller_pass_avx256_p
? used : unused);
BLOCK_INFO (e->dest)->processed = true;
BLOCK_INFO (e->dest)->rescanned = true;
}
/* Process all basic blocks. */
count = 0;
do
{
if (dump_file)
fprintf (dump_file, "Process all basic blocks: trip %d\n",
count);
cfun->machine->rescan_vzeroupper_p = 0;
FOR_EACH_BB (bb)
move_or_delete_vzeroupper_1 (bb, false);
}
while (cfun->machine->rescan_vzeroupper_p && count++ < 20);
/* FIXME: Is 20 big enough? */
if (count >= 20)
gcc_unreachable ();
if (dump_file)
fprintf (dump_file, "Process all basic blocks\n");
FOR_EACH_BB (bb)
{
move_or_delete_vzeroupper_1 (bb);
count += BLOCK_INFO (bb)->count;
}
/* Rescan all basic blocks if needed. */
if (count && cfun->machine->rescan_vzeroupper_p)
{
if (dump_file)
fprintf (dump_file, "Rescan all basic blocks\n");
FOR_EACH_BB (bb)
rescan_move_or_delete_vzeroupper (bb);
}
move_or_delete_vzeroupper_1 (bb, true);
free_aux_for_blocks ();
}

View File

@ -1,3 +1,8 @@
2010-12-30 H.J. Lu <hongjiu.lu@intel.com>
PR target/46519
* gfortran.dg/pr46519-2.f90: New.
2010-12-30 Janus Weil <janus@gcc.gnu.org>
PR fortran/47085

View File

@ -0,0 +1,31 @@
! { dg-do compile { target i?86-*-* x86_64-*-* } }
! { dg-options "-O3 -mavx -mvzeroupper -mtune=generic -dp" }
SUBROUTINE func(kts, kte, qrz, qiz, rho)
IMPLICIT NONE
INTEGER, INTENT(IN) :: kts, kte
REAL, DIMENSION(kts:kte), INTENT(INOUT) :: qrz, qiz, rho
INTEGER :: k
REAL, DIMENSION(kts:kte) :: praci, vtiold
REAL :: fluxout
INTEGER :: min_q, max_q, var
do k=kts,kte
praci(k)=0.0
enddo
min_q=kte
max_q=kts-1
DO var=1,20
do k=max_q,min_q,-1
fluxout=rho(k)*qrz(k)
enddo
qrz(min_q-1)=qrz(min_q-1)+fluxout
ENDDO
DO var=1,20
do k=kts,kte-1
vtiold(k)= (rho(k))**0.16
enddo
ENDDO
STOP
END SUBROUTINE func
! { dg-final { scan-assembler "avx_vzeroupper" } }