[nvptx] Don't emit barriers for empty loops -- fix
When compiling an empty loop: ... long long v1; #pragma acc parallel num_gangs (640) num_workers(1) vector_length (128) #pragma acc loop for (v1 = 0; v1 < 20; v1 += 2) ; ... the compiler emits two subsequent bar.syncs. This triggers some bug on my quadro m1200 (I'm assuming in the ptxas/JIT compiler) that hangs the testcase. This patch works around the bug by doing an optimization: we detect that this is an empty loop (a forked immediately followed by a joining), and don't emit the barriers. The patch does not include the test-case yet, since vector_length (128) is not yet supported at this point. 2019-01-07 Tom de Vries <tdevries@suse.de> PR target/85381 * config/nvptx/nvptx.c (nvptx_process_pars): Don't emit barriers for empty loops. From-SVN: r267630
This commit is contained in:
parent
0024c32045
commit
d495b5ccd2
|
@ -1,3 +1,9 @@
|
|||
2019-01-07 Tom de Vries <tdevries@suse.de>
|
||||
|
||||
PR target/85381
|
||||
* config/nvptx/nvptx.c (nvptx_process_pars): Don't emit barriers for
|
||||
empty loops.
|
||||
|
||||
2019-01-07 Tom de Vries <tdevries@suse.de>
|
||||
|
||||
* config/nvptx/nvptx.c (oacc_bcast_partition): Declare.
|
||||
|
|
|
@ -4636,9 +4636,12 @@ nvptx_process_pars (parallel *par)
|
|||
{
|
||||
nvptx_shared_propagate (false, is_call, par->forked_block,
|
||||
par->forked_insn, !worker);
|
||||
bool empty = nvptx_shared_propagate (true, is_call,
|
||||
par->forked_block, par->fork_insn,
|
||||
!worker);
|
||||
bool no_prop_p
|
||||
= nvptx_shared_propagate (true, is_call, par->forked_block,
|
||||
par->fork_insn, !worker);
|
||||
bool empty_loop_p
|
||||
= !is_call && (NEXT_INSN (par->forked_insn)
|
||||
&& NEXT_INSN (par->forked_insn) == par->joining_insn);
|
||||
rtx barrier = GEN_INT (0);
|
||||
int threads = 0;
|
||||
|
||||
|
@ -4648,7 +4651,11 @@ nvptx_process_pars (parallel *par)
|
|||
threads = nvptx_mach_vector_length ();
|
||||
}
|
||||
|
||||
if (!empty || !is_call)
|
||||
if (no_prop_p && empty_loop_p)
|
||||
;
|
||||
else if (no_prop_p && is_call)
|
||||
;
|
||||
else
|
||||
{
|
||||
/* Insert begin and end synchronizations. */
|
||||
emit_insn_before (nvptx_cta_sync (barrier, threads),
|
||||
|
|
Loading…
Reference in New Issue