[nvptx] Fix bug in jit bug workaround

2018-01-19  Tom de Vries  <tom@codesourcery.com>
	    Cesar Philippidis  <cesar@codesourcery.com>

	PR target/83920

	* config/nvptx/nvptx.c (nvptx_single): Fix jit workaround.

	* testsuite/libgomp.oacc-c-c++-common/pr83920.c: New test.
	* testsuite/libgomp.oacc-fortran/pr83920.f90: New test.

Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>

From-SVN: r256894
This commit is contained in:
Tom de Vries 2018-01-19 16:29:41 +00:00 committed by Tom de Vries
parent 6c7c47081a
commit 8c8e9a6bb6
5 changed files with 99 additions and 2 deletions

View File

@ -1,3 +1,9 @@
2018-01-19 Tom de Vries <tom@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
PR target/83920
* config/nvptx/nvptx.c (nvptx_single): Fix jit workaround.
2018-01-19 Cesar Philippidis <cesar@codesourcery.com>
PR target/83790

View File

@ -4102,9 +4102,33 @@ nvptx_single (unsigned mask, basic_block from, basic_block to)
There is nothing in the PTX spec to suggest that this is wrong, or
to explain why the extra initialization is needed. So, we classify
it as a JIT bug, and the extra initialization as workaround. */
emit_insn_before (gen_movbi (pvar, const0_rtx),
it as a JIT bug, and the extra initialization as workaround:
{
.reg .u32 %x;
mov.u32 %x,%tid.x;
setp.ne.u32 %rnotvzero,%x,0;
}
+.reg .pred %rcond2;
+setp.eq.u32 %rcond2, 1, 0;
@%rnotvzero bra Lskip;
setp.<op>.<type> %rcond,op1,op2;
+mov.pred %rcond2, %rcond;
Lskip:
+mov.pred %rcond, %rcond2;
selp.u32 %rcondu32,1,0,%rcond;
shfl.idx.b32 %rcondu32,%rcondu32,0,31;
setp.ne.u32 %rcond,%rcondu32,0;
*/
rtx_insn *label = PREV_INSN (tail);
gcc_assert (label && LABEL_P (label));
rtx tmp = gen_reg_rtx (BImode);
emit_insn_before (gen_movbi (tmp, const0_rtx),
bb_first_real_insn (from));
emit_insn_before (gen_rtx_SET (tmp, pvar), label);
emit_insn_before (gen_rtx_SET (pvar, tmp), tail);
#endif
emit_insn_before (nvptx_gen_vcast (pvar), tail);
}

View File

@ -1,3 +1,10 @@
2018-01-19 Tom de Vries <tom@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
PR target/83920
* testsuite/libgomp.oacc-c-c++-common/pr83920.c: New test.
* testsuite/libgomp.oacc-fortran/pr83920.f90: New test.
2018-01-03 Jakub Jelinek <jakub@redhat.com>
Update copyright years.

View File

@ -0,0 +1,32 @@
/* { dg-do run } */
#include <stdlib.h>
#define n 10
/* Zero every element of the n x n matrix C on the accelerator unless BETA
   is 1.

   This is the kernel of the PR target/83920 test: a vector-partitioned loop
   nested inside a condition inside a gang-partitioned loop.  Keep that
   nesting as it is.  The noinline/noclone attributes presumably keep the
   compiler from specializing foo for the constant BETA that main passes.  */
static void __attribute__((noinline)) __attribute__((noclone))
foo (int beta, int *c)
{
  /* An OpenACC array section is written [start:length], not [first:last].
     The loops below store to c[0] .. c[n * n - 1], so the section length
     must be n * n for the last element to be mapped and copied back.  */
#pragma acc parallel copy(c[0:n * n]) num_gangs(2)
#pragma acc loop gang
  for (int j = 0; j < n; ++j)
    if (beta != 1)
      {
#pragma acc loop vector
	for (int i = 0; i < n; ++i)
	  c[i + (j * n)] = 0;
      }
}
/* Run foo with BETA != 1 and verify that the store made inside the vector
   loop reached the host copy of the array.  */
int
main (void)
{
  int c[n * n];

  /* Give every element a known non-zero value.  foo's copy clause reads the
     array on entry, so leaving everything but c[0] uninitialized would
     transfer indeterminate values.  */
  for (int i = 0; i < n * n; ++i)
    c[i] = 1;

  foo (0, c);

  /* foo must have overwritten the 1 stored above.  */
  if (c[0] != 0)
    abort ();

  return 0;
}

View File

@ -0,0 +1,28 @@
! { dg-do run }
! Set every element of C to zero on the accelerator unless BETA equals one.
!
! This is the kernel of the PR target/83920 test: a vector-partitioned loop
! nested inside a condition inside a gang-partitioned loop.  Keep that
! nesting as it is.
subroutine foo (BETA, C)
  implicit none
  real :: C(100,100)
  integer :: i, j
  real, parameter :: one = 1.0
  real :: beta

  !$acc parallel copy(c(1:100,1:100)) num_gangs(2)
  !$acc loop gang
  do j = 1, 100
     ! The condition sits between the gang loop and the vector loop.
     if (beta /= one) then
        !$acc loop vector
        do i = 1, 100
           C(i,j) = 0.0
        end do
     end if
  end do
  !$acc end parallel
end subroutine foo
! Driver: call foo with BETA /= 1 and check that C was zeroed.
program test_foo
  implicit none
  real :: c(100,100), beta

  beta = 0.0
  c(:,:) = 1.0
  call foo (beta, c)

  ! foo writes the whole 100x100 array, so check all of it rather than a
  ! single element.  STOP with a non-zero code is standard Fortran, unlike
  ! the ABORT intrinsic, which is a GNU extension.
  if (any (c /= 0.0)) stop 1
end program test_foo