[nvptx] Commit passing pr85381-*.c test-cases

Add pr85381*.c test-cases that are already passing without the fix for PR85381.

Build and reg-tested on x86_64 with nvptx accelerator.

2018-12-19  Tom de Vries  <tdevries@suse.de>

	* testsuite/libgomp.oacc-c-c++-common/pr85381-2.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/pr85381-3.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/pr85381-4.c: New test.

From-SVN: r267268
This commit is contained in:
Tom de Vries 2018-12-19 14:20:54 +00:00 committed by Tom de Vries
parent 49188cd1f2
commit a152954ea4
4 changed files with 104 additions and 0 deletions

View File

@ -1,3 +1,9 @@
2018-12-19 Tom de Vries <tdevries@suse.de>
* testsuite/libgomp.oacc-c-c++-common/pr85381-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/pr85381-3.c: New test.
* testsuite/libgomp.oacc-c-c++-common/pr85381-4.c: New test.
2018-12-19 Tom de Vries <tdevries@suse.de>
* testsuite/lib/libgomp.exp: Add load_lib of scanoffloadrtl.exp.

View File

@ -0,0 +1,36 @@
/* { dg-additional-options "-save-temps" } */
/* { dg-do run { target openacc_nvidia_accel_selected } }
{ dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
/* Test entry point: runs one OpenACC parallel region whose only content is
   a worker-partitioned loop with an empty body, then returns 0.  The
   program computes nothing; the dg-final directive in this file counts
   the bar.sync instructions in the generated assembly.  */
int
main (void)
{
int v1;
/* The induction variable is declared outside the loop and stepped by 2
   while it is below 20; the loop body is a null statement.  */
#pragma acc parallel
#pragma acc loop worker
for (v1 = 0; v1 < 20; v1 += 2)
;
return 0;
}
/* Todo: Both bar.syncs can be removed.
Atm we generate this dead code in between forked and joining:
mov.u32 %r28, %ntid.y;
mov.u32 %r29, %tid.y;
add.u32 %r30, %r29, %r29;
setp.gt.s32 %r31, %r30, 19;
@%r31 bra $L2;
add.u32 %r25, %r28, %r28;
mov.u32 %r24, %r30;
$L3:
add.u32 %r24, %r24, %r25;
setp.le.s32 %r33, %r24, 19;
@%r33 bra $L3;
$L2:
so the loop is not recognized as an empty loop (which we detect by seeing if
joining immediately follows forked). */
/* { dg-final { scan-assembler-times "bar.sync" 2 } } */

View File

@ -0,0 +1,35 @@
/* { dg-additional-options "-save-temps -w" } */
/* { dg-do run { target openacc_nvidia_accel_selected } }
{ dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
/* Global stored to by foo_v and foo_w; the declare directive lists it in a
   create clause.  */
int a;
#pragma acc declare create(a)
/* OpenACC routine declared with the vector clause.  Its only effect is to
   store 1 to the global A.  The noinline and noclone attributes ask the
   compiler not to inline or clone it.  */
#pragma acc routine vector
void __attribute__((noinline, noclone))
foo_v (void)
{
a = 1;
}
/* OpenACC routine declared with the worker clause.  Its only effect is to
   store 2 to the global A.  The noinline and noclone attributes ask the
   compiler not to inline or clone it.  */
#pragma acc routine worker
void __attribute__((noinline, noclone))
foo_w (void)
{
a = 2;
}
/* Test entry point: calls foo_v and then foo_w, each from its own OpenACC
   parallel region, and returns 0.  The dg-final directive in this file
   requires that no bar.sync appears in the generated assembly.  */
int
main (void)
{
/* First parallel region: only the vector routine is called.  */
#pragma acc parallel
foo_v ();
/* Second, separate parallel region: only the worker routine is called.  */
#pragma acc parallel
foo_w ();
return 0;
}
/* { dg-final { scan-assembler-not "bar.sync" } } */

View File

@ -0,0 +1,27 @@
/* { dg-additional-options "-save-temps -w" } */
/* { dg-do run { target openacc_nvidia_accel_selected } }
{ dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
/* Upper bound used by both loops in main.  */
#define n 1024
/* Test entry point: runs a single OpenACC parallel region containing two
   consecutive worker-partitioned loops, both with empty bodies, then
   returns 0.  The dg-final directive in this file counts the bar.sync
   instructions in the generated assembly.  */
int
main (void)
{
#pragma acc parallel
{
/* First worker loop: iterates i from 0 up to n with a null statement as
   its body.  */
#pragma acc loop worker
for (int i = 0; i < n; i++)
;
/* Second worker loop, identical to the first and placed in the same
   parallel region.  */
#pragma acc loop worker
for (int i = 0; i < n; i++)
;
}
return 0;
}
/* Atm, %ntid.y is broadcast from one loop to the next, so there are 2 bar.syncs
for that (the other two are there for the same reason as in pr85381-2.c).
Todo: Recompute %ntid.y instead of broadcasting it. */
/* { dg-final { scan-assembler-times "bar.sync" 4 } } */