3717fbe35e
2017-07-19 Tom de Vries <tom@codesourcery.com> * config/nvptx/nvptx-modes.def: Add V2DImode. * config/nvptx/nvptx-protos.h (nvptx_data_alignment): Declare. * config/nvptx/nvptx.c (nvptx_ptx_type_from_mode): Handle V2DImode. (nvptx_output_mov_insn): Handle lack of mov.b128. (nvptx_print_operand): Handle 'H' and 'L' codes. (nvptx_vector_mode_supported): Allow V2DImode. (nvptx_preferred_simd_mode): New function. (nvptx_data_alignment): New function. (TARGET_VECTORIZE_PREFERRED_SIMD_MODE): Redefine to nvptx_preferred_simd_mode. * config/nvptx/nvptx.h (STACK_BOUNDARY, BIGGEST_ALIGNMENT): Change from 64 to 128 bits. (DATA_ALIGNMENT): Define. Set to nvptx_data_alignment. * config/nvptx/nvptx.md (VECIM): Add V2DI. * gcc.target/nvptx/decl-init.c: Update alignment. * gcc.target/nvptx/slp-2-run.c: New test. * gcc.target/nvptx/slp-2.c: New test. * gcc.target/nvptx/v2di.c: New test. * testsuite/libgomp.oacc-c/vec.c: New test. From-SVN: r250341
49 lines
1.4 KiB
C
49 lines
1.4 KiB
C
/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
|
|
/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
|
|
/* { dg-additional-options "-std=c99 -ftree-slp-vectorize -foffload=-ftree-slp-vectorize -foffload=-fdump-tree-slp1 -foffload=-save-temps -save-temps" } */
|
|
|
|
#include <stdio.h>
|
|
#include <sys/time.h>
|
|
|
|
/* Source (p) and destination (p2) buffers for the copy performed in main.
   Each holds 32 * 1000 long long elements, matching the 32 slices of 1000
   elements that main indexes as j * 1000 + i.  Both are explicitly aligned
   to 16 bytes; main aborts if that alignment is not observed.  */
long long int p[32 *1000] __attribute__((aligned(16)));
long long int p2[32 *1000] __attribute__((aligned(16)));
|
|
|
|
int
|
|
main (void)
|
|
{
|
|
#pragma acc parallel num_gangs(1) num_workers(1) vector_length(32)
|
|
{
|
|
if (((unsigned long int)p & (0xfULL)) != 0)
|
|
__builtin_abort ();
|
|
if (((unsigned long int)p2 & (0xfULL)) != 0)
|
|
__builtin_abort ();
|
|
|
|
for (unsigned int k = 0; k < 10000; k += 1)
|
|
{
|
|
#pragma acc loop vector
|
|
for (unsigned long long int j = 0; j < 32; j += 1)
|
|
{
|
|
unsigned long long a, b;
|
|
unsigned long long *p3, *p4;
|
|
p3 = (unsigned long long *)((unsigned long long int)p & (~0xfULL));
|
|
p4 = (unsigned long long *)((unsigned long long int)p2 & (~0xfULL));
|
|
|
|
for (unsigned int i = 0; i < 1000; i += 2)
|
|
{
|
|
a = p3[j * 1000 + i];
|
|
b = p3[j * 1000 + i + 1];
|
|
|
|
p4[j * 1000 + i] = a;
|
|
p4[j * 1000 + i + 1] = b;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Todo: make a scan-tree-dump variant that scans vec.o instead. */
|
|
/* { dg-final { file copy -force [glob vec.o.*] [regsub \.o\. [glob vec.o.*] \.c\.] } } */
|
|
/* { dg-final { scan-tree-dump "vector\\(2\\) long long unsigned int" "slp1" } } */
|