Split 32-byte AVX unaligned load/store.
gcc/ 2011-03-27 H.J. Lu <hongjiu.lu@intel.com> * config/i386/i386.c (flag_opts): Add -mavx256-split-unaligned-load and -mavx256-split-unaligned-store. (ix86_option_override_internal): Split 32-byte AVX unaligned load/store by default. (ix86_avx256_split_vector_move_misalign): New. (ix86_expand_vector_move_misalign): Use it. * config/i386/i386.opt: Add -mavx256-split-unaligned-load and -mavx256-split-unaligned-store. * config/i386/sse.md (*avx_mov<mode>_internal): Verify unaligned 256bit load/store. Generate unaligned store on misaligned memory operand. (*avx_movu<ssemodesuffix><avxmodesuffix>): Verify unaligned 256bit load/store. (*avx_movdqu<avxmodesuffix>): Likewise. * doc/invoke.texi: Document -mavx256-split-unaligned-load and -mavx256-split-unaligned-store. gcc/testsuite/ 2011-03-27 H.J. Lu <hongjiu.lu@intel.com> * gcc.target/i386/avx256-unaligned-load-1.c: New. * gcc.target/i386/avx256-unaligned-load-2.c: Likewise. * gcc.target/i386/avx256-unaligned-load-3.c: Likewise. * gcc.target/i386/avx256-unaligned-load-4.c: Likewise. * gcc.target/i386/avx256-unaligned-load-5.c: Likewise. * gcc.target/i386/avx256-unaligned-load-6.c: Likewise. * gcc.target/i386/avx256-unaligned-load-7.c: Likewise. * gcc.target/i386/avx256-unaligned-store-1.c: Likewise. * gcc.target/i386/avx256-unaligned-store-2.c: Likewise. * gcc.target/i386/avx256-unaligned-store-3.c: Likewise. * gcc.target/i386/avx256-unaligned-store-4.c: Likewise. * gcc.target/i386/avx256-unaligned-store-5.c: Likewise. * gcc.target/i386/avx256-unaligned-store-6.c: Likewise. * gcc.target/i386/avx256-unaligned-store-7.c: Likewise. From-SVN: r171578
This commit is contained in:
parent
c570af00f6
commit
d253656a7b
|
@ -1,3 +1,25 @@
|
|||
2011-03-27 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* config/i386/i386.c (flag_opts): Add -mavx256-split-unaligned-load
|
||||
and -mavx256-split-unaligned-store.
|
||||
(ix86_option_override_internal): Split 32-byte AVX unaligned
|
||||
load/store by default.
|
||||
(ix86_avx256_split_vector_move_misalign): New.
|
||||
(ix86_expand_vector_move_misalign): Use it.
|
||||
|
||||
* config/i386/i386.opt: Add -mavx256-split-unaligned-load and
|
||||
-mavx256-split-unaligned-store.
|
||||
|
||||
* config/i386/sse.md (*avx_mov<mode>_internal): Verify unaligned
|
||||
256bit load/store. Generate unaligned store on misaligned memory
|
||||
operand.
|
||||
(*avx_movu<ssemodesuffix><avxmodesuffix>): Verify unaligned
|
||||
256bit load/store.
|
||||
(*avx_movdqu<avxmodesuffix>): Likewise.
|
||||
|
||||
* doc/invoke.texi: Document -mavx256-split-unaligned-load and
|
||||
-mavx256-split-unaligned-store.
|
||||
|
||||
2011-03-27 Richard Sandiford <rdsandiford@googlemail.com>
|
||||
|
||||
PR target/38598
|
||||
|
|
|
@ -3130,6 +3130,8 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
|
|||
{ "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
|
||||
{ "-m8bit-idiv", MASK_USE_8BIT_IDIV },
|
||||
{ "-mvzeroupper", MASK_VZEROUPPER },
|
||||
{ "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
|
||||
{ "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
|
||||
};
|
||||
|
||||
const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
|
||||
|
@ -4274,11 +4276,18 @@ ix86_option_override_internal (bool main_args_p)
|
|||
if (TARGET_AVX)
|
||||
{
|
||||
/* When not optimize for size, enable vzeroupper optimization for
|
||||
TARGET_AVX with -fexpensive-optimizations. */
|
||||
if (!optimize_size
|
||||
&& flag_expensive_optimizations
|
||||
&& !(target_flags_explicit & MASK_VZEROUPPER))
|
||||
target_flags |= MASK_VZEROUPPER;
|
||||
TARGET_AVX with -fexpensive-optimizations and split 32-byte
|
||||
AVX unaligned load/store. */
|
||||
if (!optimize_size)
|
||||
{
|
||||
if (flag_expensive_optimizations
|
||||
&& !(target_flags_explicit & MASK_VZEROUPPER))
|
||||
target_flags |= MASK_VZEROUPPER;
|
||||
if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
|
||||
target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
|
||||
if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
|
||||
target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -15588,6 +15597,57 @@ ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
|
|||
emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
|
||||
}
|
||||
|
||||
/* Split 32-byte AVX unaligned load and store if needed. */
|
||||
|
||||
static void
|
||||
ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
|
||||
{
|
||||
rtx m;
|
||||
rtx (*extract) (rtx, rtx, rtx);
|
||||
rtx (*move_unaligned) (rtx, rtx);
|
||||
enum machine_mode mode;
|
||||
|
||||
switch (GET_MODE (op0))
|
||||
{
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
case V32QImode:
|
||||
extract = gen_avx_vextractf128v32qi;
|
||||
move_unaligned = gen_avx_movdqu256;
|
||||
mode = V16QImode;
|
||||
break;
|
||||
case V8SFmode:
|
||||
extract = gen_avx_vextractf128v8sf;
|
||||
move_unaligned = gen_avx_movups256;
|
||||
mode = V4SFmode;
|
||||
break;
|
||||
case V4DFmode:
|
||||
extract = gen_avx_vextractf128v4df;
|
||||
move_unaligned = gen_avx_movupd256;
|
||||
mode = V2DFmode;
|
||||
break;
|
||||
}
|
||||
|
||||
if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
|
||||
{
|
||||
rtx r = gen_reg_rtx (mode);
|
||||
m = adjust_address (op1, mode, 0);
|
||||
emit_move_insn (r, m);
|
||||
m = adjust_address (op1, mode, 16);
|
||||
r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
|
||||
emit_move_insn (op0, r);
|
||||
}
|
||||
else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
|
||||
{
|
||||
m = adjust_address (op0, mode, 0);
|
||||
emit_insn (extract (m, op1, const0_rtx));
|
||||
m = adjust_address (op0, mode, 16);
|
||||
emit_insn (extract (m, op1, const1_rtx));
|
||||
}
|
||||
else
|
||||
emit_insn (move_unaligned (op0, op1));
|
||||
}
|
||||
|
||||
/* Implement the movmisalign patterns for SSE. Non-SSE modes go
|
||||
straight to ix86_expand_vector_move. */
|
||||
/* Code generation for scalar reg-reg moves of single and double precision data:
|
||||
|
@ -15672,7 +15732,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
|||
case 32:
|
||||
op0 = gen_lowpart (V32QImode, op0);
|
||||
op1 = gen_lowpart (V32QImode, op1);
|
||||
emit_insn (gen_avx_movdqu256 (op0, op1));
|
||||
ix86_avx256_split_vector_move_misalign (op0, op1);
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
|
@ -15688,7 +15748,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
|||
emit_insn (gen_avx_movups (op0, op1));
|
||||
break;
|
||||
case V8SFmode:
|
||||
emit_insn (gen_avx_movups256 (op0, op1));
|
||||
ix86_avx256_split_vector_move_misalign (op0, op1);
|
||||
break;
|
||||
case V2DFmode:
|
||||
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
|
||||
|
@ -15701,7 +15761,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
|||
emit_insn (gen_avx_movupd (op0, op1));
|
||||
break;
|
||||
case V4DFmode:
|
||||
emit_insn (gen_avx_movupd256 (op0, op1));
|
||||
ix86_avx256_split_vector_move_misalign (op0, op1);
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
|
|
|
@ -420,3 +420,11 @@ Emit profiling counter call at function entry before prologue.
|
|||
m8bit-idiv
|
||||
Target Report Mask(USE_8BIT_IDIV) Save
|
||||
Expand 32bit/64bit integer divide into 8bit unsigned integer divide with run-time check
|
||||
|
||||
mavx256-split-unaligned-load
|
||||
Target Report Mask(AVX256_SPLIT_UNALIGNED_LOAD) Save
|
||||
Split 32-byte AVX unaligned load
|
||||
|
||||
mavx256-split-unaligned-store
|
||||
Target Report Mask(AVX256_SPLIT_UNALIGNED_STORE) Save
|
||||
Split 32-byte AVX unaligned store
|
||||
|
|
|
@ -203,19 +203,35 @@
|
|||
return standard_sse_constant_opcode (insn, operands[1]);
|
||||
case 1:
|
||||
case 2:
|
||||
if (GET_MODE_ALIGNMENT (<MODE>mode) == 256
|
||||
&& ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
|
||||
&& misaligned_operand (operands[0], <MODE>mode))
|
||||
|| (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
|
||||
&& misaligned_operand (operands[1], <MODE>mode))))
|
||||
gcc_unreachable ();
|
||||
switch (get_attr_mode (insn))
|
||||
{
|
||||
case MODE_V8SF:
|
||||
case MODE_V4SF:
|
||||
return "vmovaps\t{%1, %0|%0, %1}";
|
||||
if (misaligned_operand (operands[0], <MODE>mode)
|
||||
|| misaligned_operand (operands[1], <MODE>mode))
|
||||
return "vmovups\t{%1, %0|%0, %1}";
|
||||
else
|
||||
return "vmovaps\t{%1, %0|%0, %1}";
|
||||
case MODE_V4DF:
|
||||
case MODE_V2DF:
|
||||
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
|
||||
if (misaligned_operand (operands[0], <MODE>mode)
|
||||
|| misaligned_operand (operands[1], <MODE>mode))
|
||||
return "vmovupd\t{%1, %0|%0, %1}";
|
||||
else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
|
||||
return "vmovaps\t{%1, %0|%0, %1}";
|
||||
else
|
||||
return "vmovapd\t{%1, %0|%0, %1}";
|
||||
default:
|
||||
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
|
||||
if (misaligned_operand (operands[0], <MODE>mode)
|
||||
|| misaligned_operand (operands[1], <MODE>mode))
|
||||
return "vmovdqu\t{%1, %0|%0, %1}";
|
||||
else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
|
||||
return "vmovaps\t{%1, %0|%0, %1}";
|
||||
else
|
||||
return "vmovdqa\t{%1, %0|%0, %1}";
|
||||
|
@ -400,7 +416,15 @@
|
|||
UNSPEC_MOVU))]
|
||||
"AVX_VEC_FLOAT_MODE_P (<MODE>mode)
|
||||
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
|
||||
"vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
|
||||
{
|
||||
if (GET_MODE_ALIGNMENT (<MODE>mode) == 256
|
||||
&& ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
|
||||
&& misaligned_operand (operands[0], <MODE>mode))
|
||||
|| (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
|
||||
&& misaligned_operand (operands[1], <MODE>mode))))
|
||||
gcc_unreachable ();
|
||||
return "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
|
||||
}
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "movu" "1")
|
||||
(set_attr "prefix" "vex")
|
||||
|
@ -459,7 +483,15 @@
|
|||
[(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
|
||||
UNSPEC_MOVU))]
|
||||
"TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
|
||||
"vmovdqu\t{%1, %0|%0, %1}"
|
||||
{
|
||||
if (GET_MODE_ALIGNMENT (<MODE>mode) == 256
|
||||
&& ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
|
||||
&& misaligned_operand (operands[0], <MODE>mode))
|
||||
|| (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
|
||||
&& misaligned_operand (operands[1], <MODE>mode))))
|
||||
gcc_unreachable ();
|
||||
return "vmovdqu\t{%1, %0|%0, %1}";
|
||||
}
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "movu" "1")
|
||||
(set_attr "prefix" "vex")
|
||||
|
|
|
@ -602,7 +602,8 @@ Objective-C and Objective-C++ Dialects}.
|
|||
-momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol
|
||||
-mcmodel=@var{code-model} -mabi=@var{name} @gol
|
||||
-m32 -m64 -mlarge-data-threshold=@var{num} @gol
|
||||
-msse2avx -mfentry -m8bit-idiv}
|
||||
-msse2avx -mfentry -m8bit-idiv @gol
|
||||
-mavx256-split-unaligned-load -mavx256-split-unaligned-store}
|
||||
|
||||
@emph{i386 and x86-64 Windows Options}
|
||||
@gccoptlist{-mconsole -mcygwin -mno-cygwin -mdll @gol
|
||||
|
@ -12669,6 +12670,12 @@ runt-time check. If both dividend and divisor are within range of 0
|
|||
to 255, 8bit unsigned integer divide will be used instead of
|
||||
32bit/64bit integer divide.
|
||||
|
||||
@item -mavx256-split-unaligned-load
|
||||
@itemx -mavx256-split-unaligned-store
|
||||
@opindex avx256-split-unaligned-load
|
||||
@opindex avx256-split-unaligned-store
|
||||
Split 32-byte AVX unaligned load and store.
|
||||
|
||||
@end table
|
||||
|
||||
These @samp{-m} switches are supported in addition to the above
|
||||
|
|
|
@ -1,3 +1,20 @@
|
|||
2011-03-27 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* gcc.target/i386/avx256-unaligned-load-1.c: New.
|
||||
* gcc.target/i386/avx256-unaligned-load-2.c: Likewise.
|
||||
* gcc.target/i386/avx256-unaligned-load-3.c: Likewise.
|
||||
* gcc.target/i386/avx256-unaligned-load-4.c: Likewise.
|
||||
* gcc.target/i386/avx256-unaligned-load-5.c: Likewise.
|
||||
* gcc.target/i386/avx256-unaligned-load-6.c: Likewise.
|
||||
* gcc.target/i386/avx256-unaligned-load-7.c: Likewise.
|
||||
* gcc.target/i386/avx256-unaligned-store-1.c: Likewise.
|
||||
* gcc.target/i386/avx256-unaligned-store-2.c: Likewise.
|
||||
* gcc.target/i386/avx256-unaligned-store-3.c: Likewise.
|
||||
* gcc.target/i386/avx256-unaligned-store-4.c: Likewise.
|
||||
* gcc.target/i386/avx256-unaligned-store-5.c: Likewise.
|
||||
* gcc.target/i386/avx256-unaligned-store-6.c: Likewise.
|
||||
* gcc.target/i386/avx256-unaligned-store-7.c: Likewise.
|
||||
|
||||
2011-03-27 Thomas Koenig <tkoenig@gcc.gnu.org>
|
||||
|
||||
PR fortran/47065
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
|
||||
|
||||
#define N 1024
|
||||
|
||||
float a[N], b[N+3], c[N];
|
||||
|
||||
void
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
c[i] = a[i] * b[i+3];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "\\*avx_movups256/1" } } */
|
||||
/* { dg-final { scan-assembler "\\*avx_movups/1" } } */
|
||||
/* { dg-final { scan-assembler "vinsertf128" } } */
|
|
@ -0,0 +1,29 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target lp64 } */
|
||||
/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
|
||||
|
||||
#define N 1024
|
||||
|
||||
char **ep;
|
||||
char **fp;
|
||||
|
||||
void
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
char **ap;
|
||||
char **bp;
|
||||
char **cp;
|
||||
|
||||
ap = ep;
|
||||
bp = fp;
|
||||
for (i = 128; i >= 0; i--)
|
||||
{
|
||||
*ap++ = *cp++;
|
||||
*bp++ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "\\*avx_movdqu256/1" } } */
|
||||
/* { dg-final { scan-assembler "\\*avx_movdqu/1" } } */
|
||||
/* { dg-final { scan-assembler "vinsertf128" } } */
|
|
@ -0,0 +1,19 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
|
||||
|
||||
#define N 1024
|
||||
|
||||
double a[N], b[N+3], c[N];
|
||||
|
||||
void
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
c[i] = a[i] * b[i+3];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "\\*avx_movupd256/1" } } */
|
||||
/* { dg-final { scan-assembler "\\*avx_movupd/1" } } */
|
||||
/* { dg-final { scan-assembler "vinsertf128" } } */
|
|
@ -0,0 +1,19 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -dp -mavx -mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store" } */
|
||||
|
||||
#define N 1024
|
||||
|
||||
float a[N], b[N+3];
|
||||
|
||||
void
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
b[i] = a[i+3] * 2;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "\\*avx_movups256/1" } } */
|
||||
/* { dg-final { scan-assembler-not "\\*avx_movups/1" } } */
|
||||
/* { dg-final { scan-assembler-not "vinsertf128" } } */
|
|
@ -0,0 +1,43 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
#define N 8
|
||||
|
||||
float a[N+3] = { -1, -1, -1, 24.43, 68.346, 43.35,
|
||||
546.46, 46.79, 82.78, 82.7, 9.4 };
|
||||
float b[N];
|
||||
float c[N];
|
||||
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
b[i] = a[i+3] * 2;
|
||||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
float
|
||||
bar (float x)
|
||||
{
|
||||
return x * 2;
|
||||
}
|
||||
|
||||
void
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
foo ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
c[i] = bar (a[i+3]);
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
if (b[i] != c[i])
|
||||
abort ();
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
#define N 4
|
||||
|
||||
double a[N+3] = { -1, -1, -1, 24.43, 68.346, 43.35, 546.46 };
|
||||
double b[N];
|
||||
double c[N];
|
||||
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
b[i] = a[i+3] * 2;
|
||||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
double
|
||||
bar (double x)
|
||||
{
|
||||
return x * 2;
|
||||
}
|
||||
|
||||
void
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
foo ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
c[i] = bar (a[i+3]);
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
if (b[i] != c[i])
|
||||
abort ();
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
#define N 128
|
||||
|
||||
char **ep;
|
||||
char **fp;
|
||||
char **mp;
|
||||
char **lp;
|
||||
|
||||
__attribute__ ((noinline))
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
mp = (char **) malloc (N);
|
||||
lp = (char **) malloc (N);
|
||||
ep = (char **) malloc (N);
|
||||
fp = (char **) malloc (N);
|
||||
}
|
||||
|
||||
void
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
char **ap, **bp, **cp, **dp;
|
||||
char *str = "STR";
|
||||
|
||||
foo ();
|
||||
|
||||
cp = mp;
|
||||
dp = lp;
|
||||
|
||||
for (i = N; i >= 0; i--)
|
||||
{
|
||||
*cp++ = str;
|
||||
*dp++ = str;
|
||||
}
|
||||
|
||||
ap = ep;
|
||||
bp = fp;
|
||||
cp = mp;
|
||||
dp = lp;
|
||||
|
||||
for (i = N; i >= 0; i--)
|
||||
{
|
||||
*ap++ = *cp++;
|
||||
*bp++ = *dp++;
|
||||
}
|
||||
|
||||
for (i = N; i >= 0; i--)
|
||||
{
|
||||
if (strcmp (*--ap, "STR") != 0)
|
||||
abort ();
|
||||
if (strcmp (*--bp, "STR") != 0)
|
||||
abort ();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
|
||||
|
||||
#define N 1024
|
||||
|
||||
float a[N], b[N+3], c[N], d[N];
|
||||
|
||||
void
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
b[i+3] = a[i] * 10.0;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
d[i] = c[i] * 20.0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "\\*avx_movups256/2" } } */
|
||||
/* { dg-final { scan-assembler "movups.*\\*avx_movv4sf_internal/3" } } */
|
||||
/* { dg-final { scan-assembler "vextractf128" } } */
|
|
@ -0,0 +1,29 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target lp64 } */
|
||||
/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
|
||||
|
||||
#define N 1024
|
||||
|
||||
char **ep;
|
||||
char **fp;
|
||||
|
||||
void
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
char **ap;
|
||||
char **bp;
|
||||
char **cp;
|
||||
|
||||
ap = ep;
|
||||
bp = fp;
|
||||
for (i = 128; i >= 0; i--)
|
||||
{
|
||||
*ap++ = *cp++;
|
||||
*bp++ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "\\*avx_movdqu256/2" } } */
|
||||
/* { dg-final { scan-assembler "movdqu.*\\*avx_movv16qi_internal/3" } } */
|
||||
/* { dg-final { scan-assembler "vextractf128" } } */
|
|
@ -0,0 +1,22 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
|
||||
|
||||
#define N 1024
|
||||
|
||||
double a[N], b[N+3], c[N], d[N];
|
||||
|
||||
void
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
b[i+3] = a[i] * 10.0;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
d[i] = c[i] * 20.0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "\\*avx_movupd256/2" } } */
|
||||
/* { dg-final { scan-assembler "movupd.*\\*avx_movv2df_internal/3" } } */
|
||||
/* { dg-final { scan-assembler "vextractf128" } } */
|
|
@ -0,0 +1,20 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -dp -mavx -mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store" } */
|
||||
|
||||
#define N 1024
|
||||
|
||||
float a[N], b[N+3], c[N];
|
||||
|
||||
void
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
b[i+3] = a[i] * c[i];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "\\*avx_movups256/2" } } */
|
||||
/* { dg-final { scan-assembler-not "\\*avx_movups/2" } } */
|
||||
/* { dg-final { scan-assembler-not "\\*avx_movv4sf_internal/3" } } */
|
||||
/* { dg-final { scan-assembler-not "vextractf128" } } */
|
|
@ -0,0 +1,42 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
#define N 8
|
||||
|
||||
float a[N] = { 24.43, 68.346, 43.35, 546.46, 46.79, 82.78, 82.7, 9.4 };
|
||||
float b[N+3];
|
||||
float c[N+3];
|
||||
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
b[i+3] = a[i] * 2;
|
||||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
float
|
||||
bar (float x)
|
||||
{
|
||||
return x * 2;
|
||||
}
|
||||
|
||||
void
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
foo ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
c[i+3] = bar (a[i]);
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
if (b[i+3] != c[i+3])
|
||||
abort ();
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
#define N 4
|
||||
|
||||
double a[N] = { 24.43, 68.346, 43.35, 546.46 };
|
||||
double b[N+3];
|
||||
double c[N+3];
|
||||
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
b[i+3] = a[i] * 2;
|
||||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
double
|
||||
bar (double x)
|
||||
{
|
||||
return x * 2;
|
||||
}
|
||||
|
||||
void
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
foo ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
c[i+3] = bar (a[i]);
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
if (b[i+3] != c[i+3])
|
||||
abort ();
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
#define N 128
|
||||
|
||||
char **ep;
|
||||
char **fp;
|
||||
|
||||
__attribute__ ((noinline))
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
ep = (char **) malloc (N);
|
||||
fp = (char **) malloc (N);
|
||||
}
|
||||
|
||||
void
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
char **ap, **bp;
|
||||
char *str = "STR";
|
||||
|
||||
foo ();
|
||||
|
||||
ap = ep;
|
||||
bp = fp;
|
||||
|
||||
for (i = N; i >= 0; i--)
|
||||
{
|
||||
*ap++ = str;
|
||||
*bp++ = str;
|
||||
}
|
||||
|
||||
for (i = N; i >= 0; i--)
|
||||
{
|
||||
if (strcmp (*--ap, "STR") != 0)
|
||||
abort ();
|
||||
if (strcmp (*--bp, "STR") != 0)
|
||||
abort ();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue