Split 32-byte AVX unaligned load/store.

gcc/

2011-03-27  H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386.c (flag_opts): Add -mavx256-split-unaligned-load
	and -mavx256-split-unaligned-store.
	(ix86_option_override_internal): Split 32-byte AVX unaligned
	load/store by default.
	(ix86_avx256_split_vector_move_misalign): New.
	(ix86_expand_vector_move_misalign): Use it.

	* config/i386/i386.opt: Add -mavx256-split-unaligned-load and
	-mavx256-split-unaligned-store.

	* config/i386/sse.md (*avx_mov<mode>_internal): Verify unaligned
	256bit load/store.  Generate unaligned store on misaligned memory
	operand.
	(*avx_movu<ssemodesuffix><avxmodesuffix>): Verify unaligned
	256bit load/store.
	(*avx_movdqu<avxmodesuffix>): Likewise.

	* doc/invoke.texi: Document -mavx256-split-unaligned-load and
	-mavx256-split-unaligned-store.

gcc/testsuite/

2011-03-27  H.J. Lu  <hongjiu.lu@intel.com>

	* gcc.target/i386/avx256-unaligned-load-1.c: New.
	* gcc.target/i386/avx256-unaligned-load-2.c: Likewise.
	* gcc.target/i386/avx256-unaligned-load-3.c: Likewise.
	* gcc.target/i386/avx256-unaligned-load-4.c: Likewise.
	* gcc.target/i386/avx256-unaligned-load-5.c: Likewise.
	* gcc.target/i386/avx256-unaligned-load-6.c: Likewise.
	* gcc.target/i386/avx256-unaligned-load-7.c: Likewise.
	* gcc.target/i386/avx256-unaligned-store-1.c: Likewise.
	* gcc.target/i386/avx256-unaligned-store-2.c: Likewise.
	* gcc.target/i386/avx256-unaligned-store-3.c: Likewise.
	* gcc.target/i386/avx256-unaligned-store-4.c: Likewise.
	* gcc.target/i386/avx256-unaligned-store-5.c: Likewise.
	* gcc.target/i386/avx256-unaligned-store-6.c: Likewise.
	* gcc.target/i386/avx256-unaligned-store-7.c: Likewise.

From-SVN: r171578
commit d253656a7b (parent c570af00f6)
Author: H.J. Lu, 2011-03-27 18:56:00 +00:00 (committed by H.J. Lu)
20 changed files with 613 additions and 14 deletions
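
At the instruction level the change is small: on the first AVX implementations a single 32-byte vmovups/vmovupd/vmovdqu on a misaligned address can be slower than two 16-byte halves, so the expander now emits a 16-byte unaligned load plus vinsertf128 for loads, and a 16-byte store plus vextractf128 for stores. A minimal sketch of the equivalent operation in AVX intrinsics (illustration only; the compiler does this on RTL during expansion, and these helper names are hypothetical):

    #include <immintrin.h>

    /* What -mavx256-split-unaligned-load amounts to for a
       misaligned 256-bit load (illustration only).  */
    static __m256
    load256_split (const float *p)
    {
      __m128 lo = _mm_loadu_ps (p);             /* 16-byte unaligned load */
      return _mm256_insertf128_ps (_mm256_castps128_ps256 (lo),
                                   _mm_loadu_ps (p + 4), 1); /* vinsertf128 */
    }

    /* The -mavx256-split-unaligned-store counterpart.  */
    static void
    store256_split (float *p, __m256 v)
    {
      _mm_storeu_ps (p, _mm256_castps256_ps128 (v));         /* low half  */
      _mm_storeu_ps (p + 4, _mm256_extractf128_ps (v, 1));   /* vextractf128 */
    }

The new tests compile with -dp, which annotates the assembler output with the chosen insn pattern name and constraint alternative (e.g. *avx_movups256/1); the scan-assembler directives in the tests match those annotations.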

gcc/ChangeLog
@@ -1,3 +1,25 @@
+2011-03-27  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* config/i386/i386.c (flag_opts): Add -mavx256-split-unaligned-load
+	and -mavx256-split-unaligned-store.
+	(ix86_option_override_internal): Split 32-byte AVX unaligned
+	load/store by default.
+	(ix86_avx256_split_vector_move_misalign): New.
+	(ix86_expand_vector_move_misalign): Use it.
+
+	* config/i386/i386.opt: Add -mavx256-split-unaligned-load and
+	-mavx256-split-unaligned-store.
+
+	* config/i386/sse.md (*avx_mov<mode>_internal): Verify unaligned
+	256bit load/store.  Generate unaligned store on misaligned memory
+	operand.
+	(*avx_movu<ssemodesuffix><avxmodesuffix>): Verify unaligned
+	256bit load/store.
+	(*avx_movdqu<avxmodesuffix>): Likewise.
+
+	* doc/invoke.texi: Document -mavx256-split-unaligned-load and
+	-mavx256-split-unaligned-store.
+
 2011-03-27  Richard Sandiford  <rdsandiford@googlemail.com>
 
 	PR target/38598

gcc/config/i386/i386.c
@@ -3130,6 +3130,8 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
     { "-mvect8-ret-in-mem",            MASK_VECT8_RETURNS },
     { "-m8bit-idiv",                   MASK_USE_8BIT_IDIV },
     { "-mvzeroupper",                  MASK_VZEROUPPER },
+    { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD },
+    { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE },
   };
 
   const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
@@ -4274,11 +4276,18 @@ ix86_option_override_internal (bool main_args_p)
   if (TARGET_AVX)
     {
       /* When not optimize for size, enable vzeroupper optimization for
-	 TARGET_AVX with -fexpensive-optimizations.  */
-      if (!optimize_size
-	  && flag_expensive_optimizations
-	  && !(target_flags_explicit & MASK_VZEROUPPER))
-	target_flags |= MASK_VZEROUPPER;
+	 TARGET_AVX with -fexpensive-optimizations and split 32-byte
+	 AVX unaligned load/store.  */
+      if (!optimize_size)
+	{
+	  if (flag_expensive_optimizations
+	      && !(target_flags_explicit & MASK_VZEROUPPER))
+	    target_flags |= MASK_VZEROUPPER;
+	  if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
+	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
+	  if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
+	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
+	}
     }
   else
     {
@@ -15588,6 +15597,57 @@ ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
       emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
 }
 
+/* Split 32-byte AVX unaligned load and store if needed.  */
+
+static void
+ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
+{
+  rtx m;
+  rtx (*extract) (rtx, rtx, rtx);
+  rtx (*move_unaligned) (rtx, rtx);
+  enum machine_mode mode;
+
+  switch (GET_MODE (op0))
+    {
+    default:
+      gcc_unreachable ();
+    case V32QImode:
+      extract = gen_avx_vextractf128v32qi;
+      move_unaligned = gen_avx_movdqu256;
+      mode = V16QImode;
+      break;
+    case V8SFmode:
+      extract = gen_avx_vextractf128v8sf;
+      move_unaligned = gen_avx_movups256;
+      mode = V4SFmode;
+      break;
+    case V4DFmode:
+      extract = gen_avx_vextractf128v4df;
+      move_unaligned = gen_avx_movupd256;
+      mode = V2DFmode;
+      break;
+    }
+
+  if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
+    {
+      rtx r = gen_reg_rtx (mode);
+      m = adjust_address (op1, mode, 0);
+      emit_move_insn (r, m);
+      m = adjust_address (op1, mode, 16);
+      r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
+      emit_move_insn (op0, r);
+    }
+  else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
+    {
+      m = adjust_address (op0, mode, 0);
+      emit_insn (extract (m, op1, const0_rtx));
+      m = adjust_address (op0, mode, 16);
+      emit_insn (extract (m, op1, const1_rtx));
+    }
+  else
+    emit_insn (move_unaligned (op0, op1));
+}
+
 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
    straight to ix86_expand_vector_move.  */
 /* Code generation for scalar reg-reg moves of single and double precision data:
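
Note the asymmetry between the two memory paths above: the load side builds a VEC_CONCAT of the just-loaded low half and the high-half memory reference, letting the existing 256-bit vec_concat patterns emit vinsertf128 with a memory operand, while the store side calls the vextractf128 generators directly with memory destinations (the index-0 extract degenerates into a plain 16-byte store). For V8SF the emitted sequences come out roughly as follows (registers and addressing illustrative; the mnemonics follow the scan-assembler patterns in the new tests):

    vmovups      (%rax), %xmm0                   # low half load
    vinsertf128  $0x1, 16(%rax), %ymm0, %ymm0    # high half load

    vmovups      %xmm1, (%rax)                   # low half store
    vextractf128 $0x1, %ymm1, 16(%rax)           # high half store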
@@ -15672,7 +15732,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 	case 32:
 	  op0 = gen_lowpart (V32QImode, op0);
 	  op1 = gen_lowpart (V32QImode, op1);
-	  emit_insn (gen_avx_movdqu256 (op0, op1));
+	  ix86_avx256_split_vector_move_misalign (op0, op1);
 	  break;
 	default:
 	  gcc_unreachable ();
@@ -15688,7 +15748,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 	  emit_insn (gen_avx_movups (op0, op1));
 	  break;
 	case V8SFmode:
-	  emit_insn (gen_avx_movups256 (op0, op1));
+	  ix86_avx256_split_vector_move_misalign (op0, op1);
 	  break;
 	case V2DFmode:
 	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
@@ -15701,7 +15761,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 	    emit_insn (gen_avx_movupd (op0, op1));
 	  break;
 	case V4DFmode:
-	  emit_insn (gen_avx_movupd256 (op0, op1));
+	  ix86_avx256_split_vector_move_misalign (op0, op1);
 	  break;
 	default:
 	  gcc_unreachable ();

gcc/config/i386/i386.opt
@@ -420,3 +420,11 @@ Emit profiling counter call at function entry before prologue.
 m8bit-idiv
 Target Report Mask(USE_8BIT_IDIV) Save
 Expand 32bit/64bit integer divide into 8bit unsigned integer divide with run-time check
+
+mavx256-split-unaligned-load
+Target Report Mask(AVX256_SPLIT_UNALIGNED_LOAD) Save
+Split 32-byte AVX unaligned load
+
+mavx256-split-unaligned-store
+Target Report Mask(AVX256_SPLIT_UNALIGNED_STORE) Save
+Split 32-byte AVX unaligned store
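
The Mask() records above are turned by the option-generation machinery into target_flags bits plus test macros, which is where the TARGET_AVX256_SPLIT_UNALIGNED_LOAD and TARGET_AVX256_SPLIT_UNALIGNED_STORE tests used in i386.c and sse.md come from. Roughly (a sketch; the actual bit positions are assigned by the generator):

    #define MASK_AVX256_SPLIT_UNALIGNED_LOAD  (1 << 27)  /* illustrative bit */
    #define MASK_AVX256_SPLIT_UNALIGNED_STORE (1 << 28)  /* illustrative bit */
    #define TARGET_AVX256_SPLIT_UNALIGNED_LOAD \
      ((target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD) != 0)
    #define TARGET_AVX256_SPLIT_UNALIGNED_STORE \
      ((target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE) != 0)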

gcc/config/i386/sse.md
@@ -203,19 +203,35 @@
 	return standard_sse_constant_opcode (insn, operands[1]);
       case 1:
       case 2:
+	if (GET_MODE_ALIGNMENT (<MODE>mode) == 256
+	    && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
+		 && misaligned_operand (operands[0], <MODE>mode))
+		|| (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
+		    && misaligned_operand (operands[1], <MODE>mode))))
+	  gcc_unreachable ();
 	switch (get_attr_mode (insn))
 	  {
 	  case MODE_V8SF:
 	  case MODE_V4SF:
-	    return "vmovaps\t{%1, %0|%0, %1}";
+	    if (misaligned_operand (operands[0], <MODE>mode)
+		|| misaligned_operand (operands[1], <MODE>mode))
+	      return "vmovups\t{%1, %0|%0, %1}";
+	    else
+	      return "vmovaps\t{%1, %0|%0, %1}";
 	  case MODE_V4DF:
 	  case MODE_V2DF:
-	    if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+	    if (misaligned_operand (operands[0], <MODE>mode)
+		|| misaligned_operand (operands[1], <MODE>mode))
+	      return "vmovupd\t{%1, %0|%0, %1}";
+	    else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
 	      return "vmovaps\t{%1, %0|%0, %1}";
 	    else
 	      return "vmovapd\t{%1, %0|%0, %1}";
 	  default:
-	    if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+	    if (misaligned_operand (operands[0], <MODE>mode)
+		|| misaligned_operand (operands[1], <MODE>mode))
+	      return "vmovdqu\t{%1, %0|%0, %1}";
+	    else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
 	      return "vmovaps\t{%1, %0|%0, %1}";
 	    else
 	      return "vmovdqa\t{%1, %0|%0, %1}";
@@ -400,7 +416,15 @@
 	  UNSPEC_MOVU))]
   "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
+{
+  if (GET_MODE_ALIGNMENT (<MODE>mode) == 256
+      && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
+	   && misaligned_operand (operands[0], <MODE>mode))
+	  || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
+	      && misaligned_operand (operands[1], <MODE>mode))))
+    gcc_unreachable ();
+  return "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
+}
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
    (set_attr "prefix" "vex")
@@ -459,7 +483,15 @@
 	  [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
 	  UNSPEC_MOVU))]
   "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "vmovdqu\t{%1, %0|%0, %1}"
+{
+  if (GET_MODE_ALIGNMENT (<MODE>mode) == 256
+      && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
+	   && misaligned_operand (operands[0], <MODE>mode))
+	  || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
+	      && misaligned_operand (operands[1], <MODE>mode))))
+    gcc_unreachable ();
+  return "vmovdqu\t{%1, %0|%0, %1}";
+}
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
    (set_attr "prefix" "vex")

gcc/doc/invoke.texi
@@ -602,7 +602,8 @@ Objective-C and Objective-C++ Dialects}.
 -momit-leaf-frame-pointer  -mno-red-zone -mno-tls-direct-seg-refs @gol
 -mcmodel=@var{code-model} -mabi=@var{name} @gol
 -m32 -m64 -mlarge-data-threshold=@var{num} @gol
--msse2avx -mfentry -m8bit-idiv}
+-msse2avx -mfentry -m8bit-idiv @gol
+-mavx256-split-unaligned-load -mavx256-split-unaligned-store}
 
 @emph{i386 and x86-64 Windows Options}
 @gccoptlist{-mconsole -mcygwin -mno-cygwin -mdll @gol
@@ -12669,6 +12670,12 @@ run-time check.  If both dividend and divisor are within range of 0
 to 255, 8bit unsigned integer divide will be used instead of
 32bit/64bit integer divide.
 
+@item -mavx256-split-unaligned-load
+@item -mavx256-split-unaligned-store
+@opindex avx256-split-unaligned-load
+@opindex avx256-split-unaligned-store
+Split 32-byte AVX unaligned load and store.
+
 @end table
 
 These @samp{-m} switches are supported in addition to the above
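
Since ix86_option_override_internal above turns both options on by default for -mavx whenever not optimizing for size, it is mostly the -mno- forms that change code generation in practice; for example (an invocation mirroring the new *-4.c tests below):

    gcc -O3 -mavx -mno-avx256-split-unaligned-load \
        -mno-avx256-split-unaligned-store -S test.c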

gcc/testsuite/ChangeLog
@@ -1,3 +1,20 @@
+2011-03-27  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* gcc.target/i386/avx256-unaligned-load-1.c: New.
+	* gcc.target/i386/avx256-unaligned-load-2.c: Likewise.
+	* gcc.target/i386/avx256-unaligned-load-3.c: Likewise.
+	* gcc.target/i386/avx256-unaligned-load-4.c: Likewise.
+	* gcc.target/i386/avx256-unaligned-load-5.c: Likewise.
+	* gcc.target/i386/avx256-unaligned-load-6.c: Likewise.
+	* gcc.target/i386/avx256-unaligned-load-7.c: Likewise.
+	* gcc.target/i386/avx256-unaligned-store-1.c: Likewise.
+	* gcc.target/i386/avx256-unaligned-store-2.c: Likewise.
+	* gcc.target/i386/avx256-unaligned-store-3.c: Likewise.
+	* gcc.target/i386/avx256-unaligned-store-4.c: Likewise.
+	* gcc.target/i386/avx256-unaligned-store-5.c: Likewise.
+	* gcc.target/i386/avx256-unaligned-store-6.c: Likewise.
+	* gcc.target/i386/avx256-unaligned-store-7.c: Likewise.
+
 2011-03-27  Thomas Koenig  <tkoenig@gcc.gnu.org>
 
 	PR fortran/47065

gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c (new file)
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
+
+#define N 1024
+
+float a[N], b[N+3], c[N];
+
+void
+avx_test (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    c[i] = a[i] * b[i+3];
+}
+
+/* { dg-final { scan-assembler-not "\\*avx_movups256/1" } } */
+/* { dg-final { scan-assembler "\\*avx_movups/1" } } */
+/* { dg-final { scan-assembler "vinsertf128" } } */

gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c (new file)
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
+
+#define N 1024
+
+char **ep;
+char **fp;
+
+void
+avx_test (void)
+{
+  int i;
+  char **ap;
+  char **bp;
+  char **cp;
+
+  ap = ep;
+  bp = fp;
+  for (i = 128; i >= 0; i--)
+    {
+      *ap++ = *cp++;
+      *bp++ = 0;
+    }
+}
+
+/* { dg-final { scan-assembler-not "\\*avx_movdqu256/1" } } */
+/* { dg-final { scan-assembler "\\*avx_movdqu/1" } } */
+/* { dg-final { scan-assembler "vinsertf128" } } */

gcc/testsuite/gcc.target/i386/avx256-unaligned-load-3.c (new file)
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
+
+#define N 1024
+
+double a[N], b[N+3], c[N];
+
+void
+avx_test (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    c[i] = a[i] * b[i+3];
+}
+
+/* { dg-final { scan-assembler-not "\\*avx_movupd256/1" } } */
+/* { dg-final { scan-assembler "\\*avx_movupd/1" } } */
+/* { dg-final { scan-assembler "vinsertf128" } } */

gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c (new file)
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -dp -mavx -mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store" } */
+
+#define N 1024
+
+float a[N], b[N+3];
+
+void
+avx_test (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    b[i] = a[i+3] * 2;
+}
+
+/* { dg-final { scan-assembler "\\*avx_movups256/1" } } */
+/* { dg-final { scan-assembler-not "\\*avx_movups/1" } } */
+/* { dg-final { scan-assembler-not "vinsertf128" } } */

gcc/testsuite/gcc.target/i386/avx256-unaligned-load-5.c (new file)
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
+
+#include "avx-check.h"
+
+#define N 8
+
+float a[N+3] = { -1, -1, -1, 24.43, 68.346, 43.35,
+		 546.46, 46.79, 82.78, 82.7, 9.4 };
+float b[N];
+float c[N];
+
+void
+foo (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    b[i] = a[i+3] * 2;
+}
+
+__attribute__ ((noinline))
+float
+bar (float x)
+{
+  return x * 2;
+}
+
+void
+avx_test (void)
+{
+  int i;
+
+  foo ();
+
+  for (i = 0; i < N; i++)
+    c[i] = bar (a[i+3]);
+
+  for (i = 0; i < N; i++)
+    if (b[i] != c[i])
+      abort ();
+}

gcc/testsuite/gcc.target/i386/avx256-unaligned-load-6.c (new file)
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
+
+#include "avx-check.h"
+
+#define N 4
+
+double a[N+3] = { -1, -1, -1, 24.43, 68.346, 43.35, 546.46 };
+double b[N];
+double c[N];
+
+void
+foo (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    b[i] = a[i+3] * 2;
+}
+
+__attribute__ ((noinline))
+double
+bar (double x)
+{
+  return x * 2;
+}
+
+void
+avx_test (void)
+{
+  int i;
+
+  foo ();
+
+  for (i = 0; i < N; i++)
+    c[i] = bar (a[i+3]);
+
+  for (i = 0; i < N; i++)
+    if (b[i] != c[i])
+      abort ();
+}

gcc/testsuite/gcc.target/i386/avx256-unaligned-load-7.c (new file)
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
+
+#include "avx-check.h"
+
+#define N 128
+
+char **ep;
+char **fp;
+char **mp;
+char **lp;
+
+__attribute__ ((noinline))
+void
+foo (void)
+{
+  mp = (char **) malloc (N);
+  lp = (char **) malloc (N);
+  ep = (char **) malloc (N);
+  fp = (char **) malloc (N);
+}
+
+void
+avx_test (void)
+{
+  int i;
+  char **ap, **bp, **cp, **dp;
+  char *str = "STR";
+
+  foo ();
+
+  cp = mp;
+  dp = lp;
+
+  for (i = N; i >= 0; i--)
+    {
+      *cp++ = str;
+      *dp++ = str;
+    }
+
+  ap = ep;
+  bp = fp;
+  cp = mp;
+  dp = lp;
+
+  for (i = N; i >= 0; i--)
+    {
+      *ap++ = *cp++;
+      *bp++ = *dp++;
+    }
+
+  for (i = N; i >= 0; i--)
+    {
+      if (strcmp (*--ap, "STR") != 0)
+	abort ();
+      if (strcmp (*--bp, "STR") != 0)
+	abort ();
+    }
+}

gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c (new file)
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
+
+#define N 1024
+
+float a[N], b[N+3], c[N], d[N];
+
+void
+avx_test (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    b[i+3] = a[i] * 10.0;
+
+  for (i = 0; i < N; i++)
+    d[i] = c[i] * 20.0;
+}
+
+/* { dg-final { scan-assembler-not "\\*avx_movups256/2" } } */
+/* { dg-final { scan-assembler "movups.*\\*avx_movv4sf_internal/3" } } */
+/* { dg-final { scan-assembler "vextractf128" } } */

gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c (new file)
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
+
+#define N 1024
+
+char **ep;
+char **fp;
+
+void
+avx_test (void)
+{
+  int i;
+  char **ap;
+  char **bp;
+  char **cp;
+
+  ap = ep;
+  bp = fp;
+  for (i = 128; i >= 0; i--)
+    {
+      *ap++ = *cp++;
+      *bp++ = 0;
+    }
+}
+
+/* { dg-final { scan-assembler-not "\\*avx_movdqu256/2" } } */
+/* { dg-final { scan-assembler "movdqu.*\\*avx_movv16qi_internal/3" } } */
+/* { dg-final { scan-assembler "vextractf128" } } */

gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c (new file)
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
+
+#define N 1024
+
+double a[N], b[N+3], c[N], d[N];
+
+void
+avx_test (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    b[i+3] = a[i] * 10.0;
+
+  for (i = 0; i < N; i++)
+    d[i] = c[i] * 20.0;
+}
+
+/* { dg-final { scan-assembler-not "\\*avx_movupd256/2" } } */
+/* { dg-final { scan-assembler "movupd.*\\*avx_movv2df_internal/3" } } */
+/* { dg-final { scan-assembler "vextractf128" } } */

gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c (new file)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -dp -mavx -mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store" } */
+
+#define N 1024
+
+float a[N], b[N+3], c[N];
+
+void
+avx_test (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    b[i+3] = a[i] * c[i];
+}
+
+/* { dg-final { scan-assembler "\\*avx_movups256/2" } } */
+/* { dg-final { scan-assembler-not "\\*avx_movups/2" } } */
+/* { dg-final { scan-assembler-not "\\*avx_movv4sf_internal/3" } } */
+/* { dg-final { scan-assembler-not "vextractf128" } } */

gcc/testsuite/gcc.target/i386/avx256-unaligned-store-5.c (new file)
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
+
+#include "avx-check.h"
+
+#define N 8
+
+float a[N] = { 24.43, 68.346, 43.35, 546.46, 46.79, 82.78, 82.7, 9.4 };
+float b[N+3];
+float c[N+3];
+
+void
+foo (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    b[i+3] = a[i] * 2;
+}
+
+__attribute__ ((noinline))
+float
+bar (float x)
+{
+  return x * 2;
+}
+
+void
+avx_test (void)
+{
+  int i;
+
+  foo ();
+
+  for (i = 0; i < N; i++)
+    c[i+3] = bar (a[i]);
+
+  for (i = 0; i < N; i++)
+    if (b[i+3] != c[i+3])
+      abort ();
+}

gcc/testsuite/gcc.target/i386/avx256-unaligned-store-6.c (new file)
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
+
+#include "avx-check.h"
+
+#define N 4
+
+double a[N] = { 24.43, 68.346, 43.35, 546.46 };
+double b[N+3];
+double c[N+3];
+
+void
+foo (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    b[i+3] = a[i] * 2;
+}
+
+__attribute__ ((noinline))
+double
+bar (double x)
+{
+  return x * 2;
+}
+
+void
+avx_test (void)
+{
+  int i;
+
+  foo ();
+
+  for (i = 0; i < N; i++)
+    c[i+3] = bar (a[i]);
+
+  for (i = 0; i < N; i++)
+    if (b[i+3] != c[i+3])
+      abort ();
+}

gcc/testsuite/gcc.target/i386/avx256-unaligned-store-7.c (new file)
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
+
+#include "avx-check.h"
+
+#define N 128
+
+char **ep;
+char **fp;
+
+__attribute__ ((noinline))
+void
+foo (void)
+{
+  ep = (char **) malloc (N);
+  fp = (char **) malloc (N);
+}
+
+void
+avx_test (void)
+{
+  int i;
+  char **ap, **bp;
+  char *str = "STR";
+
+  foo ();
+
+  ap = ep;
+  bp = fp;
+
+  for (i = N; i >= 0; i--)
+    {
+      *ap++ = str;
+      *bp++ = str;
+    }
+
+  for (i = N; i >= 0; i--)
+    {
+      if (strcmp (*--ap, "STR") != 0)
+	abort ();
+      if (strcmp (*--bp, "STR") != 0)
+	abort ();
+    }
+}