diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0361b40677d..b4e3cab342c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2010-02-23  Jakub Jelinek
+
+	PR target/43107
+	* config/i386/i386.c (avx_vpermilp_parallel): Reject indexes
+	greater or equal to nelt instead of 2 * nelt.
+	(expand_vec_perm_1): When op0 and op1 are equal, mask indexes
+	with nelt - 1.
+
 2010-02-23  Jason Merrill
 
 	PR debug/42800
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 84f5f535a51..bb03cba0a04 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -24673,7 +24673,7 @@ avx_vpermilp_parallel (rtx par, enum machine_mode mode)
       if (!CONST_INT_P (er))
 	return 0;
       ei = INTVAL (er);
-      if (ei >= 2 * nelt)
+      if (ei >= nelt)
 	return 0;
       ipar[i] = ei;
     }
@@ -29265,7 +29265,12 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
      input where SEL+CONCAT may not.  */
   if (d->op0 == d->op1)
     {
-      if (expand_vselect (d->target, d->op0, d->perm, nelt))
+      int mask = nelt - 1;
+
+      for (i = 0; i < nelt; i++)
+	perm2[i] = d->perm[i] & mask;
+
+      if (expand_vselect (d->target, d->op0, perm2, nelt))
 	return true;
 
       /* There are plenty of patterns in sse.md that are written for
@@ -29276,8 +29281,8 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
 	 every other permutation operand.  */
       for (i = 0; i < nelt; i += 2)
 	{
-	  perm2[i] = d->perm[i];
-	  perm2[i+1] = d->perm[i+1] + nelt;
+	  perm2[i] = d->perm[i] & mask;
+	  perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
 	}
       if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
 	return true;
@@ -29285,11 +29290,12 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
       /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
       if (nelt >= 4)
 	{
-	  memcpy (perm2, d->perm, nelt);
-	  for (i = 2; i < nelt; i += 4)
+	  for (i = 0; i < nelt; i += 4)
 	    {
-	      perm2[i+0] += nelt;
-	      perm2[i+1] += nelt;
+	      perm2[i + 0] = d->perm[i + 0] & mask;
+	      perm2[i + 1] = d->perm[i + 1] & mask;
+	      perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
+	      perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
 	    }
 
 	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 5f9fdc03f38..dc688dd61ee 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2010-02-23  Jakub Jelinek
+
+	PR target/43107
+	* gcc.target/i386/pr43107.c: New test.
+
 2010-02-23  Jason Merrill
 
 	PR c++/43143
diff --git a/gcc/testsuite/gcc.target/i386/pr43107.c b/gcc/testsuite/gcc.target/i386/pr43107.c
new file mode 100644
index 00000000000..87965293116
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr43107.c
@@ -0,0 +1,20 @@
+/* PR target/43107 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx" } */
+
+extern void bar (float b[4][4]);
+
+void
+foo ()
+{
+  float a[4][4], b[4][4];
+  int i, j;
+  for (i = 0; i < 4; i++)
+    {
+      for (j = 0; j < 4; j++)
+	a[i][j] = 0;
+      for (j = 0; j < 4; j++)
+	b[i][j] = a[i][j];
+    }
+  bar (b);
+}