backport: re PR target/80718 (GCC generates slow code for offsettable vec_duplicate)
Back port from mainline [gcc] 2017-05-19 Michael Meissner <meissner@linux.vnet.ibm.com> PR target/80718 * config/rs6000/vsx.md (vsx_splat_<mode>, VSX_D iterator): Prefer VSX registers over GPRs, particularly on ISA 2.07 which does not have the MTVSRDD instruction. [gcc/testsuite] 2017-05-19 Michael Meissner <meissner@linux.vnet.ibm.com> PR target/80718 * gcc.target/powerpc/pr80718.c: New test. From-SVN: r248936
This commit is contained in:
parent
88c502146c
commit
5aa0092db2
|
@ -1,3 +1,13 @@
|
|||
2017-06-06 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
Back port from mainline
|
||||
2017-05-19 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
PR target/80718
|
||||
* config/rs6000/vsx.md (vsx_splat_<mode>, VSX_D iterator): Prefer
|
||||
VSX registers over GPRs, particularly on ISA 2.07 which does not
|
||||
have the MTVSRDD instruction.
|
||||
|
||||
2017-06-06 David S. Miller <davem@davemloft.net>
|
||||
|
||||
PR target/80968
|
||||
|
|
|
@ -3068,7 +3068,8 @@
|
|||
;; V2DF/V2DI splat
|
||||
(define_insn_and_split "vsx_splat_<mode>"
|
||||
[(set (match_operand:VSX_D 0 "vsx_register_operand"
|
||||
"=<VSa>, <VSa>,we,<VS_64dm>")
|
||||
"=<VSa>, <VSa>,?we,??<VS_64dm>")
|
||||
|
||||
(vec_duplicate:VSX_D
|
||||
(match_operand:<VS_scalar> 1 "splat_input_operand"
|
||||
"<VS_64reg>,Z, b, wA")))]
|
||||
|
@ -3078,8 +3079,12 @@
|
|||
lxvdsx %x0,%y1
|
||||
mtvsrdd %x0,%1,%1
|
||||
#"
|
||||
"&& reload_completed && TARGET_POWERPC64 && !TARGET_P9_VECTOR
|
||||
&& int_reg_operand (operands[1], <VS_scalar>mode)"
|
||||
"&& reload_completed
|
||||
&& !vsx_register_operand (operands[1], <VS_scalar>mode)
|
||||
&& !(MEM_P (operands[1])
|
||||
&& indexed_or_indirect_address (XEXP (operands[1], 0), Pmode))
|
||||
&& !(TARGET_POWERPC64 && TARGET_P9_VECTOR
|
||||
&& base_reg_operand (operands[1], <VS_scalar>mode))"
|
||||
[(set (match_dup 2)
|
||||
(match_dup 1))
|
||||
(set (match_dup 0)
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
2017-06-06 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
Back port from mainline
|
||||
2017-05-19 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
PR target/80718
|
||||
* gcc.target/powerpc/pr80718.c: New test.
|
||||
|
||||
2017-06-06 Thomas Koenig <tkoenig@gcc.gnu.org>
|
||||
|
||||
Backport from trunk
|
||||
|
|
|
@ -0,0 +1,298 @@
|
|||
/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
|
||||
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
|
||||
/* { dg-require-effective-target powerpc_p8vector_ok } */
|
||||
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
|
||||
/* { dg-options "-mcpu=power8 -O3 -ffast-math" } */
|
||||
|
||||
/* Taken from the Spec 2006 milc brenchmark. Ultimately, GCC wants to generate
|
||||
a DF splat from offsettable memory. The register allocator decided it was
|
||||
better to do the load in the GPR registers and do a move direct, rather than
|
||||
doing a load in the VSX register sets. */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
double real;
|
||||
double imag;
|
||||
} complex;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
double real;
|
||||
double imag;
|
||||
} double_complex;
|
||||
|
||||
complex cmplx (double x, double y);
|
||||
complex cadd (complex * a, complex * b);
|
||||
complex cmul (complex * a, complex * b);
|
||||
complex csub (complex * a, complex * b);
|
||||
complex cdiv (complex * a, complex * b);
|
||||
complex conjg (complex * a);
|
||||
complex ce_itheta (double theta);
|
||||
|
||||
double_complex dcmplx (double x, double y);
|
||||
double_complex dcadd (double_complex * a, double_complex * b);
|
||||
double_complex dcmul (double_complex * a, double_complex * b);
|
||||
double_complex dcsub (double_complex * a, double_complex * b);
|
||||
double_complex dcdiv (double_complex * a, double_complex * b);
|
||||
double_complex dconjg (double_complex * a);
|
||||
double_complex dcexp (double_complex * a);
|
||||
double_complex dclog (double_complex * a);
|
||||
double_complex dcsqrt (double_complex * z);
|
||||
double_complex dce_itheta (double theta);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned long r0, r1, r2, r3, r4, r5, r6;
|
||||
unsigned long multiplier, addend, ic_state;
|
||||
double scale;
|
||||
} double_prn;
|
||||
|
||||
double myrand (double_prn * prn_pt);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
complex e[3][3];
|
||||
} su3_matrix;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
complex c[3];
|
||||
} su3_vector;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
complex m01, m02, m12;
|
||||
double m00im, m11im, m22im;
|
||||
double space;
|
||||
} anti_hermitmat;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
complex e[2][2];
|
||||
} su2_matrix;
|
||||
typedef struct
|
||||
{
|
||||
su3_vector d[4];
|
||||
} wilson_vector;
|
||||
typedef struct
|
||||
{
|
||||
su3_vector h[2];
|
||||
} half_wilson_vector;
|
||||
typedef struct
|
||||
{
|
||||
wilson_vector c[3];
|
||||
} color_wilson_vector;
|
||||
typedef struct
|
||||
{
|
||||
wilson_vector d[4];
|
||||
} spin_wilson_vector;
|
||||
typedef struct
|
||||
{
|
||||
color_wilson_vector d[4];
|
||||
} wilson_matrix;
|
||||
typedef struct
|
||||
{
|
||||
spin_wilson_vector c[3];
|
||||
} wilson_propagator;
|
||||
|
||||
void mult_su3_nn (su3_matrix * a, su3_matrix * b, su3_matrix * c);
|
||||
void mult_su3_na (su3_matrix * a, su3_matrix * b, su3_matrix * c);
|
||||
void mult_su3_an (su3_matrix * a, su3_matrix * b, su3_matrix * c);
|
||||
double realtrace_su3 (su3_matrix * a, su3_matrix * b);
|
||||
complex trace_su3 (su3_matrix * a);
|
||||
complex complextrace_su3 (su3_matrix * a, su3_matrix * b);
|
||||
complex det_su3 (su3_matrix * a);
|
||||
void add_su3_matrix (su3_matrix * a, su3_matrix * b, su3_matrix * c);
|
||||
void sub_su3_matrix (su3_matrix * a, su3_matrix * b, su3_matrix * c);
|
||||
void scalar_mult_su3_matrix (su3_matrix * src, double scalar,
|
||||
su3_matrix * dest);
|
||||
void scalar_mult_add_su3_matrix (su3_matrix * src1, su3_matrix * src2,
|
||||
double scalar, su3_matrix * dest);
|
||||
void scalar_mult_sub_su3_matrix (su3_matrix * src1, su3_matrix * src2,
|
||||
double scalar, su3_matrix * dest);
|
||||
void c_scalar_mult_su3mat (su3_matrix * src, complex * scalar,
|
||||
su3_matrix * dest);
|
||||
void c_scalar_mult_add_su3mat (su3_matrix * src1, su3_matrix * src2,
|
||||
complex * scalar, su3_matrix * dest);
|
||||
void c_scalar_mult_sub_su3mat (su3_matrix * src1, su3_matrix * src2,
|
||||
complex * scalar, su3_matrix * dest);
|
||||
void su3_adjoint (su3_matrix * a, su3_matrix * b);
|
||||
void make_anti_hermitian (su3_matrix * m3, anti_hermitmat * ah3);
|
||||
void random_anti_hermitian (anti_hermitmat * mat_antihermit,
|
||||
double_prn * prn_pt);
|
||||
void uncompress_anti_hermitian (anti_hermitmat * mat_anti, su3_matrix * mat);
|
||||
void compress_anti_hermitian (su3_matrix * mat, anti_hermitmat * mat_anti);
|
||||
void clear_su3mat (su3_matrix * dest);
|
||||
void su3mat_copy (su3_matrix * a, su3_matrix * b);
|
||||
void dumpmat (su3_matrix * m);
|
||||
|
||||
void su3_projector (su3_vector * a, su3_vector * b, su3_matrix * c);
|
||||
complex su3_dot (su3_vector * a, su3_vector * b);
|
||||
double su3_rdot (su3_vector * a, su3_vector * b);
|
||||
double magsq_su3vec (su3_vector * a);
|
||||
void su3vec_copy (su3_vector * a, su3_vector * b);
|
||||
void dumpvec (su3_vector * v);
|
||||
void clearvec (su3_vector * v);
|
||||
|
||||
void mult_su3_mat_vec (su3_matrix * a, su3_vector * b, su3_vector * c);
|
||||
void mult_su3_mat_vec_sum (su3_matrix * a, su3_vector * b, su3_vector * c);
|
||||
void mult_su3_mat_vec_sum_4dir (su3_matrix * a, su3_vector * b0,
|
||||
su3_vector * b1, su3_vector * b2,
|
||||
su3_vector * b3, su3_vector * c);
|
||||
void mult_su3_mat_vec_nsum (su3_matrix * a, su3_vector * b, su3_vector * c);
|
||||
void mult_adj_su3_mat_vec (su3_matrix * a, su3_vector * b, su3_vector * c);
|
||||
void mult_adj_su3_mat_vec_4dir (su3_matrix * a, su3_vector * b,
|
||||
su3_vector * c);
|
||||
void mult_adj_su3_mat_4vec (su3_matrix * mat, su3_vector * src,
|
||||
su3_vector * dest0, su3_vector * dest1,
|
||||
su3_vector * dest2, su3_vector * dest3);
|
||||
void mult_adj_su3_mat_vec_sum (su3_matrix * a, su3_vector * b,
|
||||
su3_vector * c);
|
||||
void mult_adj_su3_mat_vec_nsum (su3_matrix * a, su3_vector * b,
|
||||
su3_vector * c);
|
||||
|
||||
void add_su3_vector (su3_vector * a, su3_vector * b, su3_vector * c);
|
||||
void sub_su3_vector (su3_vector * a, su3_vector * b, su3_vector * c);
|
||||
void sub_four_su3_vecs (su3_vector * a, su3_vector * b1, su3_vector * b2,
|
||||
su3_vector * b3, su3_vector * b4);
|
||||
|
||||
void scalar_mult_su3_vector (su3_vector * src, double scalar,
|
||||
su3_vector * dest);
|
||||
void scalar_mult_add_su3_vector (su3_vector * src1, su3_vector * src2,
|
||||
double scalar, su3_vector * dest);
|
||||
void scalar_mult_sum_su3_vector (su3_vector * src1, su3_vector * src2,
|
||||
double scalar);
|
||||
void scalar_mult_sub_su3_vector (su3_vector * src1, su3_vector * src2,
|
||||
double scalar, su3_vector * dest);
|
||||
void scalar_mult_wvec (wilson_vector * src, double s, wilson_vector * dest);
|
||||
void scalar_mult_hwvec (half_wilson_vector * src, double s,
|
||||
half_wilson_vector * dest);
|
||||
void scalar_mult_add_wvec (wilson_vector * src1, wilson_vector * src2,
|
||||
double scalar, wilson_vector * dest);
|
||||
void scalar_mult_addtm_wvec (wilson_vector * src1, wilson_vector * src2,
|
||||
double scalar, wilson_vector * dest);
|
||||
void c_scalar_mult_wvec (wilson_vector * src1, complex * phase,
|
||||
wilson_vector * dest);
|
||||
void c_scalar_mult_add_wvec (wilson_vector * src1, wilson_vector * src2,
|
||||
complex * phase, wilson_vector * dest);
|
||||
void c_scalar_mult_add_wvec2 (wilson_vector * src1, wilson_vector * src2,
|
||||
complex s, wilson_vector * dest);
|
||||
void c_scalar_mult_su3vec (su3_vector * src, complex * phase,
|
||||
su3_vector * dest);
|
||||
void c_scalar_mult_add_su3vec (su3_vector * v1, complex * phase,
|
||||
su3_vector * v2);
|
||||
void c_scalar_mult_sub_su3vec (su3_vector * v1, complex * phase,
|
||||
su3_vector * v2);
|
||||
|
||||
void left_su2_hit_n (su2_matrix * u, int p, int q, su3_matrix * link);
|
||||
void right_su2_hit_a (su2_matrix * u, int p, int q, su3_matrix * link);
|
||||
void dumpsu2 (su2_matrix * u);
|
||||
void mult_su2_mat_vec_elem_n (su2_matrix * u, complex * x0, complex * x1);
|
||||
void mult_su2_mat_vec_elem_a (su2_matrix * u, complex * x0, complex * x1);
|
||||
|
||||
void mult_mat_wilson_vec (su3_matrix * mat, wilson_vector * src,
|
||||
wilson_vector * dest);
|
||||
void mult_su3_mat_hwvec (su3_matrix * mat, half_wilson_vector * src,
|
||||
half_wilson_vector * dest);
|
||||
void mult_adj_mat_wilson_vec (su3_matrix * mat, wilson_vector * src,
|
||||
wilson_vector * dest);
|
||||
void mult_adj_su3_mat_hwvec (su3_matrix * mat, half_wilson_vector * src,
|
||||
half_wilson_vector * dest);
|
||||
|
||||
void add_wilson_vector (wilson_vector * src1, wilson_vector * src2,
|
||||
wilson_vector * dest);
|
||||
void sub_wilson_vector (wilson_vector * src1, wilson_vector * src2,
|
||||
wilson_vector * dest);
|
||||
double magsq_wvec (wilson_vector * src);
|
||||
complex wvec_dot (wilson_vector * src1, wilson_vector * src2);
|
||||
complex wvec2_dot (wilson_vector * src1, wilson_vector * src2);
|
||||
double wvec_rdot (wilson_vector * a, wilson_vector * b);
|
||||
|
||||
void wp_shrink (wilson_vector * src, half_wilson_vector * dest,
|
||||
int dir, int sign);
|
||||
void wp_shrink_4dir (wilson_vector * a, half_wilson_vector * b1,
|
||||
half_wilson_vector * b2, half_wilson_vector * b3,
|
||||
half_wilson_vector * b4, int sign);
|
||||
void wp_grow (half_wilson_vector * src, wilson_vector * dest,
|
||||
int dir, int sign);
|
||||
void wp_grow_add (half_wilson_vector * src, wilson_vector * dest,
|
||||
int dir, int sign);
|
||||
void grow_add_four_wvecs (wilson_vector * a, half_wilson_vector * b1,
|
||||
half_wilson_vector * b2, half_wilson_vector * b3,
|
||||
half_wilson_vector * b4, int sign, int sum);
|
||||
void mult_by_gamma (wilson_vector * src, wilson_vector * dest, int dir);
|
||||
void mult_by_gamma_left (wilson_matrix * src, wilson_matrix * dest, int dir);
|
||||
void mult_by_gamma_right (wilson_matrix * src, wilson_matrix * dest, int dir);
|
||||
void mult_swv_by_gamma_l (spin_wilson_vector * src, spin_wilson_vector * dest,
|
||||
int dir);
|
||||
void mult_swv_by_gamma_r (spin_wilson_vector * src, spin_wilson_vector * dest,
|
||||
int dir);
|
||||
void su3_projector_w (wilson_vector * a, wilson_vector * b, su3_matrix * c);
|
||||
void clear_wvec (wilson_vector * dest);
|
||||
void copy_wvec (wilson_vector * src, wilson_vector * dest);
|
||||
void dump_wilson_vec (wilson_vector * src);
|
||||
|
||||
double gaussian_rand_no (double_prn * prn_pt);
|
||||
typedef int int32type;
|
||||
typedef unsigned int u_int32type;
|
||||
void byterevn (int32type w[], int n);
|
||||
|
||||
void
|
||||
mult_adj_su3_mat_vec (su3_matrix * a, su3_vector * b, su3_vector * c)
|
||||
{
|
||||
int i;
|
||||
register double t, ar, ai, br, bi, cr, ci;
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
ar = a->e[0][i].real;
|
||||
ai = a->e[0][i].imag;
|
||||
|
||||
br = b->c[0].real;
|
||||
bi = b->c[0].imag;
|
||||
|
||||
cr = ar * br;
|
||||
t = ai * bi;
|
||||
cr += t;
|
||||
|
||||
ci = ar * bi;
|
||||
t = ai * br;
|
||||
ci -= t;
|
||||
|
||||
ar = a->e[1][i].real;
|
||||
ai = a->e[1][i].imag;
|
||||
|
||||
br = b->c[1].real;
|
||||
bi = b->c[1].imag;
|
||||
|
||||
t = ar * br;
|
||||
cr += t;
|
||||
t = ai * bi;
|
||||
cr += t;
|
||||
|
||||
t = ar * bi;
|
||||
ci += t;
|
||||
t = ai * br;
|
||||
ci -= t;
|
||||
|
||||
ar = a->e[2][i].real;
|
||||
ai = a->e[2][i].imag;
|
||||
|
||||
br = b->c[2].real;
|
||||
bi = b->c[2].imag;
|
||||
|
||||
t = ar * br;
|
||||
cr += t;
|
||||
t = ai * bi;
|
||||
cr += t;
|
||||
|
||||
t = ar * bi;
|
||||
ci += t;
|
||||
t = ai * br;
|
||||
ci -= t;
|
||||
|
||||
c->c[i].real = cr;
|
||||
c->c[i].imag = ci;
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "mtvsrd" } } */
|
Loading…
Reference in New Issue