From 1ee488391c01524d409dfe02f6c9d4b53d6f451f Mon Sep 17 00:00:00 2001
From: Jakub Jelinek
Date: Thu, 27 Oct 2011 21:53:28 +0200
Subject: [PATCH] sse.md (avx_cvtpd2dq256_2, [...]): New expanders.

	* config/i386/sse.md (avx_cvtpd2dq256_2, avx_cvttpd2dq256_2,
	vec_pack_sfix_trunc_v4df, vec_pack_sfix_v4df): New expanders.
	(*avx_cvtpd2dq256_2, *avx_cvttpd2dq256_2): New insns.

	* gcc.target/i386/sse2-cvt-1.c: New test.
	* gcc.target/i386/sse2-cvt-2.c: New test.
	* gcc.target/i386/avx-cvt-1.c: New test.
	* gcc.target/i386/avx-cvt-2.c: New test.
	* gcc.target/i386/avx2-cvt-1.c: New test.
	* gcc.target/i386/avx2-cvt-2.c: New test.

From-SVN: r180580
---
 gcc/ChangeLog                              |   4 +
 gcc/config/i386/sse.md                     |  74 ++++++++++++++
 gcc/testsuite/ChangeLog                    |   9 ++
 gcc/testsuite/gcc.target/i386/avx-cvt-1.c  |  13 +++
 gcc/testsuite/gcc.target/i386/avx-cvt-2.c  |  13 +++
 gcc/testsuite/gcc.target/i386/avx2-cvt-1.c |  13 +++
 gcc/testsuite/gcc.target/i386/avx2-cvt-2.c |  13 +++
 gcc/testsuite/gcc.target/i386/sse2-cvt-1.c | 111 +++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/sse2-cvt-2.c |  13 +++
 9 files changed, 263 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-cvt-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-cvt-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx2-cvt-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx2-cvt-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-cvt-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-cvt-2.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e64511d7eb0..403fb604d28 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,9 @@
 2011-10-27  Jakub Jelinek
 
+	* config/i386/sse.md (avx_cvtpd2dq256_2, avx_cvttpd2dq256_2,
+	vec_pack_sfix_trunc_v4df, vec_pack_sfix_v4df): New expanders.
+	(*avx_cvtpd2dq256_2, *avx_cvttpd2dq256_2): New insns.
+
 	* config/i386/i386.c (ix86_print_operand): Handle 'q' and 'x'
 	overrides for -masm=intel memory.
 	* config/i386/sse.md (sse2_cvtdq2pd, sse2_cvtps2pd,
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 25854e4ca7b..638be461312 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2544,6 +2544,27 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
+(define_expand "avx_cvtpd2dq256_2"
+  [(set (match_operand:V8SI 0 "register_operand" "")
+	(vec_concat:V8SI
+	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
+		       UNSPEC_FIX_NOTRUNC)
+	  (match_dup 2)))]
+  "TARGET_AVX"
+  "operands[2] = CONST0_RTX (V4SImode);")
+
+(define_insn "*avx_cvtpd2dq256_2"
+  [(set (match_operand:V8SI 0 "register_operand" "=x")
+	(vec_concat:V8SI
+	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
+		       UNSPEC_FIX_NOTRUNC)
+	  (match_operand:V4SI 2 "const0_operand" "")))]
+  "TARGET_AVX"
+  "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "OI")])
+
 (define_expand "sse2_cvtpd2dq"
   [(set (match_operand:V4SI 0 "register_operand" "")
 	(vec_concat:V4SI
@@ -2584,6 +2605,25 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
+(define_expand "avx_cvttpd2dq256_2"
+  [(set (match_operand:V8SI 0 "register_operand" "")
+	(vec_concat:V8SI
+	  (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
+	  (match_dup 2)))]
+  "TARGET_AVX"
+  "operands[2] = CONST0_RTX (V4SImode);")
+
+(define_insn "*avx_cvttpd2dq256_2"
+  [(set (match_operand:V8SI 0 "register_operand" "=x")
+	(vec_concat:V8SI
+	  (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
+	  (match_operand:V4SI 2 "const0_operand" "")))]
+  "TARGET_AVX"
+  "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "OI")])
+
 (define_expand "sse2_cvttpd2dq"
   [(set (match_operand:V4SI 0 "register_operand" "")
 	(vec_concat:V4SI
@@ -3027,6 +3067,23 @@
   DONE;
 })
 
+(define_expand "vec_pack_sfix_trunc_v4df"
+  [(match_operand:V8SI 0 "register_operand" "")
+   (match_operand:V4DF 1 "nonimmediate_operand" "")
+   (match_operand:V4DF 2 "nonimmediate_operand" "")]
+  "TARGET_AVX"
+{
+  rtx r1, r2;
+
+  r1 = gen_reg_rtx (V8SImode);
+  r2 = gen_reg_rtx (V8SImode);
+
+  emit_insn (gen_avx_cvttpd2dq256_2 (r1, operands[1]));
+  emit_insn (gen_avx_cvttpd2dq256_2 (r2, operands[2]));
+  emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20)));
+  DONE;
+})
+
 (define_expand "vec_pack_sfix_trunc_v2df"
   [(match_operand:V4SI 0 "register_operand" "")
    (match_operand:V2DF 1 "nonimmediate_operand" "")
@@ -3046,6 +3103,23 @@
   DONE;
 })
 
+(define_expand "vec_pack_sfix_v4df"
+  [(match_operand:V8SI 0 "register_operand" "")
+   (match_operand:V4DF 1 "nonimmediate_operand" "")
+   (match_operand:V4DF 2 "nonimmediate_operand" "")]
+  "TARGET_AVX"
+{
+  rtx r1, r2;
+
+  r1 = gen_reg_rtx (V8SImode);
+  r2 = gen_reg_rtx (V8SImode);
+
+  emit_insn (gen_avx_cvtpd2dq256_2 (r1, operands[1]));
+  emit_insn (gen_avx_cvtpd2dq256_2 (r2, operands[2]));
+  emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20)));
+  DONE;
+})
+
 (define_expand "vec_pack_sfix_v2df"
   [(match_operand:V4SI 0 "register_operand" "")
    (match_operand:V2DF 1 "nonimmediate_operand" "")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 45fa9a459c9..1f6f0277163 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,12 @@
+2011-10-27  Jakub Jelinek
+
+	* gcc.target/i386/sse2-cvt-1.c: New test.
+	* gcc.target/i386/sse2-cvt-2.c: New test.
+	* gcc.target/i386/avx-cvt-1.c: New test.
+	* gcc.target/i386/avx-cvt-2.c: New test.
+	* gcc.target/i386/avx2-cvt-1.c: New test.
+	* gcc.target/i386/avx2-cvt-2.c: New test.
+
 2011-10-27  Martin Jambor
 
 	* g++.dg/tree-ssa/pr45605.C: Scan fre1 dump and unxfail.
diff --git a/gcc/testsuite/gcc.target/i386/avx-cvt-1.c b/gcc/testsuite/gcc.target/i386/avx-cvt-1.c
new file mode 100644
index 00000000000..ce651649d64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-cvt-1.c
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mavx -mno-avx2" } */
+/* { dg-require-effective-target avx_runtime } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include "sse2-cvt-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx-cvt-2.c b/gcc/testsuite/gcc.target/i386/avx-cvt-2.c
new file mode 100644
index 00000000000..78c6398f341
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-cvt-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx -mno-avx2 -fdump-tree-vect-details" } */
+
+#include "avx-cvt-1.c"
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops in function" 6 "vect" } } */
+/* { dg-final { scan-assembler "vcvttpd2dq(y\[^\n\r\]*%xmm|\[^\n\r\]*xmm\[^\n\r\]*YMMWORD PTR)" } } */
+/* { dg-final { scan-assembler "vcvtdq2ps\[^\n\r\]*xmm" } } */
+/* { dg-final { scan-assembler "vcvtps2pd\[^\n\r\]*(%xmm\[^\n\r\]*%ymm|ymm\[^\n\r\]*xmm)" } } */
+/* { dg-final { scan-assembler "vcvttps2dq\[^\n\r\]*ymm" } } */
+/* { dg-final { scan-assembler "vcvtdq2pd\[^\n\r\]*xmm\[^\n\r\]*xmm" } } */
+/* { dg-final { scan-assembler "vcvtpd2ps(y\[^\n\r\]*%xmm|\[^\n\r\]*xmm\[^\n\r\]*YMMWORD PTR)" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-cvt-1.c b/gcc/testsuite/gcc.target/i386/avx2-cvt-1.c
new file mode 100644
index 00000000000..9626a0666d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-cvt-1.c
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mavx2" } */
+/* { dg-require-effective-target avx2 } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx2_test
+#endif
+
+#include "sse2-cvt-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx2-cvt-2.c b/gcc/testsuite/gcc.target/i386/avx2-cvt-2.c
new file mode 100644
index 00000000000..288e5601a46
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-cvt-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */
+
+#include "avx2-cvt-1.c"
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops in function" 6 "vect" } } */
+/* { dg-final { scan-assembler "vcvttpd2dq(y\[^\n\r\]*%xmm|\[^\n\r\]*xmm\[^\n\r\]*YMMWORD PTR)" } } */
+/* { dg-final { scan-assembler "vcvtdq2ps\[^\n\r\]*ymm" } } */
+/* { dg-final { scan-assembler "vcvtps2pd\[^\n\r\]*(%xmm\[^\n\r\]*%ymm|ymm\[^\n\r\]*xmm)" } } */
+/* { dg-final { scan-assembler "vcvttps2dq\[^\n\r\]*ymm" } } */
+/* { dg-final { scan-assembler "vcvtdq2pd\[^\n\r\]*(%xmm\[^\n\r\]*%ymm|ymm\[^\n\r\]*xmm)" } } */
+/* { dg-final { scan-assembler "vcvtpd2ps(y\[^\n\r\]*%xmm|\[^\n\r\]*xmm\[^\n\r\]*YMMWORD PTR)" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse2-cvt-1.c b/gcc/testsuite/gcc.target/i386/sse2-cvt-1.c
new file mode 100644
index 00000000000..4d5683108dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-cvt-1.c
@@ -0,0 +1,111 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -msse2 -mno-avx" } */
+/* { dg-require-effective-target sse2 } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+#define N 16
+float f[N];
+double d[N];
+int n[N];
+
+__attribute__((noinline)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    n[i] = d[i];
+}
+
+__attribute__((noinline)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    f[i] = n[i];
+}
+
+__attribute__((noinline)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    d[i] = f[i];
+}
+
+__attribute__((noinline)) void
+f4 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    n[i] = f[i];
+}
+
+__attribute__((noinline)) void
+f5 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    d[i] = n[i];
+}
+
+__attribute__((noinline)) void
+f6 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    f[i] = d[i];
+}
+
+static void
+TEST ()
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      asm ("");
+      d[i] = i + 2.5;
+    }
+  f1 ();
+  for (i = 0; i < N; i++)
+    if (n[i] != i + 2)
+      abort ();
+    else
+      n[i] = i + 7;
+  f2 ();
+  for (i = 0; i < N; i++)
+    if (f[i] != i + 7)
+      abort ();
+    else
+      f[i] = i - 2.25f;
+  f3 ();
+  for (i = 0; i < N; i++)
+    if (d[i] != i - 2.25)
+      abort ();
+    else
+      f[i] = i + 3.5;
+  f4 ();
+  for (i = 0; i < N; i++)
+    if (n[i] != i + 3)
+      abort ();
+    else
+      n[i] = i + 9;
+  f5 ();
+  for (i = 0; i < N; i++)
+    if (d[i] != i + 9)
+      abort ();
+    else
+      d[i] = i - 7.25;
+  f6 ();
+  for (i = 0; i < N; i++)
+    if (f[i] != i - 7.25)
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-cvt-2.c b/gcc/testsuite/gcc.target/i386/sse2-cvt-2.c
new file mode 100644
index 00000000000..9c4519544ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-cvt-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -msse2 -mno-sse3 -fdump-tree-vect-details" } */
+
+#include "sse2-cvt-1.c"
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops in function" 6 "vect" } } */
+/* { dg-final { scan-assembler "cvttpd2dq" } } */
+/* { dg-final { scan-assembler "cvtdq2ps" } } */
+/* { dg-final { scan-assembler "cvtps2pd" } } */
+/* { dg-final { scan-assembler "cvttps2dq" } } */
+/* { dg-final { scan-assembler "cvtdq2pd" } } */
+/* { dg-final { scan-assembler "cvtpd2ps" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */