From a0d007d67c85ecf4030bad8a6cf95f31bc6cb656 Mon Sep 17 00:00:00 2001
From: Roger Sayle <roger@nextmovesoftware.com>
Date: Tue, 28 Jul 2020 15:55:47 +0200
Subject: [PATCH] nvptx: Support floating point reciprocal instructions

The following patch addds support for PTX's rcp.rn.f32 and rcp.rn.f64
instructions.  Note that the "rcp.rn" forms of this instruction
calculate the fully IEEE compliant result for the reciprocal, unlike
the rcp.approx variants that just provide fast approximations.

This patch has been tested on nvptx-none hosted on x86_64-pc-linux-gnu
with "make" and "make check" with no new regressions.

2020-07-12  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog:

	* config/nvptx/nvptx.md (recip<mode>2): New instruction.

gcc/testsuite/ChangeLog:

	* gcc.target/nvptx/recip-1.c: New test.
---
 gcc/config/nvptx/nvptx.md                |  9 +++++++++
 gcc/testsuite/gcc.target/nvptx/recip-1.c | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/nvptx/recip-1.c
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 0538e834a4c..746d6ec4124 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -879,6 +879,15 @@
   ""
   "%.\\tfma%#%t0\\t%0, %1, %2, %3;")
 
+(define_insn "*recip<mode>2"
+  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+	(div:SDFM
+	  (match_operand:SDFM 2 "const_double_operand" "F")
+	  (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
+  "CONST_DOUBLE_P (operands[2])
+   && real_identical (CONST_DOUBLE_REAL_VALUE (operands[2]), &dconst1)"
+  "%.\\trcp%#%t0\\t%0, %1;")
+
 (define_insn "div<mode>3"
   [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
 	(div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
diff --git a/gcc/testsuite/gcc.target/nvptx/recip-1.c b/gcc/testsuite/gcc.target/nvptx/recip-1.c
new file mode 100644
index 00000000000..18127876046
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/recip-1.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -save-temps" } */
+
+double
+foo (double x)
+{
+  return 1.0 / x;
+}
+
+float
+foof (float x)
+{
+  return 1.0f / x;
+}
+
+/* { dg-final { scan-assembler-times "rcp.rn.f64" 1 } } */
+/* { dg-final { scan-assembler-times "rcp.rn.f32" 1 } } */
+