re PR target/14471 (Workaround for errata regarding multiplication bug)

PR target/14471 * configure.ac (Target-specific assembler checks) <cris-*-*>: New case, checking for -no-mul-bug-abort option. * configure, config.in: Regenerate. * doc/invoke.texi (CRIS Options): Document -mmul-bug-workaround and -mno-mul-bug-workaround. * config/cris/cris.md ("smulsi3_highpart", "umulsi3_highpart") ("mulsidi3", "umulsidi3"): Prefix output template with "%!". ("umulhisi3", "umulqihi3", "mulsi3", "mulqihi3", "mulhisi3"): Ditto. Make attribute "slottable" dependent on TARGET_MUL_BUG. * config/cris/mulsi3.asm (__Mul) [__CRIS_arch_version >= 10]: Make sure mulu.d is not last on cache-line. * config/cris/cris.h (ASM_SPEC): Translate -mno-mul-bug-workaround into -no-mul-bug-abort depending on HAVE_AS_MUL_BUG_ABORT_OPTION. (TARGET_MASK_MUL_BUG, TARGET_MUL_BUG): New macros. (TARGET_SWITCHES): New options -mmul-bug-workaround and -mno-mul-bug-workaround. (TARGET_DEFAULT): Include TARGET_MASK_MUL_BUG. (PRINT_OPERAND_PUNCT_VALID_P): Include '!'. * config/cris/cris.c (cris_operand_extend_operator): Clarify relation to MULT in head comment. (cris_op_str): Abort for MULT. (cris_print_operand) <case '!'>: New case. From-SVN: r79085
2004-03-08 00:32:37 +00:00 · 2004-03-08 00:32:37 +00:00 · 86da66b593
parent f5fb3886b2
commit 86da66b593
8 changed files with 1318 additions and 498 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,29 @@
+2004-03-08  Hans-Peter Nilsson  <hp@axis.com>
+
+	PR target/14471
+	* configure.ac (Target-specific assembler checks) <cris-*-*>: New
+	case, checking for -no-mul-bug-abort option.
+	* configure, config.in: Regenerate.
+	* doc/invoke.texi (CRIS Options): Document -mmul-bug-workaround
+	and -mno-mul-bug-workaround.
+	* config/cris/cris.md ("smulsi3_highpart", "umulsi3_highpart")
+	("mulsidi3", "umulsidi3"): Prefix output template with "%!".
+	("umulhisi3", "umulqihi3", "mulsi3", "mulqihi3", "mulhisi3"):
+	Ditto.  Make attribute "slottable" dependent on TARGET_MUL_BUG.
+	* config/cris/mulsi3.asm (__Mul) [__CRIS_arch_version >= 10]: Make
+	sure mulu.d is not last on cache-line.
+	* config/cris/cris.h (ASM_SPEC): Translate -mno-mul-bug-workaround
+	into -no-mul-bug-abort depending on HAVE_AS_MUL_BUG_ABORT_OPTION.
+	(TARGET_MASK_MUL_BUG, TARGET_MUL_BUG): New macros.
+	(TARGET_SWITCHES): New options -mmul-bug-workaround and
+	-mno-mul-bug-workaround.
+	(TARGET_DEFAULT): Include TARGET_MASK_MUL_BUG.
+	(PRINT_OPERAND_PUNCT_VALID_P): Include '!'.
+	* config/cris/cris.c (cris_operand_extend_operator): Clarify
+	relation to MULT in head comment.
+	(cris_op_str): Abort for MULT.
+	(cris_print_operand) <case '!'>: New case.
+
 2004-03-08  Alan Modra  <amodra@bigpond.net.au>

 	PR debug/11983
--- a/gcc/config.in
+++ b/gcc/config.in
@ -106,6 +106,9 @@
 /* Define if your assembler supports mfcr field. */
 #undef HAVE_AS_MFCRF

+/* Define if your assembler supports the -no-mul-bug-abort option. */
+#undef HAVE_AS_NO_MUL_BUG_ABORT_OPTION
+
 /* Define if your assembler supports offsetable %lo(). */
 #undef HAVE_AS_OFFSETABLE_LO10

@ -543,9 +546,11 @@
 /* Define to `int' if <sys/types.h> doesn't define. */
 #undef gid_t

-/* Define as `__inline' if that's what the C compiler calls it, or to nothing
-   if it is not supported. */
+/* Define to `__inline__' or `__inline' if that's what the C compiler
+   calls it, or to nothing if 'inline' is not supported under any name.  */
+#ifndef __cplusplus
 #undef inline
+#endif

 /* Define to `int' if <sys/types.h> does not define. */
 #undef pid_t
--- a/gcc/config/cris/cris.c
+++ b/gcc/config/cris/cris.c
@ -332,7 +332,10 @@ cris_commutative_orth_op (rtx x, enum machine_mode mode)
 	   || code == IOR || code == AND || code == UMIN));
 }

-/* Check if MODE is same as mode for X, and X is PLUS or MINUS or UMIN.  */
+/* Check if MODE is same as mode for X, and X is PLUS or MINUS or UMIN.
+   By the name, you might think we should include MULT.  We don't because
+   it doesn't accept the same addressing modes as the others (only
+   registers) and there's also the problem of handling TARGET_MUL_BUG.  */

 int
 cris_operand_extend_operator (rtx x, enum machine_mode mode)
@ -499,7 +502,11 @@ cris_op_str (rtx x)
      break;

    case MULT:
-      return "mul";
+      /* This function is for retrieving a part of an instruction name for
+	 an operator, for immediate output.  If that ever happens for
+	 MULT, we need to apply TARGET_MUL_BUG in the caller.  Make sure
+	 we notice.  */
+      abort ();
      break;

    case DIV:
@ -1392,6 +1399,23 @@ cris_print_operand (FILE *file, rtx x, int code)
 	fputs ("\n\tnop", file);
      return;

+    case '!':
+      /* Output directive for alignment padded with "nop" insns.
+	 Optimizing for size, it's plain 4-byte alignment, otherwise we
+	 align the section to a cache-line (32 bytes) and skip at max 2
+	 bytes, i.e. we skip if it's the last insn on a cache-line.  The
+	 latter is faster by a small amount (for two test-programs 99.6%
+	 and 99.9%) and larger by a small amount (ditto 100.1% and
+	 100.2%).  This is supposed to be the simplest yet performance-
+	 wise least intrusive way to make sure the immediately following
+	 (supposed) muls/mulu insn isn't located at the end of a
+	 cache-line.  */
+      if (TARGET_MUL_BUG)
+	fputs (optimize_size
+	       ? ".p2alignw 2,0x050f\n\t"
+	       : ".p2alignw 5,0x050f,2\n\t", file);
+      return;
+
    case 'H':
      /* Print high (most significant) part of something.  */
      switch (GET_CODE (operand))
--- a/gcc/config/cris/cris.h
+++ b/gcc/config/cris/cris.h
@ -171,9 +171,17 @@ extern const char *cris_elinux_stacksize_str;
   %{!melinux:%{!maout|melf:%{!fno-vtable-gc:-fvtable-gc}}}}}".  */
 #define CC1PLUS_SPEC ""

+#ifdef HAVE_AS_NO_MUL_BUG_ABORT_OPTION
+#define MAYBE_AS_NO_MUL_BUG_ABORT \
+ "%{mno-mul-bug-workaround:-no-mul-bug-abort} "
+#else /* Assembler lacks -no-mul-bug-abort; pass nothing extra.  */
+#define MAYBE_AS_NO_MUL_BUG_ABORT
+#endif /* HAVE_AS_NO_MUL_BUG_ABORT_OPTION (see config.in) */
+
 /* Override previous definitions (linux.h).  */
 #undef ASM_SPEC
 #define ASM_SPEC \
+ MAYBE_AS_NO_MUL_BUG_ABORT \
 "%{v:-v}\
  %(asm_subtarget)"

@ -326,8 +334,34 @@ extern int target_flags;
 #define TARGET_MASK_AVOID_GOTPLT 8192
 #define TARGET_AVOID_GOTPLT (target_flags & TARGET_MASK_AVOID_GOTPLT)

+/* Whether or not to work around multiplication instruction hardware bug
+   when generating code for models where it may be present.  From the
+   trouble report for Etrax 100 LX: "A multiply operation may cause
+   incorrect cache behaviour under some specific circumstances. The
+   problem can occur if the instruction following the multiply instruction
+   causes a cache miss, and multiply operand 1 (source operand) bits
+   [31:27] matches the logical mapping of the mode register address
+   (0xb0....), and bits [9:2] of operand 1 matches the TLB register
+   address (0x258-0x25f).  There is such a mapping in kernel mode or when
+   the MMU is off.  Normally there is no such mapping in user mode, and
+   the problem will therefore probably not occur in Linux user mode
+   programs."
+
+   We have no sure-fire way to know from within GCC that we're compiling a
+   user program.  For example, -fpic/PIC is used in libgcc which is linked
+   into the kernel.  However, the option -mno-mul-bug-workaround can be
+   safely used per-package when compiling programs.  The same goes for
+   general user-only libraries such as glibc, since there's no user-space
+   driver-like program that gets a mapping of I/O registers (all on the
+   same page, including the TLB registers).  */
+#define TARGET_MASK_MUL_BUG 16384
+#define TARGET_MUL_BUG (target_flags & TARGET_MASK_MUL_BUG)
+
 #define TARGET_SWITCHES							\
 {									\
+  {"mul-bug-workaround",		 TARGET_MASK_MUL_BUG,		\
+   N_("Work around bug in multiplication instruction")},		\
+  {"no-mul-bug-workaround",		-TARGET_MASK_MUL_BUG, ""},	\
  /* No "no-etrax" as it does not really imply any model.		\
     On the other hand, "etrax" implies the common (and large)		\
     subset matching all models.  */					\
@ -405,7 +439,7 @@ extern int target_flags;
 # define TARGET_DEFAULT \
 (TARGET_MASK_SIDE_EFFECT_PREFIXES + TARGET_MASK_STACK_ALIGN \
  + TARGET_MASK_CONST_ALIGN + TARGET_MASK_DATA_ALIGN \
-  + TARGET_MASK_PROLOGUE_EPILOGUE)
+  + TARGET_MASK_PROLOGUE_EPILOGUE + TARGET_MASK_MUL_BUG)
 #endif

 /* For the cris-*-elf subtarget.  */
@ -1474,7 +1508,8 @@ call_ ## FUNC (void)						\
 cris_print_operand (FILE, X, CODE)

 /* For delay-slot handling.  */
-#define PRINT_OPERAND_PUNCT_VALID_P(CODE) (CODE == '#')
+#define PRINT_OPERAND_PUNCT_VALID_P(CODE)	\
+ ((CODE) == '#' || (CODE) == '!')

 #define PRINT_OPERAND_ADDRESS(FILE, ADDR)	\
   cris_print_operand_address (FILE, ADDR)
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@ -2469,8 +2469,11 @@
 	 (zero_extend:SI (match_operand:HI 1 "register_operand" "0"))
 	 (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
  "TARGET_HAS_MUL_INSNS"
-  "mulu.w %2,%0"
-  [(set_attr "slottable" "yes")
+  "%!mulu.w %2,%0"
+  [(set (attr "slottable")
+	(if_then_else (ne (symbol_ref "TARGET_MUL_BUG") (const_int 0))
+		      (const_string "no")
+		      (const_string "yes")))
   ;; Just N unusable here, but let's be safe.
   (set_attr "cc" "clobber")])

@ -2480,8 +2483,11 @@
 	 (zero_extend:HI (match_operand:QI 1 "register_operand" "0"))
 	 (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))]
  "TARGET_HAS_MUL_INSNS"
-  "mulu.b %2,%0"
-  [(set_attr "slottable" "yes")
+  "%!mulu.b %2,%0"
+  [(set (attr "slottable")
+	(if_then_else (ne (symbol_ref "TARGET_MUL_BUG") (const_int 0))
+		      (const_string "no")
+		      (const_string "yes")))
   ;; Not exactly sure, but let's be safe.
   (set_attr "cc" "clobber")])

@ -2496,8 +2502,11 @@
 	(mult:SI (match_operand:SI 1 "register_operand" "0")
 		 (match_operand:SI 2 "register_operand" "r")))]
  "TARGET_HAS_MUL_INSNS"
-  "muls.d %2,%0"
-  [(set_attr "slottable" "yes")
+  "%!muls.d %2,%0"
+  [(set (attr "slottable")
+	(if_then_else (ne (symbol_ref "TARGET_MUL_BUG") (const_int 0))
+		      (const_string "no")
+		      (const_string "yes")))
   ;; Just N unusable here, but let's be safe.
   (set_attr "cc" "clobber")])

@ -2511,8 +2520,11 @@
 	 (sign_extend:HI (match_operand:QI 1 "register_operand" "0"))
 	 (sign_extend:HI (match_operand:QI 2 "register_operand" "r"))))]
  "TARGET_HAS_MUL_INSNS"
-  "muls.b %2,%0"
-  [(set_attr "slottable" "yes")
+  "%!muls.b %2,%0"
+  [(set (attr "slottable")
+	(if_then_else (ne (symbol_ref "TARGET_MUL_BUG") (const_int 0))
+		      (const_string "no")
+		      (const_string "yes")))
   (set_attr "cc" "clobber")])

 (define_insn "mulhisi3"
@ -2521,8 +2533,11 @@
 	 (sign_extend:SI (match_operand:HI 1 "register_operand" "0"))
 	 (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
  "TARGET_HAS_MUL_INSNS"
-  "muls.w %2,%0"
-  [(set_attr "slottable" "yes")
+  "%!muls.w %2,%0"
+  [(set (attr "slottable")
+	(if_then_else (ne (symbol_ref "TARGET_MUL_BUG") (const_int 0))
+		      (const_string "no")
+		      (const_string "yes")))
   ;; Just N unusable here, but let's be safe.
   (set_attr "cc" "clobber")])

@ -2538,7 +2553,7 @@
 	 (sign_extend:DI (match_operand:SI 1 "register_operand" "0"))
 	 (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
  "TARGET_HAS_MUL_INSNS"
-  "muls.d %2,%M0\;move $mof,%H0")
+  "%!muls.d %2,%M0\;move $mof,%H0")

 (define_insn "umulsidi3"
  [(set (match_operand:DI 0 "register_operand" "=r")
@ -2546,7 +2561,7 @@
 	 (zero_extend:DI (match_operand:SI 1 "register_operand" "0"))
 	 (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
  "TARGET_HAS_MUL_INSNS"
-  "mulu.d %2,%M0\;move $mof,%H0")
+  "%!mulu.d %2,%M0\;move $mof,%H0")

 ;; This pattern would probably not be needed if we add "mof" in its own
 ;; register class (and open a can of worms about /not/ pairing it with a
@ -2565,7 +2580,7 @@
 	  (const_int 32))))
   (clobber (match_scratch:SI 3 "=X,1,1"))]
  "TARGET_HAS_MUL_INSNS"
-  "muls.d %2,%1\;move $mof,%0"
+  "%!muls.d %2,%1\;move $mof,%0"
  [(set_attr "cc" "clobber")])

 (define_insn "umulsi3_highpart"
@ -2578,7 +2593,7 @@
 	  (const_int 32))))
   (clobber (match_scratch:SI 3 "=X,1,1"))]
  "TARGET_HAS_MUL_INSNS"
-  "mulu.d %2,%1\;move $mof,%0"
+  "%!mulu.d %2,%1\;move $mof,%0"
  [(set_attr "cc" "clobber")])

 ;; Divide and modulus instructions.  CRIS only has a step instruction.
--- a/gcc/config/cris/mulsi3.asm
+++ b/gcc/config/cris/mulsi3.asm
@ -82,8 +82,13 @@
 	.type	___Mul,@function
 ___Mul:
 #if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
-	ret
+;; Can't have the mulu.d last on a cache-line (in the delay-slot of the
+;; "ret"), due to hardware bug.  See documentation for -mmul-bug-workaround.
+;; Not worthwhile to conditionalize here.
+	.p2alignw 2,0x050f
 	mulu.d $r11,$r10
+	ret
+	nop
 #else
 	move.d $r10,$r12
 	move.d $r11,$r9
--- a/gcc/configure
+++ b/gcc/configure
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@ -608,7 +608,8 @@ in the following sections.
 -metrax4  -metrax100  -mpdebug  -mcc-init  -mno-side-effects @gol
 -mstack-align  -mdata-align  -mconst-align @gol
 -m32-bit  -m16-bit  -m8-bit  -mno-prologue-epilogue  -mno-gotplt @gol
-melf  -maout  -melinux  -mlinux  -sim  -sim2}
+-melf  -maout  -melinux  -mlinux  -sim  -sim2 @gol
+-mmul-bug-workaround  -mno-mul-bug-workaround}

@emph{PDP-11 Options}
@gccoptlist{-mfpu  -msoft-float  -mac0  -mno-ac0  -m40  -m45  -m10 @gol
@ -10107,6 +10108,13 @@ program should be set to @var{n} bytes.
 The options @option{-metrax4} and @option{-metrax100} are synonyms for
@option{-march=v3} and @option{-march=v8} respectively.

+@item -mmul-bug-workaround
+@itemx -mno-mul-bug-workaround
+@opindex mmul-bug-workaround
+@opindex mno-mul-bug-workaround
+Work around a bug in the @code{muls} and @code{mulu} instructions for CPU
+models where it applies.  This option is active by default.
+
@item -mpdebug
@opindex mpdebug
 Enable CRIS-specific verbose debug-related information in the assembly