gas/arc: Handle multiple arc_opcode chains for same mnemonic

This commit completes support for having multiple instructions with the same mnemonic in non-contiguous blocks within the arc_opcodes table. The commit adds an iterator mechanism for the arc_opcode_hash_entry structure, which is then used in find_opcode_match to consider all arc_opcode entries with the same mnemonic, even when these instructions are stored in non-contiguous blocks. I extend the comment on the arc_opcodes table to discuss how entries within the table are organised, and to mention how instructions can be split into multiple groups if needed, but that the table is still searched in table order. There should be no user visible changes after this commit. gas/ChangeLog: * config/tc-arc.c (struct arc_opcode_hash_entry_iterator): New structure. (arc_opcode_hash_entry_iterator_init): New function. (arc_opcode_hash_entry_iterator_next): New function. (find_opcode_match): Iterate over all arc_opcode entries referenced by the arc_opcode_hash_entry passed in as a parameter. opcodes/ChangeLog: * arc-opc.c (arc_opcodes): Extend comment to discus table layout.
2016-03-28 17:08:29 +01:00 · 2016-03-28 17:08:29 +01:00 · 1328504b28
parent b9b47ab79f
commit 1328504b28
4 changed files with 104 additions and 13 deletions
--- a/gas/ChangeLog
+++ b/gas/ChangeLog
@ -1,3 +1,12 @@
+2016-04-07  Andrew Burgess  <andrew.burgess@embecosm.com>
+
+	* config/tc-arc.c (struct arc_opcode_hash_entry_iterator): New
+	structure.
+	(arc_opcode_hash_entry_iterator_init): New function.
+	(arc_opcode_hash_entry_iterator_next): New function.
+	(find_opcode_match): Iterate over all arc_opcode entries
+	referenced by the arc_opcode_hash_entry passed in as a parameter.
+
 2016-04-07  Andrew Burgess  <andrew.burgess@embecosm.com>

 	* config/tc-arc.c (arc_find_opcode): Now returns
--- a/gas/config/tc-arc.c
+++ b/gas/config/tc-arc.c
@ -318,6 +318,17 @@ struct arc_opcode_hash_entry
  const struct arc_opcode **opcode;
 };

+/* Structure used for iterating through an arc_opcode_hash_entry.  */
+struct arc_opcode_hash_entry_iterator
+{
+  /* Index into the OPCODE element of the arc_opcode_hash_entry.  */
+  size_t index;
+
+  /* The specific ARC_OPCODE from the ARC_OPCODES table that was last
+     returned by this iterator.  */
+  const struct arc_opcode *opcode;
+};
+
 /* Forward declaration.  */
 static void assemble_insn
  (const struct arc_opcode *, const expressionS *, int,
@ -577,6 +588,47 @@ arc_find_opcode (const char *name)
  return entry;
 }

+/* Initialise the iterator ITER.  */
+
+static void
+arc_opcode_hash_entry_iterator_init (struct arc_opcode_hash_entry_iterator *iter)
+{
+  iter->index = 0;
+  iter->opcode = NULL;
+}
+
+/* Return the next ARC_OPCODE from ENTRY, using ITER to hold state between
+   calls to this function.  Return NULL when all ARC_OPCODE entries have
+   been returned.  */
+
+static const struct arc_opcode *
+arc_opcode_hash_entry_iterator_next (const struct arc_opcode_hash_entry *entry,
+				     struct arc_opcode_hash_entry_iterator *iter)
+{
+  if (iter->opcode == NULL && iter->index == 0)
+    {
+      gas_assert (entry->count > 0);
+      iter->opcode = entry->opcode[iter->index];
+    }
+  else if (iter->opcode != NULL)
+    {
+      const char *old_name = iter->opcode->name;
+
+      iter->opcode++;
+      if ((iter->opcode - arc_opcodes >= (int) arc_num_opcodes)
+	  || (strcmp (old_name, iter->opcode->name) != 0))
+	{
+	  iter->index++;
+	  if (iter->index == entry->count)
+	    iter->opcode = NULL;
+	  else
+	    iter->opcode = entry->opcode[iter->index];
+	}
+    }
+
+  return iter->opcode;
+}
+
 /* Like md_number_to_chars but used for limms.  The 4-byte limm value,
   is encoded as 'middle-endian' for a little-endian target.  FIXME!
   this function is used for regular 4 byte instructions as well.  */
@ -1406,23 +1458,22 @@ find_opcode_match (const struct arc_opcode_hash_entry *entry,
 		   int nflgs,
 		   int *pcpumatch)
 {
-  const struct arc_opcode *first_opcode = entry->opcode[0];
-  const struct arc_opcode *opcode = first_opcode;
+  const struct arc_opcode *opcode;
+  struct arc_opcode_hash_entry_iterator iter;
  int ntok = *pntok;
  int got_cpu_match = 0;
  expressionS bktok[MAX_INSN_ARGS];
  int bkntok;
  expressionS emptyE;

-  gas_assert (entry->count > 0);
-  if (entry->count > 1)
-    as_fatal (_("unable to lookup `%s', too many opcode chains"),
-	      first_opcode->name);
+  arc_opcode_hash_entry_iterator_init (&iter);
  memset (&emptyE, 0, sizeof (emptyE));
  memcpy (bktok, tok, MAX_INSN_ARGS * sizeof (*tok));
  bkntok = ntok;

-  do
+  for (opcode = arc_opcode_hash_entry_iterator_next (entry, &iter);
+       opcode != NULL;
+       opcode = arc_opcode_hash_entry_iterator_next (entry, &iter))
    {
      const unsigned char *opidx;
      const unsigned char *flgidx;
@ -1743,8 +1794,6 @@ find_opcode_match (const struct arc_opcode_hash_entry *entry,
      memcpy (tok, bktok, MAX_INSN_ARGS * sizeof (*tok));
      ntok = bkntok;
    }
-  while (++opcode - arc_opcodes < (int) arc_num_opcodes
-	 && !strcmp (opcode->name, first_opcode->name));

  if (*pcpumatch)
    *pcpumatch = got_cpu_match;
@ -2054,9 +2103,8 @@ assemble_tokens (const char *opname,
    {
      const struct arc_opcode *opcode;

-      pr_debug ("%s:%d: assemble_tokens: %s trying opcode 0x%08X\n",
-		frag_now->fr_file, frag_now->fr_line, opcode->name,
-		opcode->opcode);
+      pr_debug ("%s:%d: assemble_tokens: %s\n",
+		frag_now->fr_file, frag_now->fr_line, opname);
      found_something = TRUE;
      opcode = find_opcode_match (entry, tok, &ntok, pflags,
 				  nflgs, &cpumatch);
--- a/opcodes/ChangeLog
+++ b/opcodes/ChangeLog
@ -1,3 +1,7 @@
+2016-04-07  Andrew Burgess  <andrew.burgess@embecosm.com>
+
+	* arc-opc.c (arc_opcodes): Extend comment to discus table layout.
+
 2016-04-05  Andrew Burgess  <andrew.burgess@embecosm.com>

 	* arc-nps400-tbl.h: Add movbi, decode1, fbset, fbclear, encode0,
--- a/opcodes/arc-opc.c
+++ b/opcodes/arc-opc.c
@ -1453,7 +1453,37 @@ const unsigned arc_NToperand = FKT_NT;

   The format of the opcode table is:

-   NAME OPCODE MASK CPU CLASS SUBCLASS { OPERANDS } { FLAGS }.  */
+   NAME OPCODE MASK CPU CLASS SUBCLASS { OPERANDS } { FLAGS }.
+
+   The table is organised such that, where possible, all instructions with
+   the same mnemonic are together in a block.  When the assembler searches
+   for a suitable instruction the entries are checked in table order, so
+   more specific, or specialised cases should appear earlier in the table.
+
+   As an example, consider two instructions 'add a,b,u6' and 'add
+   a,b,limm'.  The first takes a 6-bit immediate that is encoded within the
+   32-bit instruction, while the second takes a 32-bit immediate that is
+   encoded in a follow-on 32-bit, making the total instruction length
+   64-bits.  In this case the u6 variant must appear first in the table, as
+   all u6 immediates could also be encoded using the 'limm' extension,
+   however, we want to use the shorter instruction wherever possible.
+
+   It is possible though to split instructions with the same mnemonic into
+   multiple groups.  However, the instructions are still checked in table
+   order, even across groups.  The only time that instructions with the
+   same mnemonic should be split into different groups is when different
+   variants of the instruction appear in different architectures, in which
+   case, grouping all instructions from a particular architecture together
+   might be preferable to merging the instruction into the main instruction
+   table.
+
+   An example of this split instruction groups can be found with the 'sync'
+   instruction.  The core arc architecture provides a 'sync' instruction,
+   while the nps instruction set extension provides 'sync.rd' and
+   'sync.wr'.  The rd/wr flags are instruction flags, not part of the
+   mnemonic, so we end up with two groups for the sync instruction, the
+   first within the core arc instruction table, and the second within the
+   nps extension instructions.  */
 const struct arc_opcode arc_opcodes[] =
 {
 #include "arc-tbl.h"