diff --git a/gas/ChangeLog b/gas/ChangeLog
index f15549d818..dc7cb01582 100644
--- a/gas/ChangeLog
+++ b/gas/ChangeLog
@@ -1,3 +1,12 @@
+2016-04-07  Andrew Burgess  <andrew.burgess@embecosm.com>
+
+	* config/tc-arc.c (struct arc_opcode_hash_entry_iterator): New
+	structure.
+	(arc_opcode_hash_entry_iterator_init): New function.
+	(arc_opcode_hash_entry_iterator_next): New function.
+	(find_opcode_match): Iterate over all arc_opcode entries
+	referenced by the arc_opcode_hash_entry passed in as a parameter.
+
 2016-04-07  Andrew Burgess  <andrew.burgess@embecosm.com>
 
 	* config/tc-arc.c (arc_find_opcode): Now returns
diff --git a/gas/config/tc-arc.c b/gas/config/tc-arc.c
index 4c5af3f85f..1b70e03b3b 100644
--- a/gas/config/tc-arc.c
+++ b/gas/config/tc-arc.c
@@ -318,6 +318,17 @@ struct arc_opcode_hash_entry
   const struct arc_opcode **opcode;
 };
 
+/* Structure used for iterating through an arc_opcode_hash_entry.  */
+struct arc_opcode_hash_entry_iterator
+{
+  /* Index into the OPCODE array of the arc_opcode_hash_entry.  */
+  size_t index;
+
+  /* The ARC_OPCODE from the ARC_OPCODES table that was last returned by
+     this iterator; NULL before the first and after the last one.  */
+  const struct arc_opcode *opcode;
+};
+
 /* Forward declaration.  */
 static void assemble_insn
   (const struct arc_opcode *, const expressionS *, int,
@@ -577,6 +588,47 @@ arc_find_opcode (const char *name)
   return entry;
 }
 
+/* Initialise the iterator ITER ready for a fresh traversal.  */
+
+static void
+arc_opcode_hash_entry_iterator_init (struct arc_opcode_hash_entry_iterator *iter)
+{
+  iter->index = 0;      /* Start at the first OPCODE slot.  */
+  iter->opcode = NULL;  /* Nothing has been returned yet.  */
+}
+
+/* Return the next ARC_OPCODE from ENTRY, using ITER to hold state between
+   calls.  Each ENTRY->OPCODE slot starts a run of same-named ARC_OPCODES
+   entries that is walked in full.  Return NULL when all are exhausted.  */
+
+static const struct arc_opcode *
+arc_opcode_hash_entry_iterator_next (const struct arc_opcode_hash_entry *entry,
+				     struct arc_opcode_hash_entry_iterator *iter)
+{
+  if (iter->opcode == NULL && iter->index == 0)  /* Fresh iterator.  */
+    {
+      gas_assert (entry->count > 0);
+      iter->opcode = entry->opcode[iter->index];
+    }
+  else if (iter->opcode != NULL)  /* Mid-traversal; else stay at NULL.  */
+    {
+      const char *old_name = iter->opcode->name;
+
+      iter->opcode++;  /* Step to the following ARC_OPCODES entry.  */
+      if ((iter->opcode - arc_opcodes >= (int) arc_num_opcodes)
+	  || (strcmp (old_name, iter->opcode->name) != 0))
+	{
+	  iter->index++;  /* Run ended; move on to the next slot.  */
+	  if (iter->index == entry->count)
+	    iter->opcode = NULL;
+	  else
+	    iter->opcode = entry->opcode[iter->index];
+	}
+    }
+
+  return iter->opcode;
+}
+
 /* Like md_number_to_chars but used for limms.  The 4-byte limm value,
    is encoded as 'middle-endian' for a little-endian target.  FIXME!
    this function is used for regular 4 byte instructions as well.  */
@@ -1406,23 +1458,22 @@ find_opcode_match (const struct arc_opcode_hash_entry *entry,
 		   int nflgs,
 		   int *pcpumatch)
 {
-  const struct arc_opcode *first_opcode = entry->opcode[0];
-  const struct arc_opcode *opcode = first_opcode;
+  const struct arc_opcode *opcode;
+  struct arc_opcode_hash_entry_iterator iter;
   int ntok = *pntok;
   int got_cpu_match = 0;
   expressionS bktok[MAX_INSN_ARGS];
   int bkntok;
   expressionS emptyE;
 
-  gas_assert (entry->count > 0);
-  if (entry->count > 1)
-    as_fatal (_("unable to lookup `%s', too many opcode chains"),
-	      first_opcode->name);
+  arc_opcode_hash_entry_iterator_init (&iter);
   memset (&emptyE, 0, sizeof (emptyE));
   memcpy (bktok, tok, MAX_INSN_ARGS * sizeof (*tok));
   bkntok = ntok;
 
-  do
+  for (opcode = arc_opcode_hash_entry_iterator_next (entry, &iter);
+       opcode != NULL;
+       opcode = arc_opcode_hash_entry_iterator_next (entry, &iter))
     {
       const unsigned char *opidx;
       const unsigned char *flgidx;
@@ -1743,8 +1794,6 @@ find_opcode_match (const struct arc_opcode_hash_entry *entry,
       memcpy (tok, bktok, MAX_INSN_ARGS * sizeof (*tok));
       ntok = bkntok;
     }
-  while (++opcode - arc_opcodes < (int) arc_num_opcodes
-	 && !strcmp (opcode->name, first_opcode->name));
 
   if (*pcpumatch)
     *pcpumatch = got_cpu_match;
@@ -2054,9 +2103,8 @@ assemble_tokens (const char *opname,
     {
       const struct arc_opcode *opcode;
 
-      pr_debug ("%s:%d: assemble_tokens: %s trying opcode 0x%08X\n",
-		frag_now->fr_file, frag_now->fr_line, opcode->name,
-		opcode->opcode);
+      pr_debug ("%s:%d: assemble_tokens: %s\n",
+		frag_now->fr_file, frag_now->fr_line, opname);
       found_something = TRUE;
       opcode = find_opcode_match (entry, tok, &ntok, pflags,
 				  nflgs, &cpumatch);
diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog
index 4ff1366984..294a688f5f 100644
--- a/opcodes/ChangeLog
+++ b/opcodes/ChangeLog
@@ -1,3 +1,7 @@
+2016-04-07  Andrew Burgess  <andrew.burgess@embecosm.com>
+
+	* arc-opc.c (arc_opcodes): Extend comment to discuss table layout.
+
 2016-04-05  Andrew Burgess  <andrew.burgess@embecosm.com>
 
 	* arc-nps400-tbl.h: Add movbi, decode1, fbset, fbclear, encode0,
diff --git a/opcodes/arc-opc.c b/opcodes/arc-opc.c
index f182318865..69c65fc017 100644
--- a/opcodes/arc-opc.c
+++ b/opcodes/arc-opc.c
@@ -1453,7 +1453,37 @@ const unsigned arc_NToperand = FKT_NT;
 
    The format of the opcode table is:
 
-   NAME OPCODE MASK CPU CLASS SUBCLASS { OPERANDS } { FLAGS }.  */
+   NAME OPCODE MASK CPU CLASS SUBCLASS { OPERANDS } { FLAGS }.
+
+   The table is organised such that, where possible, all instructions with
+   the same mnemonic are together in a block.  When the assembler searches
+   for a suitable instruction the entries are checked in table order, so
+   more specific, or specialised cases should appear earlier in the table.
+
+   As an example, consider two instructions 'add a,b,u6' and 'add
+   a,b,limm'.  The first takes a 6-bit immediate that is encoded within the
+   32-bit instruction, while the second takes a 32-bit immediate that is
+   encoded in a follow-on 32-bit word, making the total instruction length
+   64 bits.  In this case the u6 variant must appear first in the table, as
+   all u6 immediates could also be encoded using the 'limm' extension;
+   however, we want to use the shorter instruction wherever possible.
+
+   It is possible though to split instructions with the same mnemonic into
+   multiple groups.  However, the instructions are still checked in table
+   order, even across groups.  The only time that instructions with the
+   same mnemonic should be split into different groups is when different
+   variants of the instruction appear in different architectures, in which
+   case, grouping all instructions from a particular architecture together
+   might be preferable to merging the instruction into the main instruction
+   table.
+
+   An example of such split instruction groups can be found with the 'sync'
+   instruction.  The core arc architecture provides a 'sync' instruction,
+   while the nps instruction set extension provides 'sync.rd' and
+   'sync.wr'.  The rd/wr flags are instruction flags, not part of the
+   mnemonic, so we end up with two groups for the sync instruction, the
+   first within the core arc instruction table, and the second within the
+   nps extension instructions.  */
 const struct arc_opcode arc_opcodes[] =
 {
 #include "arc-tbl.h"