Make sure NOPS are inserted between 32-bit multiply and load or 16-bit multiply; Compile cleanly with -Wall; Add -n/-N options

This commit is contained in:
Michael Meissner 1997-12-17 12:43:15 +00:00
parent 7a880bf32d
commit 343b2ab8c1
2 changed files with 208 additions and 64 deletions

View File

@ -1,3 +1,41 @@
start-sanitize-d30v
Wed Dec 17 15:29:03 1997 Michael Meissner <meissner@cygnus.com>
* config/tc-d30v.c (md_shortopts): Add 'n' and 'N' options.
(exec_type_enum): Enumeration giving all of the exec types.
(warn_nops): New static variable to give nop warning level.
({cur,prev}_mul32_p): New static variable to keep track of whether
the current/previous instruction is a 32-bit multiply.
(Optimizing): Make static.
(NOP{2,_LEFT,_RIGHT}): Macros for word of nops and left/right
nops.
(d30v_insert_operand): Delete declaration of unused function.
(write_2_short): Make exec_type argument enum, not int.
(parallel_ok): Ditto.
(check_range): Delete unused variable(s).
(build_insn): Ditto.
(find_format): Ditto.
(md_apply_fix3): Ditto.
(md_show_usage): Document -n and -N.
(md_parse_option): Parse -n and -N.
(write_1_short): If -n, warn about adding a nop. Use
NOP_{LEFT,RIGHT}.
(write_2_short): Use enumeration values instead of hard coded
integers. Reset exec_type for default operations. For explicit
parallel operations, call parallel_ok to make sure everything is
ok. If writing out a parallel operation, and the previous
instruction was a 32-bit multiply, indicate current instruction
is.
(parallel_ok): Allow add/tx ... to be done in parallel with
another add/tx ... assuming the gpr registers don't overlap.
(md_assemble): Use exec type enumeration values, not hard coded
ints. Check for loads or 16-bit multiplies following in the next
cycle after a 32-bit multiply. Add nops if that is the case.
(do_assemble): Copy prev_mul32_p to cur_mul32_p, and set
cur_mul32_p if current instruction is a 32-bit multiply.
(find_format): Change spacing and layout.
end-sanitize-d30v
start-sanitize-tic80
Tue Dec 16 16:55:45 1997 Fred Fish <fnf@cygnus.com>

View File

@ -28,15 +28,27 @@
const char comment_chars[] = ";";
const char line_comment_chars[] = "#";
const char line_separator_chars[] = "";
const char *md_shortopts = "O";
const char *md_shortopts = "OnN";
const char EXP_CHARS[] = "eE";
const char FLT_CHARS[] = "dD";
int Optimizing = 0;
#define NOP_MULTIPLY 1
#define NOP_ALL 2
static int warn_nops = 0;
static int Optimizing = 0;
#define FORCE_SHORT 1
#define FORCE_LONG 2
/* EXEC types. */
typedef enum _exec_type
{
EXEC_UNKNOWN, /* no order specified */
EXEC_PARALLEL, /* done in parallel */
EXEC_SEQ, /* sequential */
EXEC_REVSEQ /* reverse sequential */
} exec_type_enum;
/* fixups */
#define MAX_INSN_FIXUPS (5)
struct d30v_fixup
@ -58,6 +70,15 @@ typedef struct _fixups
static Fixups FixUps[2];
static Fixups *fixups;
/* Whether current and previous instruction is a word multiply. */
int cur_mul32_p = 0;
int prev_mul32_p = 0;
/* Two nops */
#define NOP_LEFT ((long long)NOP << 32)
#define NOP_RIGHT ((long long)NOP)
#define NOP2 (FM00 | NOP_LEFT | NOP_RIGHT)
/* local functions */
static int reg_name_search PARAMS ((char *name));
static int register_name PARAMS ((expressionS *expressionP));
@ -71,13 +92,11 @@ static long long build_insn PARAMS ((struct d30v_insn *opcode, expressionS *oper
static void write_long PARAMS ((struct d30v_insn *opcode, long long insn, Fixups *fx));
static void write_1_short PARAMS ((struct d30v_insn *opcode, long long insn, Fixups *fx));
static int write_2_short PARAMS ((struct d30v_insn *opcode1, long long insn1,
struct d30v_insn *opcode2, long long insn2, int exec_type, Fixups *fx));
struct d30v_insn *opcode2, long long insn2, exec_type_enum exec_type, Fixups *fx));
static long long do_assemble PARAMS ((char *str, struct d30v_insn *opcode));
static unsigned long d30v_insert_operand PARAMS (( unsigned long insn, int op_type,
offsetT value, int left, fixS *fix));
static int parallel_ok PARAMS ((struct d30v_insn *opcode1, unsigned long insn1,
struct d30v_insn *opcode2, unsigned long insn2,
int exec_type));
exec_type_enum exec_type));
static void d30v_number_to_chars PARAMS ((char *buf, long long value, int nbytes));
static void check_size PARAMS ((long value, int bits, char *file, int line));
@ -167,7 +186,7 @@ check_range (num, bits, flags)
int bits;
int flags;
{
long min, max, bit1;
long min, max;
int retval=0;
/* don't bother checking 32-bit values */
@ -196,8 +215,10 @@ void
md_show_usage (stream)
FILE *stream;
{
fprintf(stream, "D30V options:\n\
-O optimize. Will do some operations in parallel.\n");
fprintf(stream, "\nD30V options:\n\
-O Make adjacent short instructions parallel if possible.\n\
-n Warn about all NOPs inserted by the assembler.\n\
-N Warn about NOPs inserted after word multiplies.\n");
}
int
@ -207,10 +228,22 @@ md_parse_option (c, arg)
{
switch (c)
{
case 'O':
/* Optimize. Will attempt to parallelize operations */
case 'O':
Optimizing = 1;
break;
/* Warn about all NOPS that the assembler inserts. */
case 'n':
warn_nops = NOP_ALL;
break;
/* Warn about the NOPS that the assembler inserts because of the
multiply hazard. */
case 'N':
warn_nops = NOP_MULTIPLY;
break;
default:
return 0;
}
@ -465,7 +498,7 @@ build_insn (opcode, opers)
struct d30v_insn *opcode;
expressionS *opers;
{
int i, length, bits, shift, flags, format;
int i, length, bits, shift, flags;
unsigned int number, id=0;
long long insn;
struct d30v_opcode *op = opcode->op;
@ -593,13 +626,16 @@ write_1_short (opcode, insn, fx)
char *f = frag_more(8);
int i, where;
if (warn_nops == NOP_ALL)
as_warn ("NOP inserted");
/* the other container needs to be NOP */
/* according to 4.3.1: for FM=00, sub-instructions performed only
by IU cannot be encoded in L-container. */
if (opcode->op->unit == IU)
insn |= FM00 | ((long long)NOP << 32); /* right container */
insn |= FM00 | NOP_LEFT; /* right container */
else
insn = FM00 | (insn << 32) | (long long)NOP; /* left container */
insn = FM00 | (insn << 32) | NOP_RIGHT; /* left container */
d30v_number_to_chars (f, insn, 8);
@ -625,14 +661,14 @@ static int
write_2_short (opcode1, insn1, opcode2, insn2, exec_type, fx)
struct d30v_insn *opcode1, *opcode2;
long long insn1, insn2;
int exec_type;
exec_type_enum exec_type;
Fixups *fx;
{
long long insn;
long long insn = NOP2;
char *f;
int i,j, where;
if(exec_type != 1 && (opcode1->op->flags_used == FLAG_JSR))
if(exec_type != EXEC_PARALLEL && (opcode1->op->flags_used == FLAG_JSR))
{
/* subroutines must be called from 32-bit boundaries */
/* so the return address will be correct */
@ -642,10 +678,11 @@ write_2_short (opcode1, insn1, opcode2, insn2, exec_type, fx)
switch (exec_type)
{
case 0: /* order not specified */
if ( Optimizing && parallel_ok (opcode1, insn1, opcode2, insn2, exec_type))
case EXEC_UNKNOWN: /* order not specified */
if (Optimizing && parallel_ok (opcode1, insn1, opcode2, insn2, exec_type))
{
/* parallel */
exec_type = EXEC_PARALLEL;
if (opcode1->op->unit == IU)
insn = FM00 | (insn2 << 32) | insn1;
else if (opcode2->op->unit == MU)
@ -656,20 +693,25 @@ write_2_short (opcode1, insn1, opcode2, insn2, exec_type, fx)
fx = fx->next;
}
}
else if (opcode1->op->unit == IU)
else if (opcode1->op->unit == IU)
{
/* reverse sequential */
insn = FM10 | (insn2 << 32) | insn1;
exec_type = EXEC_REVSEQ;
}
else
{
/* sequential */
insn = FM01 | (insn1 << 32) | insn2;
fx = fx->next;
fx = fx->next;
exec_type = EXEC_SEQ;
}
break;
case 1: /* parallel */
if (opcode1->op->unit == IU)
case EXEC_PARALLEL: /* parallel */
if (! parallel_ok (opcode1, insn1, opcode2, insn2, exec_type))
as_fatal ("Instructions may not be executed in parallel");
else if (opcode1->op->unit == IU)
{
if (opcode2->op->unit == IU)
as_fatal ("Two IU instructions may not be executed in parallel");
@ -689,18 +731,21 @@ write_2_short (opcode1, insn1, opcode2, insn2, exec_type, fx)
fx = fx->next;
}
break;
case 2: /* sequential */
case EXEC_SEQ: /* sequential */
if (opcode1->op->unit == IU)
as_fatal ("IU instruction may not be in the left container");
insn = FM01 | (insn1 << 32) | insn2;
fx = fx->next;
break;
case 3: /* reverse sequential */
case EXEC_REVSEQ: /* reverse sequential */
if (opcode2->op->unit == MU)
as_fatal ("MU instruction may not be in the right container");
insn = FM10 | (insn1 << 32) | insn2;
fx = fx->next;
break;
default:
as_fatal("unknown execution type passed to write_2_short()");
}
@ -709,6 +754,12 @@ write_2_short (opcode1, insn1, opcode2, insn2, exec_type, fx)
f = frag_more(8);
d30v_number_to_chars (f, insn, 8);
/* If the previous instruction was a 32-bit multiply but it is put into a
parallel container, mark the current instruction as being a 32-bit
multiply. */
if (prev_mul32_p && exec_type == EXEC_PARALLEL)
cur_mul32_p = 1;
for (j=0; j<2; j++)
{
for (i=0; i < fx->fc; i++)
@ -738,22 +789,23 @@ static int
parallel_ok (op1, insn1, op2, insn2, exec_type)
struct d30v_insn *op1, *op2;
unsigned long insn1, insn2;
int exec_type;
exec_type_enum exec_type;
{
int i, j, shift, regno, bits, ecc;
unsigned long flags, mask, flags_set1, flags_set2, flags_used1, flags_used2;
unsigned long ins, mod_reg[2][3], used_reg[2][3];
unsigned long ins, mod_reg[2][3], used_reg[2][3], flag_reg[2];
struct d30v_format *f;
struct d30v_opcode *op;
int reverse_p;
/* section 4.3: both instructions must not be IU or MU only */
if ((op1->op->unit == IU && op2->op->unit == IU)
|| (op1->op->unit == MU && op2->op->unit == MU))
return 0;
/* first instruction must not be a jump to safely optimize */
if (op1->op->flags_used & (FLAG_JMP | FLAG_JSR))
/* first instruction must not be a jump to safely optimize, unless this
is an explicit parallel operation. */
if (exec_type != EXEC_PARALLEL
&& (op1->op->flags_used & (FLAG_JMP | FLAG_JSR)))
return 0;
/* If one instruction is /TX or /XT and the other is /FX or /XF respectively,
@ -785,6 +837,7 @@ parallel_ok (op1, insn1, op2, insn2, exec_type)
ecc = op2->ecc;
ins = insn2;
}
flag_reg[j] = 0;
mod_reg[j][0] = mod_reg[j][1] = 0;
mod_reg[j][2] = (op->flags_set & FLAG_ALL);
used_reg[j][0] = used_reg[j][1] = 0;
@ -799,17 +852,17 @@ parallel_ok (op1, insn1, op2, insn2, exec_type)
{
case ECC_TX:
case ECC_FX:
used_reg[j][2] |= FLAG_0;
used_reg[j][2] |= flag_reg[j] = FLAG_0;
break;
case ECC_XT:
case ECC_XF:
used_reg[j][2] |= FLAG_1;
used_reg[j][2] |= flag_reg[j] = FLAG_1;
break;
case ECC_TT:
case ECC_TF:
used_reg[j][2] |= (FLAG_0 | FLAG_1);
used_reg[j][2] |= flag_reg[j] = (FLAG_0 | FLAG_1);
break;
}
@ -935,7 +988,8 @@ parallel_ok (op1, insn1, op2, insn2, exec_type)
subb. */
if (mod_reg[0][2] == FLAG_CVVA && mod_reg[1][2] == FLAG_CVVA
&& used_reg[0][2] == 0 && used_reg[1][2] == 0
&& (used_reg[0][2] & ~flag_reg[0]) == 0
&& (used_reg[1][2] & ~flag_reg[1]) == 0
&& op1->op->unit == EITHER && op2->op->unit == EITHER)
{
mod_reg[0][2] = mod_reg[1][2] = 0;
@ -958,8 +1012,7 @@ parallel_ok (op1, insn1, op2, insn2, exec_type)
/* This is the main entry point for the machine-dependent assembler. str points to a
machine-dependent instruction. This function is supposed to emit the frags/bytes
it assembles to. For the D30V, it mostly handles the special VLIW parsing and packing
and leaves the difficult stuff to do_assemble().
*/
and leaves the difficult stuff to do_assemble(). */
static long long prev_insn = -1;
static struct d30v_insn prev_opcode;
@ -972,29 +1025,29 @@ md_assemble (str)
{
struct d30v_insn opcode;
long long insn;
int extype=0; /* execution type; parallel, etc */
static int etype=0; /* saved extype. used for multiline instructions */
exec_type_enum extype = EXEC_UNKNOWN; /* execution type; parallel, etc */
static exec_type_enum etype = EXEC_UNKNOWN; /* saved extype. used for multiline instructions */
char *str2;
if ( (prev_insn != -1) && prev_seg && ((prev_seg != now_seg) || (prev_subseg != now_subseg)))
d30v_cleanup();
if (etype == 0)
if (etype == EXEC_UNKNOWN)
{
/* look for the special multiple instruction separators */
str2 = strstr (str, "||");
if (str2)
extype = 1;
extype = EXEC_PARALLEL;
else
{
str2 = strstr (str, "->");
if (str2)
extype = 2;
extype = EXEC_SEQ;
else
{
str2 = strstr (str, "<-");
if (str2)
extype = 3;
extype = EXEC_REVSEQ;
}
}
/* str2 points to the separator, if one */
@ -1034,6 +1087,43 @@ md_assemble (str)
etype = 0;
}
/* Word multiply instructions must not be followed by either a load or a
16-bit multiply instruction in the next cycle. */
if (prev_mul32_p && (opcode.op->flags_used & (FLAG_MEM | FLAG_MUL16)))
{
/* However, load and multiply should able to be combined in a parallel
operation, so check for that first. */
if (prev_insn != -1
&& (opcode.op->flags_used & FLAG_MEM)
&& opcode.form->form < LONG
&& (extype == EXEC_PARALLEL || (Optimizing && extype == EXEC_UNKNOWN))
&& parallel_ok (&prev_opcode, (long)prev_insn,
&opcode, (long)insn, extype)
&& write_2_short (&prev_opcode, (long)prev_insn,
&opcode, (long)insn, extype, fixups) == 0)
{
/* no instructions saved */
prev_insn = -1;
return;
}
/* Can't parallelize, flush current instruction and emit a word of NOPS */
else
{
char *f;
d30v_cleanup();
f = frag_more(8);
d30v_number_to_chars (f, NOP2, 8);
if (warn_nops == NOP_ALL || warn_nops == NOP_MULTIPLY)
as_warn ("word of NOPs added between word multiply and %s",
((opcode.op->flags_used & FLAG_MEM)
? "load"
: "16-bit multiply"));
}
}
/* if this is a long instruction, write it and any previous short instruction */
if (opcode.form->form >= LONG)
{
@ -1044,19 +1134,20 @@ md_assemble (str)
prev_insn = -1;
return;
}
if ( (prev_insn != -1) &&
(write_2_short (&prev_opcode, (long)prev_insn, &opcode, (long)insn, extype, fixups) == 0))
if ((prev_insn != -1) &&
(write_2_short (&prev_opcode, (long)prev_insn, &opcode, (long)insn, extype, fixups) == 0))
{
/* no instructions saved */
prev_insn = -1;
}
else
{
if (extype)
as_fatal("Unable to mix instructions as specified");
/* save off last instruction so it may be packed on next pass */
memcpy( &prev_opcode, &opcode, sizeof(prev_opcode));
memcpy(&prev_opcode, &opcode, sizeof(prev_opcode));
prev_insn = insn;
prev_seg = now_seg;
prev_subseg = now_subseg;
@ -1134,7 +1225,7 @@ do_assemble (str, opcode)
else
p = 3;
for(i=1; *str && strncmp(*str,&name[p],2); i++, *str++)
for(i=1; *str && strncmp(*str,&name[p],2); i++, str++)
;
/* cmpu only supports some condition codes */
@ -1192,6 +1283,14 @@ do_assemble (str, opcode)
input_line_pointer = save;
insn = build_insn (opcode, myops);
/* Propigate multiply status */
if (insn != -1)
{
prev_mul32_p = cur_mul32_p;
cur_mul32_p = (opcode->op->flags_used & FLAG_MUL32) != 0;
}
return (insn);
}
@ -1209,12 +1308,11 @@ find_format (opcode, myops, fsize, cmp_hack)
{
int numops, match, index, i=0, j, k;
struct d30v_format *fm;
struct d30v_operand *op;
/* get all the operands and save them as expressions */
numops = get_operands (myops, cmp_hack);
while (index = opcode->format[i++])
while ((index = opcode->format[i++]) != 0)
{
if ((fsize == FORCE_SHORT) && (index >= LONG))
continue;
@ -1240,20 +1338,21 @@ find_format (opcode, myops, fsize, cmp_hack)
match = 0;
else if (flags & OPERAND_REG)
{
if ((X_op != O_register) ||
((flags & OPERAND_ACC) && !(num & OPERAND_ACC)) ||
((flags & OPERAND_FLAG) && !(num & OPERAND_FLAG)) ||
(flags & OPERAND_CONTROL && !(num & OPERAND_CONTROL | num & OPERAND_FLAG)))
if ((X_op != O_register)
|| ((flags & OPERAND_ACC) && !(num & OPERAND_ACC))
|| ((flags & OPERAND_FLAG) && !(num & OPERAND_FLAG))
|| ((flags & OPERAND_CONTROL)
&& !(num & (OPERAND_CONTROL | OPERAND_FLAG))))
{
match = 0;
}
}
else
if (((flags & OPERAND_MINUS) && ((X_op != O_absent) || (num != OPERAND_MINUS))) ||
((flags & OPERAND_PLUS) && ((X_op != O_absent) || (num != OPERAND_PLUS))) ||
((flags & OPERAND_ATMINUS) && ((X_op != O_absent) || (num != OPERAND_ATMINUS))) ||
((flags & OPERAND_ATPAR) && ((X_op != O_absent) || (num != OPERAND_ATPAR))) ||
((flags & OPERAND_ATSIGN) && ((X_op != O_absent) || (num != OPERAND_ATSIGN))))
else
if (((flags & OPERAND_MINUS) && ((X_op != O_absent) || (num != OPERAND_MINUS)))
|| ((flags & OPERAND_PLUS) && ((X_op != O_absent) || (num != OPERAND_PLUS)))
|| ((flags & OPERAND_ATMINUS) && ((X_op != O_absent) || (num != OPERAND_ATMINUS)))
|| ((flags & OPERAND_ATPAR) && ((X_op != O_absent) || (num != OPERAND_ATPAR)))
|| ((flags & OPERAND_ATSIGN) && ((X_op != O_absent) || (num != OPERAND_ATSIGN))))
{
match=0;
}
@ -1362,8 +1461,6 @@ md_apply_fix3 (fixp, valuep, seg)
char *where;
unsigned long insn, insn2;
long value;
int op_type;
int left=0;
if (fixp->fx_addsy == (symbolS *) NULL)
{
@ -1402,6 +1499,7 @@ md_apply_fix3 (fixp, valuep, seg)
insn |= value & 0x3F;
bfd_putb32 ((bfd_vma) insn, (unsigned char *) where);
break;
case BFD_RELOC_D30V_9_PCREL:
if (fixp->fx_where & 0x7)
{
@ -1414,11 +1512,13 @@ md_apply_fix3 (fixp, valuep, seg)
insn |= ((value >> 3) & 0x3F) << 12;
bfd_putb32 ((bfd_vma) insn, (unsigned char *) where);
break;
case BFD_RELOC_D30V_15:
check_size (value, 15, fixp->fx_file, fixp->fx_line);
insn |= (value >> 3) & 0xFFF;
bfd_putb32 ((bfd_vma) insn, (unsigned char *) where);
break;
case BFD_RELOC_D30V_15_PCREL:
if (fixp->fx_where & 0x7)
{
@ -1431,11 +1531,13 @@ md_apply_fix3 (fixp, valuep, seg)
insn |= (value >> 3) & 0xFFF;
bfd_putb32 ((bfd_vma) insn, (unsigned char *) where);
break;
case BFD_RELOC_D30V_21:
check_size (value, 21, fixp->fx_file, fixp->fx_line);
insn |= (value >> 3) & 0x3FFFF;
bfd_putb32 ((bfd_vma) insn, (unsigned char *) where);
break;
case BFD_RELOC_D30V_21_PCREL:
if (fixp->fx_where & 0x7)
{
@ -1448,25 +1550,29 @@ md_apply_fix3 (fixp, valuep, seg)
insn |= (value >> 3) & 0x3FFFF;
bfd_putb32 ((bfd_vma) insn, (unsigned char *) where);
break;
case BFD_RELOC_D30V_32:
insn2 = bfd_getb32 ((unsigned char *) where + 4);
insn |= (value >> 26) & 0x3F; /* top 6 bits */
insn2 |= ((value & 0x03FC0000) << 2); /* next 8 bits */
insn |= (value >> 26) & 0x3F; /* top 6 bits */
insn2 |= ((value & 0x03FC0000) << 2); /* next 8 bits */
insn2 |= value & 0x0003FFFF; /* bottom 18 bits */
bfd_putb32 ((bfd_vma) insn, (unsigned char *) where);
bfd_putb32 ((bfd_vma) insn2, (unsigned char *) where + 4);
break;
case BFD_RELOC_D30V_32_PCREL:
insn2 = bfd_getb32 ((unsigned char *) where + 4);
insn |= (value >> 26) & 0x3F; /* top 6 bits */
insn2 |= ((value & 0x03FC0000) << 2); /* next 8 bits */
insn |= (value >> 26) & 0x3F; /* top 6 bits */
insn2 |= ((value & 0x03FC0000) << 2); /* next 8 bits */
insn2 |= value & 0x0003FFFF; /* bottom 18 bits */
bfd_putb32 ((bfd_vma) insn, (unsigned char *) where);
bfd_putb32 ((bfd_vma) insn2, (unsigned char *) where + 4);
break;
case BFD_RELOC_32:
bfd_putb32 ((bfd_vma) value, (unsigned char *) where);
break;
default:
as_fatal ("line %d: unknown relocation type: 0x%x",fixp->fx_line,fixp->fx_r_type);
}