re PR target/65105 ([i386] XMM registers are not used for 64bit computations on 32bit target)
gcc/ PR target/65105 * config/i386/i386.c: Include dbgcnt.h. (has_non_address_hard_reg): New. (convertible_comparison_p): New. (scalar_to_vector_candidate_p): New. (remove_non_convertible_regs): New. (scalar_chain): New. (scalar_chain::scalar_chain): New. (scalar_chain::~scalar_chain): New. (scalar_chain::add_to_queue): New. (scalar_chain::mark_dual_mode_def): New. (scalar_chain::analyze_register_chain): New. (scalar_chain::add_insn): New. (scalar_chain::build): New. (scalar_chain::compute_convert_gain): New. (scalar_chain::replace_with_subreg): New. (scalar_chain::replace_with_subreg_in_insn): New. (scalar_chain::emit_conversion_insns): New. (scalar_chain::make_vector_copies): New. (scalar_chain::convert_reg): New. (scalar_chain::convert_op): New. (scalar_chain::convert_insn): New. (scalar_chain::convert): New. (convert_scalars_to_vector): New. (pass_data_stv): New. (pass_stv): New. (make_pass_stv): New. (ix86_option_override): Created and register stv pass. (flag_opts): Add -mstv. (ix86_option_override_internal): Likewise. * config/i386/i386.md (SWIM1248x): New. (*movdi_internal): Add xmm to mem alternative for TARGET_STV. (and<mode>3): Use SWIM1248x iterator instead of SWIM. (*anddi3_doubleword): New. (*zext<mode>_doubleword): New. (*zextsi_doubleword): New. (<code><mode>3): Use SWIM1248x iterator instead of SWIM. (*<code>di3_doubleword): New. * config/i386/i386.opt (mstv): New. * dbgcnt.def (stv_conversion): New. gcc/testsuite/ PR target/65105 * gcc.target/i386/pr65105-1.c: New. * gcc.target/i386/pr65105-2.c: New. * gcc.target/i386/pr65105-3.c: New. * gcc.target/i386/pr65105-4.C: New. * gcc.dg/lower-subreg-1.c: Add -mno-stv options for ia32. From-SVN: r228231
This commit is contained in:
parent
2943f6f716
commit
006ba5047c
|
@ -1,3 +1,46 @@
|
|||
2015-09-29 Ilya Enkovich <enkovich.gnu@gmail.com>
|
||||
|
||||
PR target/65105
|
||||
* config/i386/i386.c: Include dbgcnt.h.
|
||||
(has_non_address_hard_reg): New.
|
||||
(convertible_comparison_p): New.
|
||||
(scalar_to_vector_candidate_p): New.
|
||||
(remove_non_convertible_regs): New.
|
||||
(scalar_chain): New.
|
||||
(scalar_chain::scalar_chain): New.
|
||||
(scalar_chain::~scalar_chain): New.
|
||||
(scalar_chain::add_to_queue): New.
|
||||
(scalar_chain::mark_dual_mode_def): New.
|
||||
(scalar_chain::analyze_register_chain): New.
|
||||
(scalar_chain::add_insn): New.
|
||||
(scalar_chain::build): New.
|
||||
(scalar_chain::compute_convert_gain): New.
|
||||
(scalar_chain::replace_with_subreg): New.
|
||||
(scalar_chain::replace_with_subreg_in_insn): New.
|
||||
(scalar_chain::emit_conversion_insns): New.
|
||||
(scalar_chain::make_vector_copies): New.
|
||||
(scalar_chain::convert_reg): New.
|
||||
(scalar_chain::convert_op): New.
|
||||
(scalar_chain::convert_insn): New.
|
||||
(scalar_chain::convert): New.
|
||||
(convert_scalars_to_vector): New.
|
||||
(pass_data_stv): New.
|
||||
(pass_stv): New.
|
||||
(make_pass_stv): New.
|
||||
(ix86_option_override): Create and register stv pass.
|
||||
(flag_opts): Add -mstv.
|
||||
(ix86_option_override_internal): Likewise.
|
||||
* config/i386/i386.md (SWIM1248x): New.
|
||||
(*movdi_internal): Add xmm to mem alternative for TARGET_STV.
|
||||
(and<mode>3): Use SWIM1248x iterator instead of SWIM.
|
||||
(*anddi3_doubleword): New.
|
||||
(*zext<mode>_doubleword): New.
|
||||
(*zextsi_doubleword): New.
|
||||
(<code><mode>3): Use SWIM1248x iterator instead of SWIM.
|
||||
(*<code>di3_doubleword): New.
|
||||
* config/i386/i386.opt (mstv): New.
|
||||
* dbgcnt.def (stv_conversion): New.
|
||||
|
||||
2015-09-29 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
* tree-cfg.c (dump_function_to_file): Dump function attributes.
|
||||
|
|
|
@ -87,6 +87,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "tree-iterator.h"
|
||||
#include "tree-chkp.h"
|
||||
#include "rtl-chkp.h"
|
||||
#include "dbgcnt.h"
|
||||
|
||||
/* This file should be included last. */
|
||||
#include "target-def.h"
|
||||
|
@ -2602,6 +2603,908 @@ rest_of_handle_insert_vzeroupper (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Return 1 if INSN uses or defines a hard register.
|
||||
Hard register uses in a memory address are ignored.
|
||||
Clobbers and flags definitions are ignored. */
|
||||
|
||||
static bool
|
||||
has_non_address_hard_reg (rtx_insn *insn)
|
||||
{
|
||||
df_ref ref;
|
||||
FOR_EACH_INSN_DEF (ref, insn)
|
||||
if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
|
||||
&& !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
|
||||
&& DF_REF_REGNO (ref) != FLAGS_REG)
|
||||
return true;
|
||||
|
||||
FOR_EACH_INSN_USE (ref, insn)
|
||||
if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Check if comparison INSN may be transformed
|
||||
into vector comparison. Currently we transform
|
||||
zero checks only which look like:
|
||||
|
||||
(set (reg:CCZ 17 flags)
|
||||
(compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
|
||||
(subreg:SI (reg:DI x) 0))
|
||||
(const_int 0 [0]))) */
|
||||
|
||||
static bool
|
||||
convertible_comparison_p (rtx_insn *insn)
|
||||
{
|
||||
if (!TARGET_SSE4_1)
|
||||
return false;
|
||||
|
||||
rtx def_set = single_set (insn);
|
||||
|
||||
gcc_assert (def_set);
|
||||
|
||||
rtx src = SET_SRC (def_set);
|
||||
rtx dst = SET_DEST (def_set);
|
||||
|
||||
gcc_assert (GET_CODE (src) == COMPARE);
|
||||
|
||||
if (GET_CODE (dst) != REG
|
||||
|| REGNO (dst) != FLAGS_REG
|
||||
|| GET_MODE (dst) != CCZmode)
|
||||
return false;
|
||||
|
||||
rtx op1 = XEXP (src, 0);
|
||||
rtx op2 = XEXP (src, 1);
|
||||
|
||||
if (op2 != CONST0_RTX (GET_MODE (op2)))
|
||||
return false;
|
||||
|
||||
if (GET_CODE (op1) != IOR)
|
||||
return false;
|
||||
|
||||
op2 = XEXP (op1, 1);
|
||||
op1 = XEXP (op1, 0);
|
||||
|
||||
if (!SUBREG_P (op1)
|
||||
|| !SUBREG_P (op2)
|
||||
|| GET_MODE (op1) != SImode
|
||||
|| GET_MODE (op2) != SImode
|
||||
|| ((SUBREG_BYTE (op1) != 0
|
||||
|| SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
|
||||
&& (SUBREG_BYTE (op2) != 0
|
||||
|| SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
|
||||
return false;
|
||||
|
||||
op1 = SUBREG_REG (op1);
|
||||
op2 = SUBREG_REG (op2);
|
||||
|
||||
if (op1 != op2
|
||||
|| !REG_P (op1)
|
||||
|| GET_MODE (op1) != DImode)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return 1 if INSN may be converted into vector
|
||||
instruction. */
|
||||
|
||||
static bool
|
||||
scalar_to_vector_candidate_p (rtx_insn *insn)
|
||||
{
|
||||
rtx def_set = single_set (insn);
|
||||
|
||||
if (!def_set)
|
||||
return false;
|
||||
|
||||
if (has_non_address_hard_reg (insn))
|
||||
return false;
|
||||
|
||||
rtx src = SET_SRC (def_set);
|
||||
rtx dst = SET_DEST (def_set);
|
||||
|
||||
if (GET_CODE (src) == COMPARE)
|
||||
return convertible_comparison_p (insn);
|
||||
|
||||
/* We are interested in DImode promotion only. */
|
||||
if (GET_MODE (src) != DImode
|
||||
|| GET_MODE (dst) != DImode)
|
||||
return false;
|
||||
|
||||
if (!REG_P (dst) && !MEM_P (dst))
|
||||
return false;
|
||||
|
||||
switch (GET_CODE (src))
|
||||
{
|
||||
case PLUS:
|
||||
case MINUS:
|
||||
case IOR:
|
||||
case XOR:
|
||||
case AND:
|
||||
break;
|
||||
|
||||
case REG:
|
||||
return true;
|
||||
|
||||
case MEM:
|
||||
return REG_P (dst);
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0)))
|
||||
return false;
|
||||
|
||||
if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
|
||||
return false;
|
||||
|
||||
if (GET_MODE (XEXP (src, 0)) != DImode
|
||||
|| GET_MODE (XEXP (src, 1)) != DImode)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* For a given bitmap of insn UIDs scans all instruction and
|
||||
remove insn from CANDIDATES in case it has both convertible
|
||||
and not convertible definitions.
|
||||
|
||||
All insns in a bitmap are conversion candidates according to
|
||||
scalar_to_vector_candidate_p. Currently it implies all insns
|
||||
are single_set. */
|
||||
|
||||
static void
|
||||
remove_non_convertible_regs (bitmap candidates)
|
||||
{
|
||||
bitmap_iterator bi;
|
||||
unsigned id;
|
||||
bitmap regs = BITMAP_ALLOC (NULL);
|
||||
|
||||
EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
|
||||
{
|
||||
rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
|
||||
rtx reg = SET_DEST (def_set);
|
||||
|
||||
if (!REG_P (reg)
|
||||
|| bitmap_bit_p (regs, REGNO (reg))
|
||||
|| HARD_REGISTER_P (reg))
|
||||
continue;
|
||||
|
||||
for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
|
||||
def;
|
||||
def = DF_REF_NEXT_REG (def))
|
||||
{
|
||||
if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file,
|
||||
"r%d has non convertible definition in insn %d\n",
|
||||
REGNO (reg), DF_REF_INSN_UID (def));
|
||||
|
||||
bitmap_set_bit (regs, REGNO (reg));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
|
||||
{
|
||||
for (df_ref def = DF_REG_DEF_CHAIN (id);
|
||||
def;
|
||||
def = DF_REF_NEXT_REG (def))
|
||||
if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Removing insn %d from candidates list\n",
|
||||
DF_REF_INSN_UID (def));
|
||||
|
||||
bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
|
||||
}
|
||||
}
|
||||
|
||||
BITMAP_FREE (regs);
|
||||
}
|
||||
|
||||
class scalar_chain
|
||||
{
|
||||
public:
|
||||
scalar_chain ();
|
||||
~scalar_chain ();
|
||||
|
||||
static unsigned max_id;
|
||||
|
||||
/* ID of a chain. */
|
||||
unsigned int chain_id;
|
||||
/* A queue of instructions to be included into a chain. */
|
||||
bitmap queue;
|
||||
/* Instructions included into a chain. */
|
||||
bitmap insns;
|
||||
/* All registers defined by a chain. */
|
||||
bitmap defs;
|
||||
/* Registers used in both vector and sclar modes. */
|
||||
bitmap defs_conv;
|
||||
|
||||
void build (bitmap candidates, unsigned insn_uid);
|
||||
int compute_convert_gain ();
|
||||
int convert ();
|
||||
|
||||
private:
|
||||
void add_insn (bitmap candidates, unsigned insn_uid);
|
||||
void add_to_queue (unsigned insn_uid);
|
||||
void mark_dual_mode_def (df_ref def);
|
||||
void analyze_register_chain (bitmap candidates, df_ref ref);
|
||||
rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
|
||||
void emit_conversion_insns (rtx insns, rtx_insn *pos);
|
||||
void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
|
||||
void convert_insn (rtx_insn *insn);
|
||||
void convert_op (rtx *op, rtx_insn *insn);
|
||||
void convert_reg (unsigned regno);
|
||||
void make_vector_copies (unsigned regno);
|
||||
};
|
||||
|
||||
unsigned scalar_chain::max_id = 0;
|
||||
|
||||
/* Initialize new chain. */
|
||||
|
||||
scalar_chain::scalar_chain ()
|
||||
{
|
||||
chain_id = ++max_id;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
|
||||
|
||||
bitmap_obstack_initialize (NULL);
|
||||
insns = BITMAP_ALLOC (NULL);
|
||||
defs = BITMAP_ALLOC (NULL);
|
||||
defs_conv = BITMAP_ALLOC (NULL);
|
||||
queue = NULL;
|
||||
}
|
||||
|
||||
/* Free chain's data. */
|
||||
|
||||
scalar_chain::~scalar_chain ()
|
||||
{
|
||||
BITMAP_FREE (insns);
|
||||
BITMAP_FREE (defs);
|
||||
BITMAP_FREE (defs_conv);
|
||||
bitmap_obstack_release (NULL);
|
||||
}
|
||||
|
||||
/* Add instruction into chains' queue. */
|
||||
|
||||
void
|
||||
scalar_chain::add_to_queue (unsigned insn_uid)
|
||||
{
|
||||
if (bitmap_bit_p (insns, insn_uid)
|
||||
|| bitmap_bit_p (queue, insn_uid))
|
||||
return;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " Adding insn %d into chain's #%d queue\n",
|
||||
insn_uid, chain_id);
|
||||
bitmap_set_bit (queue, insn_uid);
|
||||
}
|
||||
|
||||
/* Mark register defined by DEF as requiring conversion. */
|
||||
|
||||
void
|
||||
scalar_chain::mark_dual_mode_def (df_ref def)
|
||||
{
|
||||
gcc_assert (DF_REF_REG_DEF_P (def));
|
||||
|
||||
if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
|
||||
return;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file,
|
||||
" Mark r%d def in insn %d as requiring both modes in chain #%d\n",
|
||||
DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
|
||||
|
||||
bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
|
||||
}
|
||||
|
||||
/* Check REF's chain to add new insns into a queue
|
||||
and find registers requiring conversion. */
|
||||
|
||||
void
|
||||
scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
|
||||
{
|
||||
df_link *chain;
|
||||
|
||||
gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
|
||||
|| bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
|
||||
add_to_queue (DF_REF_INSN_UID (ref));
|
||||
|
||||
for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
|
||||
{
|
||||
unsigned uid = DF_REF_INSN_UID (chain->ref);
|
||||
if (!DF_REF_REG_MEM_P (chain->ref))
|
||||
{
|
||||
if (bitmap_bit_p (insns, uid))
|
||||
continue;
|
||||
|
||||
if (bitmap_bit_p (candidates, uid))
|
||||
{
|
||||
add_to_queue (uid);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (DF_REF_REG_DEF_P (chain->ref))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
|
||||
DF_REF_REGNO (chain->ref), uid);
|
||||
mark_dual_mode_def (chain->ref);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
|
||||
DF_REF_REGNO (chain->ref), uid);
|
||||
mark_dual_mode_def (ref);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Add instruction into a chain. */
|
||||
|
||||
void
|
||||
scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
|
||||
{
|
||||
if (bitmap_bit_p (insns, insn_uid))
|
||||
return;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
|
||||
|
||||
bitmap_set_bit (insns, insn_uid);
|
||||
|
||||
rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
|
||||
rtx def_set = single_set (insn);
|
||||
if (def_set && REG_P (SET_DEST (def_set))
|
||||
&& !HARD_REGISTER_P (SET_DEST (def_set)))
|
||||
bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
|
||||
|
||||
df_ref ref;
|
||||
df_ref def;
|
||||
for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
|
||||
if (!HARD_REGISTER_P (DF_REF_REG (ref)))
|
||||
for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
|
||||
def;
|
||||
def = DF_REF_NEXT_REG (def))
|
||||
analyze_register_chain (candidates, def);
|
||||
for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
|
||||
if (!DF_REF_REG_MEM_P (ref))
|
||||
analyze_register_chain (candidates, ref);
|
||||
}
|
||||
|
||||
/* Build new chain starting from insn INSN_UID recursively
|
||||
adding all dependent uses and definitions. */
|
||||
|
||||
void
|
||||
scalar_chain::build (bitmap candidates, unsigned insn_uid)
|
||||
{
|
||||
queue = BITMAP_ALLOC (NULL);
|
||||
bitmap_set_bit (queue, insn_uid);
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Building chain #%d...\n", chain_id);
|
||||
|
||||
while (!bitmap_empty_p (queue))
|
||||
{
|
||||
insn_uid = bitmap_first_set_bit (queue);
|
||||
bitmap_clear_bit (queue, insn_uid);
|
||||
bitmap_clear_bit (candidates, insn_uid);
|
||||
add_insn (candidates, insn_uid);
|
||||
}
|
||||
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, "Collected chain #%d...\n", chain_id);
|
||||
fprintf (dump_file, " insns: ");
|
||||
dump_bitmap (dump_file, insns);
|
||||
if (!bitmap_empty_p (defs_conv))
|
||||
{
|
||||
bitmap_iterator bi;
|
||||
unsigned id;
|
||||
const char *comma = "";
|
||||
fprintf (dump_file, " defs to convert: ");
|
||||
EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
|
||||
{
|
||||
fprintf (dump_file, "%sr%d", comma, id);
|
||||
comma = ", ";
|
||||
}
|
||||
fprintf (dump_file, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
BITMAP_FREE (queue);
|
||||
}
|
||||
|
||||
/* Compute a gain for chain conversion. */
|
||||
|
||||
int
|
||||
scalar_chain::compute_convert_gain ()
|
||||
{
|
||||
bitmap_iterator bi;
|
||||
unsigned insn_uid;
|
||||
int gain = 0;
|
||||
int cost = 0;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
|
||||
|
||||
EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
|
||||
{
|
||||
rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
|
||||
rtx def_set = single_set (insn);
|
||||
rtx src = SET_SRC (def_set);
|
||||
rtx dst = SET_DEST (def_set);
|
||||
|
||||
if (REG_P (src) && REG_P (dst))
|
||||
gain += COSTS_N_INSNS (2) - ix86_cost->sse_move;
|
||||
else if (REG_P (src) && MEM_P (dst))
|
||||
gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
|
||||
else if (MEM_P (src) && REG_P (dst))
|
||||
gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
|
||||
else if (GET_CODE (src) == PLUS
|
||||
|| GET_CODE (src) == MINUS
|
||||
|| GET_CODE (src) == IOR
|
||||
|| GET_CODE (src) == XOR
|
||||
|| GET_CODE (src) == AND)
|
||||
gain += ix86_cost->add;
|
||||
else if (GET_CODE (src) == COMPARE)
|
||||
{
|
||||
/* Assume comparison cost is the same. */
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " Instruction convertion gain: %d\n", gain);
|
||||
|
||||
EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
|
||||
cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " Registers convertion cost: %d\n", cost);
|
||||
|
||||
gain -= cost;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " Total gain: %d\n", gain);
|
||||
|
||||
return gain;
|
||||
}
|
||||
|
||||
/* Replace REG in X with a V2DI subreg of NEW_REG. */
|
||||
|
||||
rtx
|
||||
scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
|
||||
{
|
||||
if (x == reg)
|
||||
return gen_rtx_SUBREG (V2DImode, new_reg, 0);
|
||||
|
||||
const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
|
||||
int i, j;
|
||||
for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
|
||||
{
|
||||
if (fmt[i] == 'e')
|
||||
XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
|
||||
else if (fmt[i] == 'E')
|
||||
for (j = XVECLEN (x, i) - 1; j >= 0; j--)
|
||||
XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
|
||||
reg, new_reg);
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
/* Replace REG in INSN with a V2DI subreg of NEW_REG. */
|
||||
|
||||
void
|
||||
scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx new_reg)
|
||||
{
|
||||
replace_with_subreg (single_set (insn), reg, new_reg);
|
||||
}
|
||||
|
||||
/* Insert generated conversion instruction sequence INSNS
|
||||
after instruction AFTER. New BB may be required in case
|
||||
instruction has EH region attached. */
|
||||
|
||||
void
|
||||
scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
|
||||
{
|
||||
if (!control_flow_insn_p (after))
|
||||
{
|
||||
emit_insn_after (insns, after);
|
||||
return;
|
||||
}
|
||||
|
||||
basic_block bb = BLOCK_FOR_INSN (after);
|
||||
edge e = find_fallthru_edge (bb->succs);
|
||||
gcc_assert (e);
|
||||
|
||||
basic_block new_bb = split_edge (e);
|
||||
emit_insn_after (insns, BB_HEAD (new_bb));
|
||||
}
|
||||
|
||||
/* Make vector copies for all register REGNO definitions
|
||||
and replace its uses in a chain. */
|
||||
|
||||
void
|
||||
scalar_chain::make_vector_copies (unsigned regno)
|
||||
{
|
||||
rtx reg = regno_reg_rtx[regno];
|
||||
rtx vreg = gen_reg_rtx (DImode);
|
||||
df_ref ref;
|
||||
|
||||
for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
|
||||
if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
|
||||
{
|
||||
rtx_insn *insn = DF_REF_INSN (ref);
|
||||
|
||||
start_sequence ();
|
||||
if (TARGET_SSE4_1)
|
||||
{
|
||||
emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
|
||||
CONST0_RTX (V4SImode),
|
||||
gen_rtx_SUBREG (SImode, reg, 0)));
|
||||
emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
|
||||
gen_rtx_SUBREG (V4SImode, vreg, 0),
|
||||
gen_rtx_SUBREG (SImode, reg, 4),
|
||||
GEN_INT (2)));
|
||||
}
|
||||
else if (TARGET_INTER_UNIT_MOVES_TO_VEC)
|
||||
{
|
||||
rtx tmp = gen_reg_rtx (DImode);
|
||||
emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
|
||||
CONST0_RTX (V4SImode),
|
||||
gen_rtx_SUBREG (SImode, reg, 0)));
|
||||
emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
|
||||
CONST0_RTX (V4SImode),
|
||||
gen_rtx_SUBREG (SImode, reg, 4)));
|
||||
emit_insn (gen_vec_interleave_lowv4si
|
||||
(gen_rtx_SUBREG (V4SImode, vreg, 0),
|
||||
gen_rtx_SUBREG (V4SImode, vreg, 0),
|
||||
gen_rtx_SUBREG (V4SImode, tmp, 0)));
|
||||
}
|
||||
else
|
||||
{
|
||||
rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
|
||||
emit_move_insn (adjust_address (tmp, SImode, 0),
|
||||
gen_rtx_SUBREG (SImode, reg, 0));
|
||||
emit_move_insn (adjust_address (tmp, SImode, 4),
|
||||
gen_rtx_SUBREG (SImode, reg, 4));
|
||||
emit_move_insn (vreg, tmp);
|
||||
}
|
||||
emit_conversion_insns (get_insns (), insn);
|
||||
end_sequence ();
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file,
|
||||
" Copied r%d to a vector register r%d for insn %d\n",
|
||||
regno, REGNO (vreg), DF_REF_INSN_UID (ref));
|
||||
}
|
||||
|
||||
for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
|
||||
if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
|
||||
{
|
||||
replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, vreg);
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " Replaced r%d with r%d in insn %d\n",
|
||||
regno, REGNO (vreg), DF_REF_INSN_UID (ref));
|
||||
}
|
||||
}
|
||||
|
||||
/* Convert all definitions of register REGNO
|
||||
and fix its uses. Scalar copies may be created
|
||||
in case register is used in not convertible insn. */
|
||||
|
||||
void
|
||||
scalar_chain::convert_reg (unsigned regno)
|
||||
{
|
||||
bool scalar_copy = bitmap_bit_p (defs_conv, regno);
|
||||
rtx reg = regno_reg_rtx[regno];
|
||||
rtx scopy = NULL_RTX;
|
||||
df_ref ref;
|
||||
bitmap conv;
|
||||
|
||||
conv = BITMAP_ALLOC (NULL);
|
||||
bitmap_copy (conv, insns);
|
||||
|
||||
if (scalar_copy)
|
||||
scopy = gen_reg_rtx (DImode);
|
||||
|
||||
for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
|
||||
{
|
||||
rtx_insn *insn = DF_REF_INSN (ref);
|
||||
rtx def_set = single_set (insn);
|
||||
rtx src = SET_SRC (def_set);
|
||||
rtx reg = DF_REF_REG (ref);
|
||||
|
||||
if (!MEM_P (src))
|
||||
{
|
||||
replace_with_subreg_in_insn (insn, reg, reg);
|
||||
bitmap_clear_bit (conv, INSN_UID (insn));
|
||||
}
|
||||
|
||||
if (scalar_copy)
|
||||
{
|
||||
rtx vcopy = gen_reg_rtx (V2DImode);
|
||||
|
||||
start_sequence ();
|
||||
if (TARGET_INTER_UNIT_MOVES_FROM_VEC)
|
||||
{
|
||||
emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
|
||||
emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
|
||||
gen_rtx_SUBREG (SImode, vcopy, 0));
|
||||
emit_move_insn (vcopy,
|
||||
gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
|
||||
emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
|
||||
gen_rtx_SUBREG (SImode, vcopy, 0));
|
||||
}
|
||||
else
|
||||
{
|
||||
rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
|
||||
emit_move_insn (tmp, reg);
|
||||
emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
|
||||
adjust_address (tmp, SImode, 0));
|
||||
emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
|
||||
adjust_address (tmp, SImode, 4));
|
||||
}
|
||||
emit_conversion_insns (get_insns (), insn);
|
||||
end_sequence ();
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file,
|
||||
" Copied r%d to a scalar register r%d for insn %d\n",
|
||||
regno, REGNO (scopy), INSN_UID (insn));
|
||||
}
|
||||
}
|
||||
|
||||
for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
|
||||
if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
|
||||
{
|
||||
if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
|
||||
{
|
||||
rtx def_set = single_set (DF_REF_INSN (ref));
|
||||
if (!MEM_P (SET_DEST (def_set))
|
||||
|| !REG_P (SET_SRC (def_set)))
|
||||
replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, reg);
|
||||
bitmap_clear_bit (conv, DF_REF_INSN_UID (ref));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
replace_rtx (DF_REF_INSN (ref), reg, scopy);
|
||||
df_insn_rescan (DF_REF_INSN (ref));
|
||||
}
|
||||
|
||||
BITMAP_FREE (conv);
|
||||
}
|
||||
|
||||
/* Convert operand OP in INSN. All register uses
|
||||
are converted during registers conversion.
|
||||
Therefore we should just handle memory operands. */
|
||||
|
||||
void
|
||||
scalar_chain::convert_op (rtx *op, rtx_insn *insn)
|
||||
{
|
||||
*op = copy_rtx_if_shared (*op);
|
||||
|
||||
if (MEM_P (*op))
|
||||
{
|
||||
rtx tmp = gen_reg_rtx (DImode);
|
||||
|
||||
emit_insn_before (gen_move_insn (tmp, *op), insn);
|
||||
*op = gen_rtx_SUBREG (V2DImode, tmp, 0);
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
|
||||
INSN_UID (insn), REGNO (tmp));
|
||||
}
|
||||
else
|
||||
{
|
||||
gcc_assert (SUBREG_P (*op));
|
||||
gcc_assert (GET_MODE (*op) == V2DImode);
|
||||
}
|
||||
}
|
||||
|
||||
/* Convert INSN to vector mode. */
|
||||
|
||||
void
|
||||
scalar_chain::convert_insn (rtx_insn *insn)
|
||||
{
|
||||
rtx def_set = single_set (insn);
|
||||
rtx src = SET_SRC (def_set);
|
||||
rtx dst = SET_DEST (def_set);
|
||||
rtx subreg;
|
||||
|
||||
if (MEM_P (dst) && !REG_P (src))
|
||||
{
|
||||
/* There are no scalar integer instructions and therefore
|
||||
temporary register usage is required. */
|
||||
rtx tmp = gen_reg_rtx (DImode);
|
||||
emit_conversion_insns (gen_move_insn (dst, tmp), insn);
|
||||
dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
|
||||
}
|
||||
|
||||
switch (GET_CODE (src))
|
||||
{
|
||||
case PLUS:
|
||||
case MINUS:
|
||||
case IOR:
|
||||
case XOR:
|
||||
case AND:
|
||||
convert_op (&XEXP (src, 0), insn);
|
||||
convert_op (&XEXP (src, 1), insn);
|
||||
PUT_MODE (src, V2DImode);
|
||||
break;
|
||||
|
||||
case MEM:
|
||||
if (!REG_P (dst))
|
||||
convert_op (&src, insn);
|
||||
break;
|
||||
|
||||
case REG:
|
||||
break;
|
||||
|
||||
case SUBREG:
|
||||
gcc_assert (GET_MODE (src) == V2DImode);
|
||||
break;
|
||||
|
||||
case COMPARE:
|
||||
src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
|
||||
|
||||
gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
|
||||
|| (SUBREG_P (src) && GET_MODE (src) == V2DImode));
|
||||
|
||||
if (REG_P (src))
|
||||
subreg = gen_rtx_SUBREG (V2DImode, src, 0);
|
||||
else
|
||||
subreg = copy_rtx_if_shared (src);
|
||||
emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
|
||||
copy_rtx_if_shared (subreg),
|
||||
copy_rtx_if_shared (subreg)),
|
||||
insn);
|
||||
dst = gen_rtx_REG (CCmode, FLAGS_REG);
|
||||
src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
|
||||
copy_rtx_if_shared (src)),
|
||||
UNSPEC_PTEST);
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
SET_SRC (def_set) = src;
|
||||
SET_DEST (def_set) = dst;
|
||||
|
||||
/* Drop possible dead definitions. */
|
||||
PATTERN (insn) = def_set;
|
||||
|
||||
INSN_CODE (insn) = -1;
|
||||
recog_memoized (insn);
|
||||
df_insn_rescan (insn);
|
||||
}
|
||||
|
||||
/* Convert whole chain creating required register
|
||||
conversions and copies. */
|
||||
|
||||
int
|
||||
scalar_chain::convert ()
|
||||
{
|
||||
bitmap_iterator bi;
|
||||
unsigned id;
|
||||
int converted_insns = 0;
|
||||
|
||||
if (!dbg_cnt (stv_conversion))
|
||||
return 0;
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Converting chain #%d...\n", chain_id);
|
||||
|
||||
EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
|
||||
convert_reg (id);
|
||||
|
||||
EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
|
||||
make_vector_copies (id);
|
||||
|
||||
EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
|
||||
{
|
||||
convert_insn (DF_INSN_UID_GET (id)->insn);
|
||||
converted_insns++;
|
||||
}
|
||||
|
||||
return converted_insns;
|
||||
}
|
||||
|
||||
/* Main STV pass function. Find and convert scalar
|
||||
instructions into vector mode when profitable. */
|
||||
|
||||
static unsigned int
|
||||
convert_scalars_to_vector ()
|
||||
{
|
||||
basic_block bb;
|
||||
bitmap candidates;
|
||||
int converted_insns = 0;
|
||||
|
||||
bitmap_obstack_initialize (NULL);
|
||||
candidates = BITMAP_ALLOC (NULL);
|
||||
|
||||
calculate_dominance_info (CDI_DOMINATORS);
|
||||
df_set_flags (DF_DEFER_INSN_RESCAN);
|
||||
df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
|
||||
df_md_add_problem ();
|
||||
df_analyze ();
|
||||
|
||||
/* Find all instructions we want to convert into vector mode. */
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Searching for mode convertion candidates...\n");
|
||||
|
||||
FOR_EACH_BB_FN (bb, cfun)
|
||||
{
|
||||
rtx_insn *insn;
|
||||
FOR_BB_INSNS (bb, insn)
|
||||
if (scalar_to_vector_candidate_p (insn))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " insn %d is marked as a candidate\n",
|
||||
INSN_UID (insn));
|
||||
|
||||
bitmap_set_bit (candidates, INSN_UID (insn));
|
||||
}
|
||||
}
|
||||
|
||||
remove_non_convertible_regs (candidates);
|
||||
|
||||
if (bitmap_empty_p (candidates))
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "There are no candidates for optimization.\n");
|
||||
|
||||
while (!bitmap_empty_p (candidates))
|
||||
{
|
||||
unsigned uid = bitmap_first_set_bit (candidates);
|
||||
scalar_chain chain;
|
||||
|
||||
/* Find instructions chain we want to convert to vector mode.
|
||||
Check all uses and definitions to estimate all required
|
||||
conversions. */
|
||||
chain.build (candidates, uid);
|
||||
|
||||
if (chain.compute_convert_gain () > 0)
|
||||
converted_insns += chain.convert ();
|
||||
else
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Chain #%d conversion is not profitable\n",
|
||||
chain.chain_id);
|
||||
}
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
|
||||
|
||||
BITMAP_FREE (candidates);
|
||||
bitmap_obstack_release (NULL);
|
||||
df_process_deferred_rescans ();
|
||||
|
||||
/* Conversion means we may have 128bit register spills/fills
|
||||
which require aligned stack. */
|
||||
if (converted_insns)
|
||||
{
|
||||
if (crtl->stack_alignment_needed < 128)
|
||||
crtl->stack_alignment_needed = 128;
|
||||
if (crtl->stack_alignment_estimated < 128)
|
||||
crtl->stack_alignment_estimated = 128;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
const pass_data pass_data_insert_vzeroupper =
|
||||
|
@ -2639,6 +3542,39 @@ public:
|
|||
|
||||
}; // class pass_insert_vzeroupper
|
||||
|
||||
/* Static description of the STV pass: an RTL pass named "stv" with no
   property requirements, finishing with TODO_df_finish.  */

const pass_data pass_data_stv =
{
  RTL_PASS,        /* type */
  "stv",           /* name */
  OPTGROUP_NONE,   /* optinfo_flags */
  TV_NONE,         /* tv_id */
  0,               /* properties_required */
  0,               /* properties_provided */
  0,               /* properties_destroyed */
  0,               /* todo_flags_start */
  TODO_df_finish,  /* todo_flags_finish */
};
|
||||
|
||||
class pass_stv : public rtl_opt_pass
|
||||
{
|
||||
public:
|
||||
pass_stv (gcc::context *ctxt)
|
||||
: rtl_opt_pass (pass_data_stv, ctxt)
|
||||
{}
|
||||
|
||||
/* opt_pass methods: */
|
||||
virtual bool gate (function *)
|
||||
{
|
||||
return !TARGET_64BIT && TARGET_STV && TARGET_SSE2 && optimize > 1;
|
||||
}
|
||||
|
||||
virtual unsigned int execute (function *)
|
||||
{
|
||||
return convert_scalars_to_vector ();
|
||||
}
|
||||
|
||||
}; // class pass_stv
|
||||
|
||||
} // anon namespace
|
||||
|
||||
rtl_opt_pass *
|
||||
|
@ -2647,6 +3583,12 @@ make_pass_insert_vzeroupper (gcc::context *ctxt)
|
|||
return new pass_insert_vzeroupper (ctxt);
|
||||
}
|
||||
|
||||
rtl_opt_pass *
|
||||
make_pass_stv (gcc::context *ctxt)
|
||||
{
|
||||
return new pass_stv (ctxt);
|
||||
}
|
||||
|
||||
/* Return true if a red-zone is in use. */
|
||||
|
||||
static inline bool
|
||||
|
@ -2756,6 +3698,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
|
|||
{ "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
|
||||
{ "-m8bit-idiv", MASK_USE_8BIT_IDIV },
|
||||
{ "-mvzeroupper", MASK_VZEROUPPER },
|
||||
{ "-mstv", MASK_STV},
|
||||
{ "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
|
||||
{ "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
|
||||
{ "-mprefer-avx128", MASK_PREFER_AVX128},
|
||||
|
@ -4372,6 +5315,8 @@ ix86_option_override_internal (bool main_args_p,
|
|||
|
||||
if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
|
||||
opts->x_target_flags |= MASK_VZEROUPPER;
|
||||
if (!(opts_set->x_target_flags & MASK_STV))
|
||||
opts->x_target_flags |= MASK_STV;
|
||||
if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
|
||||
&& !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
|
||||
opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
|
||||
|
@ -4485,12 +5430,18 @@ ix86_option_override (void)
|
|||
= { pass_insert_vzeroupper, "reload",
|
||||
1, PASS_POS_INSERT_AFTER
|
||||
};
|
||||
opt_pass *pass_stv = make_pass_stv (g);
|
||||
struct register_pass_info stv_info
|
||||
= { pass_stv, "combine",
|
||||
1, PASS_POS_INSERT_AFTER
|
||||
};
|
||||
|
||||
ix86_option_override_internal (true, &global_options, &global_options_set);
|
||||
|
||||
|
||||
/* This needs to be done at start up. It's convenient to do it here. */
|
||||
register_pass (&insert_vzeroupper_info);
|
||||
register_pass (&stv_info);
|
||||
}
|
||||
|
||||
/* Implement the TARGET_OFFLOAD_OPTIONS hook. */
|
||||
|
|
|
@ -981,6 +981,11 @@
|
|||
(HI "TARGET_HIMODE_MATH")
|
||||
SI])
|
||||
|
||||
;; Math-dependent integer modes with DImode.
|
||||
(define_mode_iterator SWIM1248x [(QI "TARGET_QIMODE_MATH")
|
||||
(HI "TARGET_HIMODE_MATH")
|
||||
SI (DI "(TARGET_STV && TARGET_SSE2) || TARGET_64BIT")])
|
||||
|
||||
;; Math-dependent single word integer modes without QImode.
|
||||
(define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH")
|
||||
SI (DI "TARGET_64BIT")])
|
||||
|
@ -2097,9 +2102,9 @@
|
|||
|
||||
(define_insn "*movdi_internal"
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand"
|
||||
"=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi,*k,*k ,*r ,*m")
|
||||
"=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,m,?r ,?r,?*Yi,?*Ym,?*Yi,*k,*k ,*r ,*m")
|
||||
(match_operand:DI 1 "general_operand"
|
||||
"riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn ,*r ,*km,*k,*k"))]
|
||||
"riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,v,*Yj,*v,r ,*Yj ,*Yn ,*r ,*km,*k,*k"))]
|
||||
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
|
||||
{
|
||||
switch (get_attr_type (insn))
|
||||
|
@ -2177,9 +2182,9 @@
|
|||
[(set (attr "isa")
|
||||
(cond [(eq_attr "alternative" "0,1")
|
||||
(const_string "nox64")
|
||||
(eq_attr "alternative" "2,3,4,5,10,11,16,18,21,23")
|
||||
(eq_attr "alternative" "2,3,4,5,10,11,17,19,22,24")
|
||||
(const_string "x64")
|
||||
(eq_attr "alternative" "17")
|
||||
(eq_attr "alternative" "18")
|
||||
(const_string "x64_sse4")
|
||||
]
|
||||
(const_string "*")))
|
||||
|
@ -2190,13 +2195,13 @@
|
|||
(const_string "mmx")
|
||||
(eq_attr "alternative" "7,8,9,10,11")
|
||||
(const_string "mmxmov")
|
||||
(eq_attr "alternative" "12,17")
|
||||
(eq_attr "alternative" "12,18")
|
||||
(const_string "sselog1")
|
||||
(eq_attr "alternative" "13,14,15,16,18")
|
||||
(eq_attr "alternative" "13,14,15,16,17,19")
|
||||
(const_string "ssemov")
|
||||
(eq_attr "alternative" "19,20")
|
||||
(eq_attr "alternative" "20,21")
|
||||
(const_string "ssecvt")
|
||||
(eq_attr "alternative" "21,22,23,24")
|
||||
(eq_attr "alternative" "22,23,24,25")
|
||||
(const_string "mskmov")
|
||||
(and (match_operand 0 "register_operand")
|
||||
(match_operand 1 "pic_32bit_operand"))
|
||||
|
@ -2211,16 +2216,16 @@
|
|||
(set (attr "length_immediate")
|
||||
(cond [(and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
|
||||
(const_string "8")
|
||||
(eq_attr "alternative" "17")
|
||||
(eq_attr "alternative" "18")
|
||||
(const_string "1")
|
||||
]
|
||||
(const_string "*")))
|
||||
(set (attr "prefix_rex")
|
||||
(if_then_else (eq_attr "alternative" "10,11,16,17,18")
|
||||
(if_then_else (eq_attr "alternative" "10,11,17,18,19")
|
||||
(const_string "1")
|
||||
(const_string "*")))
|
||||
(set (attr "prefix_extra")
|
||||
(if_then_else (eq_attr "alternative" "17")
|
||||
(if_then_else (eq_attr "alternative" "18")
|
||||
(const_string "1")
|
||||
(const_string "*")))
|
||||
(set (attr "prefix")
|
||||
|
@ -2248,13 +2253,26 @@
|
|||
]
|
||||
(const_string "TI"))
|
||||
|
||||
(and (eq_attr "alternative" "14,15")
|
||||
(and (eq_attr "alternative" "14,15,16")
|
||||
(not (match_test "TARGET_SSE2")))
|
||||
(const_string "V2SF")
|
||||
(eq_attr "alternative" "17")
|
||||
(eq_attr "alternative" "18")
|
||||
(const_string "TI")
|
||||
]
|
||||
(const_string "DI")))])
|
||||
(const_string "DI")))
|
||||
(set (attr "enabled")
|
||||
(cond [(eq_attr "alternative" "15")
|
||||
(if_then_else
|
||||
(match_test "TARGET_STV && TARGET_SSE2")
|
||||
(symbol_ref "false")
|
||||
(const_string "*"))
|
||||
(eq_attr "alternative" "16")
|
||||
(if_then_else
|
||||
(match_test "TARGET_STV && TARGET_SSE2")
|
||||
(symbol_ref "true")
|
||||
(symbol_ref "false"))
|
||||
]
|
||||
(const_string "*")))])
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand")
|
||||
|
@ -3814,6 +3832,26 @@
|
|||
"movz{bl|x}\t{%1, %k0|%k0, %1}"
|
||||
[(set_attr "type" "imovx")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn_and_split "*zext<mode>_doubleword"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(zero_extend:DI (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
|
||||
"!TARGET_64BIT && TARGET_STV && TARGET_SSE2"
|
||||
"#"
|
||||
"&& reload_completed && GENERAL_REG_P (operands[0])"
|
||||
[(set (match_dup 0) (zero_extend:SI (match_dup 1)))
|
||||
(set (match_dup 2) (const_int 0))]
|
||||
"split_double_mode (DImode, &operands[0], 1, &operands[0], &operands[2]);")
|
||||
|
||||
(define_insn_and_split "*zextsi_doubleword"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm")))]
|
||||
"!TARGET_64BIT && TARGET_STV && TARGET_SSE2"
|
||||
"#"
|
||||
"&& reload_completed && GENERAL_REG_P (operands[0])"
|
||||
[(set (match_dup 0) (match_dup 1))
|
||||
(set (match_dup 2) (const_int 0))]
|
||||
"split_double_mode (DImode, &operands[0], 1, &operands[0], &operands[2]);")
|
||||
|
||||
;; Sign extension instructions
|
||||
|
||||
|
@ -7863,9 +7901,9 @@
|
|||
;; it should be done with splitters.
|
||||
|
||||
(define_expand "and<mode>3"
|
||||
[(set (match_operand:SWIM 0 "nonimmediate_operand")
|
||||
(and:SWIM (match_operand:SWIM 1 "nonimmediate_operand")
|
||||
(match_operand:SWIM 2 "<general_szext_operand>")))]
|
||||
[(set (match_operand:SWIM1248x 0 "nonimmediate_operand")
|
||||
(and:SWIM1248x (match_operand:SWIM1248x 1 "nonimmediate_operand")
|
||||
(match_operand:SWIM1248x 2 "<general_szext_operand>")))]
|
||||
""
|
||||
{
|
||||
machine_mode mode = <MODE>mode;
|
||||
|
@ -7943,6 +7981,23 @@
|
|||
(const_string "*")))
|
||||
(set_attr "mode" "SI,DI,DI,SI,DI")])
|
||||
|
||||
(define_insn_and_split "*anddi3_doubleword"
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
|
||||
(and:DI
|
||||
(match_operand:DI 1 "nonimmediate_operand" "%0,0,0")
|
||||
(match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"!TARGET_64BIT && TARGET_STV && TARGET_SSE2 && ix86_binary_operator_ok (AND, DImode, operands)"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(parallel [(set (match_dup 0)
|
||||
(and:SI (match_dup 1) (match_dup 2)))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
(parallel [(set (match_dup 3)
|
||||
(and:SI (match_dup 4) (match_dup 5)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);")
|
||||
|
||||
(define_insn "*andsi_1"
|
||||
[(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,Ya,!k")
|
||||
(and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm,k")
|
||||
|
@ -8430,9 +8485,9 @@
|
|||
;; If this is considered useful, it should be done with splitters.
|
||||
|
||||
(define_expand "<code><mode>3"
|
||||
[(set (match_operand:SWIM 0 "nonimmediate_operand")
|
||||
(any_or:SWIM (match_operand:SWIM 1 "nonimmediate_operand")
|
||||
(match_operand:SWIM 2 "<general_operand>")))]
|
||||
[(set (match_operand:SWIM1248x 0 "nonimmediate_operand")
|
||||
(any_or:SWIM1248x (match_operand:SWIM1248x 1 "nonimmediate_operand")
|
||||
(match_operand:SWIM1248x 2 "<general_operand>")))]
|
||||
""
|
||||
"ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
|
||||
|
||||
|
@ -8450,6 +8505,23 @@
|
|||
[(set_attr "type" "alu,alu,msklog")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn_and_split "*<code>di3_doubleword"
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
|
||||
(any_or:DI
|
||||
(match_operand:DI 1 "nonimmediate_operand" "%0,0,0")
|
||||
(match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"!TARGET_64BIT && TARGET_STV && TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, DImode, operands)"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(parallel [(set (match_dup 0)
|
||||
(any_or:SI (match_dup 1) (match_dup 2)))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
(parallel [(set (match_dup 3)
|
||||
(any_or:SI (match_dup 4) (match_dup 5)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);")
|
||||
|
||||
(define_insn "*<code>hi_1"
|
||||
[(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,!k")
|
||||
(any_or:HI
|
||||
|
|
|
@ -567,6 +567,11 @@ Target Report Mask(VZEROUPPER) Save
|
|||
Generate vzeroupper instruction before a transfer of control flow out of
|
||||
the function.
|
||||
|
||||
mstv
|
||||
Target Report Mask(STV) Save
|
||||
Enable Scalar to Vector optimization pass transforming 64-bit integer
|
||||
computations into vector ones.
|
||||
|
||||
mdispatch-scheduler
|
||||
Target RejectNegative Var(flag_dispatch_scheduler)
|
||||
Do dispatch scheduling if processor is bdver1 or bdver2 or bdver3 or bdver4 and Haifa scheduling
|
||||
|
|
|
@ -186,6 +186,7 @@ DEBUG_COUNTER (sel_sched_region_cnt)
|
|||
DEBUG_COUNTER (sms_sched_loop)
|
||||
DEBUG_COUNTER (split_for_sched2)
|
||||
DEBUG_COUNTER (store_motion)
|
||||
DEBUG_COUNTER (stv_conversion)
|
||||
DEBUG_COUNTER (tail_call)
|
||||
DEBUG_COUNTER (treepre_insert)
|
||||
DEBUG_COUNTER (tree_sra)
|
||||
|
|
|
@ -1,3 +1,12 @@
|
|||
2015-09-29 Ilya Enkovich <enkovich.gnu@gmail.com>
|
||||
|
||||
PR target/65105
|
||||
* gcc.target/i386/pr65105-1.c: New.
|
||||
* gcc.target/i386/pr65105-2.c: New.
|
||||
* gcc.target/i386/pr65105-3.c: New.
|
||||
* gcc.target/i386/pr65105-4.C: New.
|
||||
* gcc.dg/lower-subreg-1.c: Add -mno-stv options for ia32.
|
||||
|
||||
2015-09-28 Segher Boessenkool <segher@kernel.crashing.org>
|
||||
|
||||
* gcc.dg/asm-4.c: Use braced words for the regular expressions.
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/* { dg-do compile { target { ! { mips64 || { aarch64*-*-* arm*-*-* ia64-*-* sparc*-*-* spu-*-* tilegx-*-* } } } } } */
|
||||
/* { dg-options "-O -fdump-rtl-subreg1" } */
|
||||
/* { dg-additional-options "-mno-stv" { target ia32 } } */
|
||||
/* { dg-skip-if "" { { i?86-*-* x86_64-*-* } && x32 } { "*" } { "" } } */
|
||||
/* { dg-require-effective-target ilp32 } */
|
||||
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
/* PR target/pr65105 */
|
||||
/* { dg-do run { target { ia32 } } } */
|
||||
/* { dg-options "-O2 -march=slm" } */
|
||||
/* { dg-final { scan-assembler "por" } } */
|
||||
/* { dg-final { scan-assembler "pand" } } */
|
||||
|
||||
#include "stdlib.h"
|
||||
|
||||
static int count = 0;
|
||||
|
||||
void __attribute__((noinline))
|
||||
counter (long long l)
|
||||
{
|
||||
count++;
|
||||
if (!l || count > 5)
|
||||
exit (1);
|
||||
}
|
||||
|
||||
void __attribute__((noinline))
|
||||
test (long long *arr)
|
||||
{
|
||||
register unsigned long long tmp;
|
||||
|
||||
tmp = arr[0] | arr[1] & arr[2];
|
||||
while (tmp)
|
||||
{
|
||||
counter (tmp);
|
||||
tmp = *(arr++) & tmp;
|
||||
}
|
||||
}
|
||||
|
||||
void __attribute__((noinline))
|
||||
fill_data (long long *arr)
|
||||
{
|
||||
arr[0] = 0x00ffffffL;
|
||||
arr[1] = 0xffffff00L;
|
||||
arr[2] = 0x00ffffffL;
|
||||
arr[3] = 0x0000ff00L;
|
||||
arr[4] = 0x00ff0000L;
|
||||
arr[5] = 0xff000000L;
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, const char **argv)
|
||||
{
|
||||
long long arr[6];
|
||||
fill_data (arr);
|
||||
test (arr);
|
||||
return count - 5;
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
/* PR target/pr65105 */
|
||||
/* { dg-do compile { target { ia32 } } } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-final { scan-assembler "por" } } */
|
||||
|
||||
long long i1, i2, res;
|
||||
|
||||
void
|
||||
test ()
|
||||
{
|
||||
res = i1 | i2;
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
/* PR target/pr65105 */
|
||||
/* { dg-do compile { target { ia32 } } } */
|
||||
/* { dg-options "-O2 -march=slm -msse4.2" } */
|
||||
/* { dg-final { scan-assembler "pand" } } */
|
||||
/* { dg-final { scan-assembler "por" } } */
|
||||
/* { dg-final { scan-assembler "ptest" } } */
|
||||
|
||||
long long i1, i2, i3, res;
|
||||
|
||||
void
|
||||
test ()
|
||||
{
|
||||
res = i1 | i2;
|
||||
if (res)
|
||||
res &= i3;
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
/* PR target/pr65105 */
|
||||
/* { dg-do compile { target { ia32 } } } */
|
||||
/* { dg-options "-O2 -march=slm" } */
|
||||
|
||||
struct s {
|
||||
long long l1, l2, l3, l4, l5;
|
||||
} *a;
|
||||
long long b;
|
||||
long long fn1()
|
||||
{
|
||||
try
|
||||
{
|
||||
b = (a->l1 | a->l2 | a->l3 | a->l4 | a->l5);
|
||||
return a->l1;
|
||||
}
|
||||
catch (int)
|
||||
{
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue