middle-end: Use subregs to expand COMPLEX_EXPR to set the lowpart.

When lowering COMPLEX_EXPR we currently emit two VEC_EXTRACTs: one for the
lowpart and one for the highpart.

The problem with this is that in RTL the lvalue of the RTX is the only thing
tying the two instructions together.

This means that e.g. combine is unable to try to combine the two instructions
for setting the lowpart and highpart.

For ISAs that have bit extract instructions we can eliminate one of the extracts
if, and only if, we're setting the entire complex number.

This patch changes the expand code so that, when we're setting the entire
complex number, it generates a subreg for the lowpart instead of a vec_extract.

This allows us to optimize sequences such as:

_Complex int f(int a, int b) {
    _Complex int t = a + b * 1i;
    return t;
}

from:

f:
	bfi     x2, x0, 0, 32
	bfi     x2, x1, 32, 32
	mov     x0, x2
	ret

into:

f:
	bfi	x0, x1, 32, 32
	ret

I have also confirmed the codegen for x86_64 did not change.

gcc/ChangeLog:

	* expmed.cc (store_bit_field_1): Add parameter that indicates if value is
	still undefined and if so emit a subreg move instead.
	(store_integral_bit_field): Likewise.
	(store_bit_field): Likewise.
	* expr.h (write_complex_part): Likewise.
	* expmed.h (store_bit_field): Add new parameter.
	* builtins.cc (expand_ifn_atomic_compare_exchange_into_call): Use new
	parameter.
	(expand_ifn_atomic_compare_exchange): Likewise.
	* calls.cc (store_unaligned_arguments_into_pseudos): Likewise.
	* emit-rtl.cc (validate_subreg): Likewise.
	* expr.cc (emit_group_store): Likewise.
	(copy_blkmode_from_reg): Likewise.
	(copy_blkmode_to_reg): Likewise.
	(clear_storage_hints): Likewise.
	(write_complex_part):  Likewise.
	(emit_move_complex_parts): Likewise.
	(expand_assignment): Likewise.
	(store_expr): Likewise.
	(store_field): Likewise.
	(expand_expr_real_2): Likewise.
	* ifcvt.cc (noce_emit_move_insn): Likewise.
	* internal-fn.cc (expand_arith_set_overflow): Likewise.
	(expand_arith_overflow_result_store): Likewise.
	(expand_addsub_overflow): Likewise.
	(expand_neg_overflow): Likewise.
	(expand_mul_overflow): Likewise.
	(expand_arith_overflow): Likewise.

gcc/testsuite/ChangeLog:

	* g++.target/aarch64/complex-init.C: New test.
This commit is contained in:
Tamar Christina 2022-07-08 07:37:20 +01:00
parent bf3695691f
commit 13f44099bc
10 changed files with 99 additions and 51 deletions

View File

@ -6029,8 +6029,8 @@ expand_ifn_atomic_compare_exchange_into_call (gcall *call, machine_mode mode)
if (GET_MODE (boolret) != mode)
boolret = convert_modes (mode, GET_MODE (boolret), boolret, 1);
x = force_reg (mode, x);
write_complex_part (target, boolret, true);
write_complex_part (target, x, false);
write_complex_part (target, boolret, true, true);
write_complex_part (target, x, false, false);
}
}
@ -6085,8 +6085,8 @@ expand_ifn_atomic_compare_exchange (gcall *call)
rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
if (GET_MODE (boolret) != mode)
boolret = convert_modes (mode, GET_MODE (boolret), boolret, 1);
write_complex_part (target, boolret, true);
write_complex_part (target, oldval, false);
write_complex_part (target, boolret, true, true);
write_complex_part (target, oldval, false, false);
}
}

View File

@ -1226,7 +1226,7 @@ store_unaligned_arguments_into_pseudos (struct arg_data *args, int num_actuals)
bytes -= bitsize / BITS_PER_UNIT;
store_bit_field (reg, bitsize, endian_correction, 0, 0,
word_mode, word, false);
word_mode, word, false, false);
}
}
}

View File

@ -947,9 +947,11 @@ validate_subreg (machine_mode omode, machine_mode imode,
&& GET_MODE_INNER (omode) == GET_MODE_INNER (imode))
;
/* Subregs involving floating point modes are not allowed to
change size. Therefore (subreg:DI (reg:DF) 0) is fine, but
change size unless it's an insert into a complex mode.
Therefore (subreg:DI (reg:DF) 0) and (subreg:CS (reg:SF) 0) are fine, but
(subreg:SI (reg:DF) 0) isn't. */
else if (FLOAT_MODE_P (imode) || FLOAT_MODE_P (omode))
else if ((FLOAT_MODE_P (imode) || FLOAT_MODE_P (omode))
&& !COMPLEX_MODE_P (omode))
{
if (! (known_eq (isize, osize)
/* LRA can use subreg to store a floating point value in

View File

@ -738,13 +738,16 @@ store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
no other way of implementing the operation. If FALLBACK_P is false,
return false instead. */
return false instead.
If UNDEFINED_P is true then STR_RTX is undefined and may be set using
a subreg instead. */
static bool
store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
poly_uint64 bitregion_start, poly_uint64 bitregion_end,
machine_mode fieldmode,
rtx value, bool reverse, bool fallback_p)
rtx value, bool reverse, bool fallback_p, bool undefined_p)
{
rtx op0 = str_rtx;
@ -805,8 +808,9 @@ store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
return true;
}
}
else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
&& multiple_p (bitsize, regsize * BITS_PER_UNIT)
else if (((constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
&& multiple_p (bitsize, regsize * BITS_PER_UNIT))
|| undefined_p)
&& known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
{
sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
@ -869,7 +873,7 @@ store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
GET_MODE_SIZE (GET_MODE (op0)));
emit_move_insn (temp, op0);
store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
reverse, fallback_p);
reverse, fallback_p, undefined_p);
emit_move_insn (op0, temp);
return true;
}
@ -994,7 +998,7 @@ store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
bitnum + bit_offset,
bitregion_start, bitregion_end,
word_mode,
value_word, reverse, fallback_p))
value_word, reverse, fallback_p, false))
{
delete_insns_since (last);
return false;
@ -1084,7 +1088,7 @@ store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
rtx tempreg = copy_to_reg (xop0);
if (store_bit_field_1 (tempreg, bitsize, bitpos,
bitregion_start, bitregion_end,
fieldmode, orig_value, reverse, false))
fieldmode, orig_value, reverse, false, false))
{
emit_move_insn (xop0, tempreg);
return true;
@ -1112,13 +1116,15 @@ store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
If REVERSE is true, the store is to be done in reverse order. */
If REVERSE is true, the store is to be done in reverse order.
If UNDEFINED_P is true then STR_RTX is currently undefined. */
void
store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
poly_uint64 bitregion_start, poly_uint64 bitregion_end,
machine_mode fieldmode,
rtx value, bool reverse)
rtx value, bool reverse, bool undefined_p)
{
/* Handle -fstrict-volatile-bitfields in the cases where it applies. */
unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
@ -1151,7 +1157,7 @@ store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
temp = copy_to_reg (str_rtx);
if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
int_mode, value, reverse, true))
int_mode, value, reverse, true, undefined_p))
gcc_unreachable ();
emit_move_insn (str_rtx, temp);
@ -1186,7 +1192,7 @@ store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
bitregion_start, bitregion_end,
fieldmode, value, reverse, true))
fieldmode, value, reverse, true, undefined_p))
gcc_unreachable ();
}

View File

@ -715,7 +715,7 @@ extern rtx expand_divmod (int, enum tree_code, machine_mode, rtx, rtx,
extern void store_bit_field (rtx, poly_uint64, poly_uint64,
poly_uint64, poly_uint64,
machine_mode, rtx, bool);
machine_mode, rtx, bool, bool);
extern rtx extract_bit_field (rtx, poly_uint64, poly_uint64, int, rtx,
machine_mode, machine_mode, bool, rtx *);
extern rtx extract_low_bits (machine_mode, machine_mode, rtx);

View File

@ -2891,7 +2891,7 @@ emit_group_store (rtx orig_dst, rtx src, tree type ATTRIBUTE_UNUSED,
store_bit_field (dest,
adj_bytelen * BITS_PER_UNIT, bytepos * BITS_PER_UNIT,
bytepos * BITS_PER_UNIT, ssize * BITS_PER_UNIT - 1,
VOIDmode, tmps[i], false);
VOIDmode, tmps[i], false, false);
}
/* Optimize the access just a bit. */
@ -2905,7 +2905,7 @@ emit_group_store (rtx orig_dst, rtx src, tree type ATTRIBUTE_UNUSED,
else
store_bit_field (dest, bytelen * BITS_PER_UNIT, bytepos * BITS_PER_UNIT,
0, 0, mode, tmps[i], false);
0, 0, mode, tmps[i], false, false);
}
/* Copy from the pseudo into the (probable) hard reg. */
@ -3038,7 +3038,7 @@ copy_blkmode_from_reg (rtx target, rtx srcreg, tree type)
xbitpos % BITS_PER_WORD, 1,
NULL_RTX, copy_mode, copy_mode,
false, NULL),
false);
false, false);
}
}
@ -3140,7 +3140,7 @@ copy_blkmode_to_reg (machine_mode mode_in, tree src)
bitpos % BITS_PER_WORD, 1,
NULL_RTX, word_mode, word_mode,
false, NULL),
false);
false, false);
}
if (mode == BLKmode)
@ -3308,8 +3308,8 @@ clear_storage_hints (rtx object, rtx size, enum block_op_methods method,
zero = CONST0_RTX (GET_MODE_INNER (mode));
if (zero != NULL)
{
write_complex_part (object, zero, 0);
write_complex_part (object, zero, 1);
write_complex_part (object, zero, 0, true);
write_complex_part (object, zero, 1, false);
return NULL;
}
}
@ -3470,10 +3470,11 @@ set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align,
/* Write to one of the components of the complex value CPLX. Write VAL to
the real part if IMAG_P is false, and the imaginary part if its true. */
the real part if IMAG_P is false, and the imaginary part if it's true.
If UNDEFINED_P then the value in CPLX is currently undefined. */
void
write_complex_part (rtx cplx, rtx val, bool imag_p)
write_complex_part (rtx cplx, rtx val, bool imag_p, bool undefined_p)
{
machine_mode cmode;
scalar_mode imode;
@ -3528,7 +3529,7 @@ write_complex_part (rtx cplx, rtx val, bool imag_p)
}
store_bit_field (cplx, ibitsize, imag_p ? ibitsize : 0, 0, 0, imode, val,
false);
false, undefined_p);
}
/* Extract one of the components of the complex value CPLX. Extract the
@ -3781,8 +3782,8 @@ emit_move_complex_parts (rtx x, rtx y)
&& REG_P (x) && !reg_overlap_mentioned_p (x, y))
emit_clobber (x);
write_complex_part (x, read_complex_part (y, false), false);
write_complex_part (x, read_complex_part (y, true), true);
write_complex_part (x, read_complex_part (y, false), false, true);
write_complex_part (x, read_complex_part (y, true), true, false);
return get_last_insn ();
}
@ -5441,7 +5442,7 @@ expand_assignment (tree to, tree from, bool nontemporal)
}
else
store_bit_field (mem, GET_MODE_BITSIZE (mode), 0, 0, 0, mode, reg,
false);
false, false);
return;
}
@ -5663,8 +5664,8 @@ expand_assignment (tree to, tree from, bool nontemporal)
concat_store_slow:;
rtx temp = assign_stack_temp (GET_MODE (to_rtx),
GET_MODE_SIZE (GET_MODE (to_rtx)));
write_complex_part (temp, XEXP (to_rtx, 0), false);
write_complex_part (temp, XEXP (to_rtx, 1), true);
write_complex_part (temp, XEXP (to_rtx, 0), false, true);
write_complex_part (temp, XEXP (to_rtx, 1), true, false);
result = store_field (temp, bitsize, bitpos,
bitregion_start, bitregion_end,
mode1, from, get_alias_set (to),
@ -6222,7 +6223,8 @@ store_expr (tree exp, rtx target, int call_param_p,
store_bit_field (target,
rtx_to_poly_int64 (expr_size (exp))
* BITS_PER_UNIT,
0, 0, 0, GET_MODE (temp), temp, reverse);
0, 0, 0, GET_MODE (temp), temp, reverse,
false);
}
else
convert_move (target, temp, TYPE_UNSIGNED (TREE_TYPE (exp)));
@ -7631,7 +7633,7 @@ store_field (rtx target, poly_int64 bitsize, poly_int64 bitpos,
gcc_checking_assert (known_ge (bitpos, 0));
store_bit_field (target, bitsize, bitpos,
bitregion_start, bitregion_end,
mode, temp, reverse);
mode, temp, reverse, false);
return const0_rtx;
}
@ -10120,8 +10122,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
complex_expr_swap_order:
/* Move the imaginary (op1) and real (op0) parts to their
location. */
write_complex_part (target, op1, true);
write_complex_part (target, op0, false);
write_complex_part (target, op1, true, true);
write_complex_part (target, op0, false, false);
return target;
}
@ -10150,8 +10152,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
}
/* Move the real (op0) and imaginary (op1) parts to their location. */
write_complex_part (target, op0, false);
write_complex_part (target, op1, true);
write_complex_part (target, op0, false, true);
write_complex_part (target, op1, true, false);
return target;
@ -10390,7 +10392,7 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
rtx dst = gen_reg_rtx (mode);
emit_move_insn (dst, op0);
store_bit_field (dst, bitsize, bitpos, 0, 0,
TYPE_MODE (TREE_TYPE (treeop1)), op1, false);
TYPE_MODE (TREE_TYPE (treeop1)), op1, false, false);
return dst;
}

View File

@ -262,7 +262,7 @@ extern rtx_insn *emit_move_insn_1 (rtx, rtx);
extern rtx_insn *emit_move_complex_push (machine_mode, rtx, rtx);
extern rtx_insn *emit_move_complex_parts (rtx, rtx);
extern rtx read_complex_part (rtx, bool);
extern void write_complex_part (rtx, rtx, bool);
extern void write_complex_part (rtx, rtx, bool, bool);
extern rtx read_complex_part (rtx, bool);
extern rtx emit_move_resolve_push (machine_mode, rtx);

View File

@ -999,7 +999,8 @@ noce_emit_move_insn (rtx x, rtx y)
}
gcc_assert (start < (MEM_P (op) ? BITS_PER_UNIT : BITS_PER_WORD));
store_bit_field (op, size, start, 0, 0, GET_MODE (x), y, false);
store_bit_field (op, size, start, 0, 0, GET_MODE (x), y, false,
false);
return;
}
@ -1056,7 +1057,7 @@ noce_emit_move_insn (rtx x, rtx y)
outmode = GET_MODE (outer);
bitpos = SUBREG_BYTE (outer) * BITS_PER_UNIT;
store_bit_field (inner, GET_MODE_BITSIZE (outmode), bitpos,
0, 0, outmode, y, false);
0, 0, outmode, y, false, false);
}
/* Return the CC reg if it is used in COND. */

View File

@ -815,9 +815,9 @@ expand_arith_set_overflow (tree lhs, rtx target)
{
if (TYPE_PRECISION (TREE_TYPE (TREE_TYPE (lhs))) == 1
&& !TYPE_UNSIGNED (TREE_TYPE (TREE_TYPE (lhs))))
write_complex_part (target, constm1_rtx, true);
write_complex_part (target, constm1_rtx, true, false);
else
write_complex_part (target, const1_rtx, true);
write_complex_part (target, const1_rtx, true, false);
}
/* Helper for expand_*_overflow. Store RES into the __real__ part
@ -872,7 +872,7 @@ expand_arith_overflow_result_store (tree lhs, rtx target,
expand_arith_set_overflow (lhs, target);
emit_label (done_label);
}
write_complex_part (target, lres, false);
write_complex_part (target, lres, false, false);
}
/* Helper for expand_*_overflow. Store RES into TARGET. */
@ -917,7 +917,7 @@ expand_addsub_overflow (location_t loc, tree_code code, tree lhs,
{
target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
if (!is_ubsan)
write_complex_part (target, const0_rtx, true);
write_complex_part (target, const0_rtx, true, false);
}
/* We assume both operands and result have the same precision
@ -1362,7 +1362,7 @@ expand_neg_overflow (location_t loc, tree lhs, tree arg1, bool is_ubsan,
{
target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
if (!is_ubsan)
write_complex_part (target, const0_rtx, true);
write_complex_part (target, const0_rtx, true, false);
}
enum insn_code icode = optab_handler (negv3_optab, mode);
@ -1487,7 +1487,7 @@ expand_mul_overflow (location_t loc, tree lhs, tree arg0, tree arg1,
{
target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
if (!is_ubsan)
write_complex_part (target, const0_rtx, true);
write_complex_part (target, const0_rtx, true, false);
}
if (is_ubsan)
@ -2304,7 +2304,7 @@ expand_mul_overflow (location_t loc, tree lhs, tree arg0, tree arg1,
do_compare_rtx_and_jump (op1, res, NE, true, mode, NULL_RTX, NULL,
all_done_label, profile_probability::very_unlikely ());
emit_label (set_noovf);
write_complex_part (target, const0_rtx, true);
write_complex_part (target, const0_rtx, true, false);
emit_label (all_done_label);
}
@ -2573,7 +2573,7 @@ expand_arith_overflow (enum tree_code code, gimple *stmt)
{
/* The infinity precision result will always fit into result. */
rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
write_complex_part (target, const0_rtx, true);
write_complex_part (target, const0_rtx, true, false);
scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
struct separate_ops ops;
ops.code = code;

View File

@ -0,0 +1,37 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
/*
** _Z1fii:
** ...
** bfi x0, x1, 32, 32
** ret
*/
/* Build a _Complex int from A (real part) and B (imaginary part) using
   complex arithmetic with the GNU imaginary constant 1i, and return it.
   The check-function-bodies pattern for _Z1fii expects the body to end
   with "bfi x0, x1, 32, 32" followed by "ret".  */
_Complex int f(int a, int b) {
_Complex int t = a + b * 1i;
return t;
}
/*
** _Z2f2ii:
** ...
** bfi x0, x1, 32, 32
** ret
*/
/* Same value as f, but the _Complex int is built with a brace initializer
   {a, b} instead of arithmetic.  The check-function-bodies pattern for
   _Z2f2ii expects the same trailing "bfi x0, x1, 32, 32" / "ret".  */
_Complex int f2(int a, int b) {
_Complex int t = {a, b};
return t;
}
/*
** _Z12f_convolutedii:
** ...
** bfi x0, x1, 32, 32
** ret
*/
/* Build the complex value in two steps: first convert A to _Complex int,
   then overwrite only the imaginary part with B via __imag__.  The
   check-function-bodies pattern for _Z12f_convolutedii expects the same
   trailing "bfi x0, x1, 32, 32" / "ret" as the other variants.  */
_Complex int f_convoluted(int a, int b) {
_Complex int t = (_Complex int)a;
/* Partial update of an already-initialized complex value.  */
__imag__ t = b;
return t;
}