re PR rtl-optimization/59461 (missed zero-extension elimination in the combiner)

PR rtl-optimization/59461
	* doc/rtl.texi (paradoxical subregs): Add missing word.
	* combine.c (reg_nonzero_bits_for_combine): Do not discard results
	in modes with precision larger than that of last_set_mode.
	* rtlanal.c (nonzero_bits1) <SUBREG>: If WORD_REGISTER_OPERATIONS is
	set and LOAD_EXTEND_OP is appropriate, propagate results from inner
	REGs to paradoxical SUBREGs.
	(num_sign_bit_copies1) <SUBREG>: Likewise.  Check that the mode is not
	larger than a word before invoking LOAD_EXTEND_OP on it.

From-SVN: r242326
This commit is contained in:
Eric Botcazou 2016-11-11 22:38:33 +00:00 committed by Eric Botcazou
parent 84971f1bad
commit 8b287aea35
6 changed files with 64 additions and 32 deletions

View File

@ -1,3 +1,15 @@
2016-11-11 Eric Botcazou <ebotcazou@adacore.com>
PR rtl-optimization/59461
* doc/rtl.texi (paradoxical subregs): Add missing word.
* combine.c (reg_nonzero_bits_for_combine): Do not discard results
in modes with precision larger than that of last_set_mode.
* rtlanal.c (nonzero_bits1) <SUBREG>: If WORD_REGISTER_OPERATIONS is
set and LOAD_EXTEND_OP is appropriate, propagate results from inner
REGs to paradoxical SUBREGs.
(num_sign_bit_copies1) <SUBREG>: Likewise. Check that the mode is not
larger than a word before invoking LOAD_EXTEND_OP on it.
2016-11-11 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/78243

View File

@ -9895,18 +9895,17 @@ reg_nonzero_bits_for_combine (const_rtx x, machine_mode mode,
(DF_LR_IN (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb),
REGNO (x)))))
{
unsigned HOST_WIDE_INT mask = rsp->last_set_nonzero_bits;
if (GET_MODE_PRECISION (rsp->last_set_mode) < GET_MODE_PRECISION (mode))
/* We don't know anything about the upper bits. */
mask |= GET_MODE_MASK (mode) ^ GET_MODE_MASK (rsp->last_set_mode);
*nonzero &= mask;
/* Note that, even if the precision of last_set_mode is lower than that
of mode, record_value_for_reg invoked nonzero_bits on the register
with nonzero_bits_mode (because last_set_mode is necessarily integral
and HWI_COMPUTABLE_MODE_P in this case) so bits in nonzero_bits_mode
are all valid, hence in mode too since nonzero_bits_mode is defined
to the largest HWI_COMPUTABLE_MODE_P mode. */
*nonzero &= rsp->last_set_nonzero_bits;
return NULL;
}
tem = get_last_value (x);
if (tem)
{
if (SHORT_IMMEDIATES_SIGN_EXTEND)
@ -9915,7 +9914,8 @@ reg_nonzero_bits_for_combine (const_rtx x, machine_mode mode,
return tem;
}
else if (nonzero_sign_valid && rsp->nonzero_bits)
if (nonzero_sign_valid && rsp->nonzero_bits)
{
unsigned HOST_WIDE_INT mask = rsp->nonzero_bits;

View File

@ -1882,7 +1882,7 @@ When used as an rvalue, the low-order bits of the @code{subreg} are
taken from @var{reg} while the high-order bits may or may not be
defined.
The high-order bits of rvalues are in the following circumstances:
The high-order bits of rvalues are defined in the following circumstances:
@itemize
@item @code{subreg}s of @code{mem}

View File

@ -4256,7 +4256,7 @@ cached_nonzero_bits (const_rtx x, machine_mode mode, const_rtx known_x,
/* Given an expression, X, compute which bits in X can be nonzero.
We don't care about bits outside of those defined in MODE.
For most X this is simply GET_MODE_MASK (GET_MODE (MODE)), but if X is
For most X this is simply GET_MODE_MASK (GET_MODE (X)), but if X is
an arithmetic operation, we can do better. */
static unsigned HOST_WIDE_INT
@ -4563,18 +4563,17 @@ nonzero_bits1 (const_rtx x, machine_mode mode, const_rtx known_x,
/* If this is a SUBREG formed for a promoted variable that has
been zero-extended, we know that at least the high-order bits
are zero, though others might be too. */
if (SUBREG_PROMOTED_VAR_P (x) && SUBREG_PROMOTED_UNSIGNED_P (x))
nonzero = GET_MODE_MASK (GET_MODE (x))
& cached_nonzero_bits (SUBREG_REG (x), GET_MODE (x),
known_x, known_mode, known_ret);
inner_mode = GET_MODE (SUBREG_REG (x));
/* If the inner mode is a single word for both the host and target
machines, we can compute this from which bits of the inner
object might be nonzero. */
inner_mode = GET_MODE (SUBREG_REG (x));
if (GET_MODE_PRECISION (inner_mode) <= BITS_PER_WORD
&& (GET_MODE_PRECISION (inner_mode) <= HOST_BITS_PER_WIDE_INT))
&& GET_MODE_PRECISION (inner_mode) <= HOST_BITS_PER_WIDE_INT)
{
nonzero &= cached_nonzero_bits (SUBREG_REG (x), mode,
known_x, known_mode, known_ret);
@ -4582,19 +4581,17 @@ nonzero_bits1 (const_rtx x, machine_mode mode, const_rtx known_x,
/* On many CISC machines, accessing an object in a wider mode
causes the high-order bits to become undefined. So they are
not known to be zero. */
if (!WORD_REGISTER_OPERATIONS
/* If this is a typical RISC machine, we only have to worry
about the way loads are extended. */
|| ((LOAD_EXTEND_OP (inner_mode) == SIGN_EXTEND
? val_signbit_known_set_p (inner_mode, nonzero)
: LOAD_EXTEND_OP (inner_mode) != ZERO_EXTEND)
|| !MEM_P (SUBREG_REG (x))))
{
if (GET_MODE_PRECISION (GET_MODE (x))
if ((!WORD_REGISTER_OPERATIONS
/* If this is a typical RISC machine, we only have to worry
about the way loads are extended. */
|| (LOAD_EXTEND_OP (inner_mode) == SIGN_EXTEND
? val_signbit_known_set_p (inner_mode, nonzero)
: LOAD_EXTEND_OP (inner_mode) != ZERO_EXTEND)
|| (!MEM_P (SUBREG_REG (x)) && !REG_P (SUBREG_REG (x))))
&& GET_MODE_PRECISION (GET_MODE (x))
> GET_MODE_PRECISION (inner_mode))
nonzero |= (GET_MODE_MASK (GET_MODE (x))
& ~GET_MODE_MASK (inner_mode));
}
nonzero
|= (GET_MODE_MASK (GET_MODE (x)) & ~GET_MODE_MASK (inner_mode));
}
break;
@ -4799,6 +4796,7 @@ num_sign_bit_copies1 (const_rtx x, machine_mode mode, const_rtx known_x,
{
enum rtx_code code = GET_CODE (x);
unsigned int bitwidth = GET_MODE_PRECISION (mode);
machine_mode inner_mode;
int num0, num1, result;
unsigned HOST_WIDE_INT nonzero;
@ -4906,13 +4904,13 @@ num_sign_bit_copies1 (const_rtx x, machine_mode mode, const_rtx known_x,
}
/* For a smaller object, just ignore the high bits. */
if (bitwidth <= GET_MODE_PRECISION (GET_MODE (SUBREG_REG (x))))
inner_mode = GET_MODE (SUBREG_REG (x));
if (bitwidth <= GET_MODE_PRECISION (inner_mode))
{
num0 = cached_num_sign_bit_copies (SUBREG_REG (x), VOIDmode,
known_x, known_mode, known_ret);
return MAX (1, (num0
- (int) (GET_MODE_PRECISION (GET_MODE (SUBREG_REG (x)))
- bitwidth)));
return
MAX (1, num0 - (int) (GET_MODE_PRECISION (inner_mode) - bitwidth));
}
/* For paradoxical SUBREGs on machines where all register operations
@ -4926,9 +4924,10 @@ num_sign_bit_copies1 (const_rtx x, machine_mode mode, const_rtx known_x,
to the stack. */
if (WORD_REGISTER_OPERATIONS
&& GET_MODE_PRECISION (inner_mode) <= BITS_PER_WORD
&& LOAD_EXTEND_OP (inner_mode) == SIGN_EXTEND
&& paradoxical_subreg_p (x)
&& LOAD_EXTEND_OP (GET_MODE (SUBREG_REG (x))) == SIGN_EXTEND
&& MEM_P (SUBREG_REG (x)))
&& (MEM_P (SUBREG_REG (x)) || REG_P (SUBREG_REG (x))))
return cached_num_sign_bit_copies (SUBREG_REG (x), mode,
known_x, known_mode, known_ret);
break;

View File

@ -1,3 +1,7 @@
2016-11-11 Eric Botcazou <ebotcazou@adacore.com>
* gcc.target/sparc/20161111-1.c: New test.
2016-11-11 Uros Bizjak <ubizjak@gmail.com>
PR target/78310

View File

@ -0,0 +1,17 @@
/* PR rtl-optimization/59461 */
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* Byte table read by the test function; declared here only, its definition
   is not part of this file (the test is compile-only).  */
extern char zeb_test_array[10];
/* Return 1 if the byte at index I of zeb_test_array, viewed as an
   unsigned char, lies in the range '0'..'9' inclusive, and 0 otherwise.

   I is used as an index without any bounds check; the array is declared
   with 10 elements.

   NOTE(review): this is a regression test for PR rtl-optimization/59461
   (missed zero-extension elimination in the combiner), so the exact shape
   of the source below is presumably what triggers the code pattern under
   test -- avoid restyling it.  The dg-final directive at the end of the
   file checks that the assembly output contains no "and", tab, "%"
   sequence.  */
unsigned char ee_isdigit2(unsigned int i)
{
/* Load through plain char, then narrow to unsigned char so the range
   comparisons below are done on an unsigned byte value.  */
unsigned char c = zeb_test_array[i];
unsigned char retval;
/* Bitwise '&' (not '&&') combines the two comparison results; each
   comparison yields 0 or 1, so the result is still 0 or 1.  */
retval = ((c>='0') & (c<='9')) ? 1 : 0;
return retval;
}
/* { dg-final { scan-assembler-not "and\t%" } } */