tree-optimization/98834 - fix optimization regression with _b_c_p

The following makes FRE optimize a load for which we formerly
required SRA + CCP, which now run only after we get rid of all
__builtin_constant_p calls.

2021-03-15  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/98834
	* tree-ssa-sccvn.c (vn_reference_lookup_3): Handle missing
	subsetting by truncating the access size.

	* g++.dg/opt/pr98834.C: New testcase.
This commit is contained in:
Richard Biener 2021-03-15 13:44:07 +01:00
parent dce586ff83
commit 99415d0f18
2 changed files with 82 additions and 1 deletions

View File

@ -0,0 +1,71 @@
/* { dg-do compile } */
/* { dg-require-effective-target c++17 } */
/* { dg-options "-O2 -fdump-tree-fre3" } */
/* Base class holding the wrapped value; the default member initializer
   keeps it zero so __builtin_constant_p below can see a constant.  */
struct _Base
{
int _M_data = 0;
};
/* Wrapper around _Base with a converting constructor from int (the
   argument is ignored; _M_data keeps its in-class initializer).
   _M_is_constprop asks whether the inherited _M_data is known constant
   via __builtin_constant_p -- the call FRE must be able to fold.  */
struct _Wrapper : _Base
{
_Wrapper(int) {}
bool _M_is_constprop() { return __builtin_constant_p(_M_data); }
};
/* Element-wise implementation.  _S_multiplies branches on the
   __builtin_constant_p guards; these conditionals are what FRE3 is
   expected to elide once the loads are value-numbered.  */
struct _Impl
{
_Wrapper _S_multiplies(_Wrapper __x, _Wrapper __y)
{
if (__x._M_is_constprop() || __y._M_is_constprop())
return __y;
return 0;
}
};
/* Plain aggregate pairing a wrapped element with a trailing int; the
   mixed member layout is what exercises the partial-access lookup.  */
struct _TupleData
{
_Wrapper first;
int second;
};
/* Tuple applying a callable per chunk: __fun is invoked on the first
   members and the result re-aggregated with the untouched second.  */
struct _Tuple : _TupleData
{
template <typename _Fp>
_Tuple _M_apply_per_chunk(_Fp __fun, _Tuple __y)
{
return {__fun(first, __y.first), second};
}
};
/* Fixed-size dispatch layer: forwards element-wise multiplication to
   _Impl::_S_multiplies through a generic lambda, mirroring the
   libstdc++ simd implementation structure the PR came from.  */
struct _ImplFixed
{
static _Tuple _S_multiplies(_Tuple __x, _Tuple __y)
{
return __x._M_apply_per_chunk(
[]( auto __xx, auto __yy) {
return _Impl()._S_multiplies(__xx, __yy);
},
__y);
}
};
/* Minimal simd-like front end.  operator* is always_inline so the whole
   call chain lands in main, where FRE3 must clean it up.  */
class simd
{
public:
[[__gnu__::__always_inline__]] friend simd operator*(simd __x, simd __y)
{ return _ImplFixed::_S_multiplies(__x._M_data, __y._M_data); }
simd(_Tuple __init) : _M_data(__init) {}
_Tuple _M_data;
};
int main()
{
/* Both operands are constant-initialized; after inlining, FRE3 should
   fold the __builtin_constant_p guards and remove every branch, which
   the dg-final scan below verifies by expecting a single basic block.  */
simd({0, 0}) * simd({0, 0});
}
/* FRE3 should elide all conditionals in the remaining main. */
/* { dg-final { scan-tree-dump-times "<bb" 1 "fre3" } } */

View File

@ -3215,7 +3215,17 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
return (void *)-1;
/* This can happen with bitfields. */
if (maybe_ne (ref->size, r.size))
return (void *)-1;
{
/* If the access lacks some subsetting simply apply that by
shortening it. That in the end can only be successful
if we can pun the lookup result which in turn requires
exact offsets. */
if (known_eq (r.size, r.max_size)
&& known_lt (ref->size, r.size))
r.size = r.max_size = ref->size;
else
return (void *)-1;
}
*ref = r;
/* Do not update last seen VUSE after translating. */