Implement more rtx vector folds on variable-length vectors
This patch extends the tree-level folding of variable-length vectors
so that it can also be used on rtxes.  The first step is to move the
tree_vector_builder new_unary/binary_operation routines to the parent
vector_builder class (which in turn means adding a new template
parameter).  The second step is to make simplify-rtx.c use a direct
rtx analogue of the VECTOR_CST handling in fold-const.c.

2019-07-29  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* vector-builder.h (vector_builder): Add a shape template parameter.
	(vector_builder::new_unary_operation): New function, generalizing
	the old tree_vector_builder function.
	(vector_builder::new_binary_operation): Likewise.
	(vector_builder::binary_encoded_nelts): Likewise.
	* int-vector-builder.h (int_vector_builder): Update template
	parameters to vector_builder.
	(int_vector_builder::shape_nelts): New function.
	* rtx-vector-builder.h (rtx_vector_builder): Update template
	parameters to vector_builder.
	(rtx_vector_builder::shape_nelts): New function.
	(rtx_vector_builder::nelts_of): Likewise.
	(rtx_vector_builder::npatterns_of): Likewise.
	(rtx_vector_builder::nelts_per_pattern_of): Likewise.
	* tree-vector-builder.h (tree_vector_builder): Update template
	parameters to vector_builder.
	(tree_vector_builder::shape_nelts): New function.
	(tree_vector_builder::nelts_of): Likewise.
	(tree_vector_builder::npatterns_of): Likewise.
	(tree_vector_builder::nelts_per_pattern_of): Likewise.
	* tree-vector-builder.c (tree_vector_builder::new_unary_operation)
	(tree_vector_builder::new_binary_operation): Delete.
	(tree_vector_builder::binary_encoded_nelts): Likewise.
	* simplify-rtx.c: Include rtx-vector-builder.h.
	(distributes_over_addition_p): New function.
	(simplify_const_unary_operation)
	(simplify_const_binary_operation): Generalize handling of vector
	constants to include variable-length vectors.
	(test_vector_ops_series): Add more tests.

From-SVN: r273867
Parent: 66fafc3bf6
Commit: 4ce6ab6889
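Editorial sketch (not part of the patch): the calling convention that the
generalized builder gives rtx-level folders, extracted from the new
simplify_const_binary_operation code in the diff below.  The wrapper name
fold_const_vec_binop and its step_ok_p parameter are hypothetical; the body
mirrors what the patch adds.

static rtx
fold_const_vec_binop (rtx_code code, machine_mode mode, rtx op0, rtx op1,
                      bool step_ok_p)
{
  /* Ask the builder for an encoding that can hold the result.  The shape
     argument (a machine_mode here, a tree type for tree_vector_builder)
     supplies the element count via the new Derived::shape_nelts hook.  */
  rtx_vector_builder builder;
  if (!builder.new_binary_operation (mode, op0, op1, step_ok_p))
    return NULL_RTX;

  /* Only the explicitly encoded elements need to be computed; the elided
     elements follow from the pattern structure, so this also works for
     variable-length vectors.  */
  unsigned int count = builder.encoded_nelts ();
  for (unsigned int i = 0; i < count; i++)
    {
      rtx x = simplify_binary_operation (code, GET_MODE_INNER (mode),
                                         CONST_VECTOR_ELT (op0, i),
                                         CONST_VECTOR_ELT (op1, i));
      if (!x || !valid_for_const_vector_p (mode, x))
        return NULL_RTX;
      builder.quick_push (x);
    }
  return builder.build ();
}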
gcc/ChangeLog
@@ -1,3 +1,35 @@
+2019-07-29  Richard Sandiford  <richard.sandiford@arm.com>
+
+	* vector-builder.h (vector_builder): Add a shape template parameter.
+	(vector_builder::new_unary_operation): New function, generalizing
+	the old tree_vector_builder function.
+	(vector_builder::new_binary_operation): Likewise.
+	(vector_builder::binary_encoded_nelts): Likewise.
+	* int-vector-builder.h (int_vector_builder): Update template
+	parameters to vector_builder.
+	(int_vector_builder::shape_nelts): New function.
+	* rtx-vector-builder.h (rtx_vector_builder): Update template
+	parameters to vector_builder.
+	(rtx_vector_builder::shape_nelts): New function.
+	(rtx_vector_builder::nelts_of): Likewise.
+	(rtx_vector_builder::npatterns_of): Likewise.
+	(rtx_vector_builder::nelts_per_pattern_of): Likewise.
+	* tree-vector-builder.h (tree_vector_builder): Update template
+	parameters to vector_builder.
+	(tree_vector_builder::shape_nelts): New function.
+	(tree_vector_builder::nelts_of): Likewise.
+	(tree_vector_builder::npatterns_of): Likewise.
+	(tree_vector_builder::nelts_per_pattern_of): Likewise.
+	* tree-vector-builder.c (tree_vector_builder::new_unary_operation)
+	(tree_vector_builder::new_binary_operation): Delete.
+	(tree_vector_builder::binary_encoded_nelts): Likewise.
+	* simplify-rtx.c: Include rtx-vector-builder.h.
+	(distributes_over_addition_p): New function.
+	(simplify_const_unary_operation)
+	(simplify_const_binary_operation): Generalize handling of vector
+	constants to include variable-length vectors.
+	(test_vector_ops_series): Add more tests.
+
 2019-07-28  Jan Hubicka  <hubicka@ucw.cz>
 
 	PR lto/91222
gcc/int-vector-builder.h
@@ -26,10 +26,11 @@ along with GCC; see the file COPYING3.  If not see
    encoding as tree and rtx constants.  See vector_builder for more
    details.  */
 template<typename T>
-class int_vector_builder : public vector_builder<T, int_vector_builder<T> >
+class int_vector_builder : public vector_builder<T, poly_uint64,
+						 int_vector_builder<T> >
 {
-  typedef vector_builder<T, int_vector_builder> parent;
-  friend class vector_builder<T, int_vector_builder>;
+  typedef vector_builder<T, poly_uint64, int_vector_builder> parent;
+  friend class vector_builder<T, poly_uint64, int_vector_builder>;
 
 public:
   int_vector_builder () {}
@@ -45,6 +46,8 @@ private:
   T apply_step (T, unsigned int, T) const;
   bool can_elide_p (T) const { return true; }
   void note_representative (T *, T) {}
+
+  static poly_uint64 shape_nelts (poly_uint64 x) { return x; }
 };
 
 /* Create a new builder for a vector with FULL_NELTS elements.
gcc/rtx-vector-builder.h
@@ -24,10 +24,11 @@ along with GCC; see the file COPYING3.  If not see
 
 /* This class is used to build VECTOR_CSTs from a sequence of elements.
    See vector_builder for more details.  */
-class rtx_vector_builder : public vector_builder<rtx, rtx_vector_builder>
+class rtx_vector_builder : public vector_builder<rtx, machine_mode,
+						 rtx_vector_builder>
 {
-  typedef vector_builder<rtx, rtx_vector_builder> parent;
-  friend class vector_builder<rtx, rtx_vector_builder>;
+  typedef vector_builder<rtx, machine_mode, rtx_vector_builder> parent;
+  friend class vector_builder<rtx, machine_mode, rtx_vector_builder>;
 
 public:
   rtx_vector_builder () : m_mode (VOIDmode) {}
@@ -48,6 +49,15 @@ private:
   bool can_elide_p (rtx) const { return true; }
   void note_representative (rtx *, rtx) {}
 
+  static poly_uint64 shape_nelts (machine_mode mode)
+  { return GET_MODE_NUNITS (mode); }
+  static poly_uint64 nelts_of (const_rtx x)
+  { return CONST_VECTOR_NUNITS (x); }
+  static unsigned int npatterns_of (const_rtx x)
+  { return CONST_VECTOR_NPATTERNS (x); }
+  static unsigned int nelts_per_pattern_of (const_rtx x)
+  { return CONST_VECTOR_NELTS_PER_PATTERN (x); }
+
   rtx find_cached_value ();
 
   machine_mode m_mode;
gcc/simplify-rtx.c
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "flags.h"
 #include "selftest.h"
 #include "selftest-rtl.h"
+#include "rtx-vector-builder.h"
 
 /* Simplification and canonicalization of RTL.  */
 
@@ -1753,27 +1754,23 @@ simplify_const_unary_operation (enum rtx_code code, machine_mode mode,
 
   if (VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
     {
-      unsigned int n_elts;
-      if (!CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
-	return NULL_RTX;
+      gcc_assert (GET_MODE (op) == op_mode);
 
-      machine_mode opmode = GET_MODE (op);
-      gcc_assert (known_eq (GET_MODE_NUNITS (mode), n_elts));
-      gcc_assert (known_eq (GET_MODE_NUNITS (opmode), n_elts));
+      rtx_vector_builder builder;
+      if (!builder.new_unary_operation (mode, op, false))
+	return 0;
 
-      rtvec v = rtvec_alloc (n_elts);
-      unsigned int i;
-
-      for (i = 0; i < n_elts; i++)
+      unsigned int count = builder.encoded_nelts ();
+      for (unsigned int i = 0; i < count; i++)
 	{
 	  rtx x = simplify_unary_operation (code, GET_MODE_INNER (mode),
 					    CONST_VECTOR_ELT (op, i),
-					    GET_MODE_INNER (opmode));
+					    GET_MODE_INNER (op_mode));
 	  if (!x || !valid_for_const_vector_p (mode, x))
 	    return 0;
-	  RTVEC_ELT (v, i) = x;
+	  builder.quick_push (x);
 	}
-      return gen_rtx_CONST_VECTOR (mode, v);
+      return builder.build ();
     }
 
   /* The order of these tests is critical so that, for example, we don't
@@ -4059,6 +4056,27 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
   return 0;
 }
 
+/* Return true if binary operation OP distributes over addition in operand
+   OPNO, with the other operand being held constant.  OPNO counts from 1.  */
+
+static bool
+distributes_over_addition_p (rtx_code op, int opno)
+{
+  switch (op)
+    {
+    case PLUS:
+    case MINUS:
+    case MULT:
+      return true;
+
+    case ASHIFT:
+      return opno == 1;
+
+    default:
+      return false;
+    }
+}
+
 rtx
 simplify_const_binary_operation (enum rtx_code code, machine_mode mode,
 				 rtx op0, rtx op1)
@@ -4068,26 +4086,45 @@ simplify_const_binary_operation (enum rtx_code code, machine_mode mode,
       && GET_CODE (op0) == CONST_VECTOR
       && GET_CODE (op1) == CONST_VECTOR)
     {
-      unsigned int n_elts;
-      if (!CONST_VECTOR_NUNITS (op0).is_constant (&n_elts))
-	return NULL_RTX;
+      bool step_ok_p;
+      if (CONST_VECTOR_STEPPED_P (op0)
+	  && CONST_VECTOR_STEPPED_P (op1))
+	/* We can operate directly on the encoding if:
 
-      gcc_assert (known_eq (n_elts, CONST_VECTOR_NUNITS (op1)));
-      gcc_assert (known_eq (n_elts, GET_MODE_NUNITS (mode)));
-      rtvec v = rtvec_alloc (n_elts);
-      unsigned int i;
+	      a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1
+	    implies
+	      (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1)
 
-      for (i = 0; i < n_elts; i++)
+	    Addition and subtraction are the supported operators
+	    for which this is true.  */
+	step_ok_p = (code == PLUS || code == MINUS);
+      else if (CONST_VECTOR_STEPPED_P (op0))
+	/* We can operate directly on stepped encodings if:
+
+	     a3 - a2 == a2 - a1
+	   implies:
+	     (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c)
+
+	   which is true if (x -> x op c) distributes over addition.  */
+	step_ok_p = distributes_over_addition_p (code, 1);
+      else
+	/* Similarly in reverse.  */
+	step_ok_p = distributes_over_addition_p (code, 2);
+      rtx_vector_builder builder;
+      if (!builder.new_binary_operation (mode, op0, op1, step_ok_p))
+	return 0;
+
+      unsigned int count = builder.encoded_nelts ();
+      for (unsigned int i = 0; i < count; i++)
 	{
 	  rtx x = simplify_binary_operation (code, GET_MODE_INNER (mode),
 					     CONST_VECTOR_ELT (op0, i),
 					     CONST_VECTOR_ELT (op1, i));
 	  if (!x || !valid_for_const_vector_p (mode, x))
 	    return 0;
-	  RTVEC_ELT (v, i) = x;
+	  builder.quick_push (x);
 	}
-
-      return gen_rtx_CONST_VECTOR (mode, v);
+      return builder.build ();
     }
 
   if (VECTOR_MODE_P (mode)
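Editorial worked example (not part of the patch) of the stepped-encoding
reasoning in the comments above.  Take two stepped series:

  a = { 1, 2, 3, ... }   (step 1)
  b = { 0, 2, 4, ... }   (step 2)

  a + b = { 1, 4, 7, ... }    differences stay equal (7 - 4 == 4 - 1 == 3),
                              so the result is again a stepped series
  a * b = { 0, 4, 12, ... }   differences 4 and 8 differ, so the result is
                              not a stepped series

Hence PLUS and MINUS are step-safe with both operands stepped, while MULT
and ASHIFT are only step-safe when one operand is stepped and the other is
constant (they distribute over that operand's addition).  The new
test_vector_ops_series selftest below checks exactly this: MULT of
series_0_1 with itself must fail to fold for variable-length modes.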
@@ -7012,6 +7049,58 @@ test_vector_ops_series (machine_mode mode, rtx scalar_reg)
   ASSERT_RTX_EQ (series_0_m1,
 		 simplify_binary_operation (VEC_SERIES, mode, const0_rtx,
 					    constm1_rtx));
+
+  /* Test NEG on constant vector series.  */
+  ASSERT_RTX_EQ (series_0_m1,
+		 simplify_unary_operation (NEG, mode, series_0_1, mode));
+  ASSERT_RTX_EQ (series_0_1,
+		 simplify_unary_operation (NEG, mode, series_0_m1, mode));
+
+  /* Test PLUS and MINUS on constant vector series.  */
+  rtx scalar2 = gen_int_mode (2, inner_mode);
+  rtx scalar3 = gen_int_mode (3, inner_mode);
+  rtx series_1_1 = gen_const_vec_series (mode, const1_rtx, const1_rtx);
+  rtx series_0_2 = gen_const_vec_series (mode, const0_rtx, scalar2);
+  rtx series_1_3 = gen_const_vec_series (mode, const1_rtx, scalar3);
+  ASSERT_RTX_EQ (series_1_1,
+		 simplify_binary_operation (PLUS, mode, series_0_1,
+					    CONST1_RTX (mode)));
+  ASSERT_RTX_EQ (series_0_m1,
+		 simplify_binary_operation (PLUS, mode, CONST0_RTX (mode),
+					    series_0_m1));
+  ASSERT_RTX_EQ (series_1_3,
+		 simplify_binary_operation (PLUS, mode, series_1_1,
+					    series_0_2));
+  ASSERT_RTX_EQ (series_0_1,
+		 simplify_binary_operation (MINUS, mode, series_1_1,
+					    CONST1_RTX (mode)));
+  ASSERT_RTX_EQ (series_1_1,
+		 simplify_binary_operation (MINUS, mode, CONST1_RTX (mode),
+					    series_0_m1));
+  ASSERT_RTX_EQ (series_1_1,
+		 simplify_binary_operation (MINUS, mode, series_1_3,
+					    series_0_2));
+
+  /* Test MULT between constant vectors.  */
+  rtx vec2 = gen_const_vec_duplicate (mode, scalar2);
+  rtx vec3 = gen_const_vec_duplicate (mode, scalar3);
+  rtx scalar9 = gen_int_mode (9, inner_mode);
+  rtx series_3_9 = gen_const_vec_series (mode, scalar3, scalar9);
+  ASSERT_RTX_EQ (series_0_2,
+		 simplify_binary_operation (MULT, mode, series_0_1, vec2));
+  ASSERT_RTX_EQ (series_3_9,
+		 simplify_binary_operation (MULT, mode, vec3, series_1_3));
+  if (!GET_MODE_NUNITS (mode).is_constant ())
+    ASSERT_FALSE (simplify_binary_operation (MULT, mode, series_0_1,
+					     series_0_1));
+
+  /* Test ASHIFT between constant vectors.  */
+  ASSERT_RTX_EQ (series_0_2,
+		 simplify_binary_operation (ASHIFT, mode, series_0_1,
+					    CONST1_RTX (mode)));
+  if (!GET_MODE_NUNITS (mode).is_constant ())
+    ASSERT_FALSE (simplify_binary_operation (ASHIFT, mode, CONST1_RTX (mode),
+					     series_0_1));
 }
 
 /* Verify simplify_merge_mask works correctly.  */
gcc/tree-vector-builder.c
@@ -24,103 +24,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "fold-const.h"
 #include "tree-vector-builder.h"
 
-/* Try to start building a new vector of type TYPE that holds the result of
-   a unary operation on VECTOR_CST T.  ALLOW_STEPPED_P is true if the
-   operation can handle stepped encodings directly, without having to
-   expand the full sequence.
-
-   Return true if the operation is possible, which it always is when
-   ALLOW_STEPPED_P is true.  Leave the builder unchanged otherwise.  */
-
-bool
-tree_vector_builder::new_unary_operation (tree type, tree t,
-					  bool allow_stepped_p)
-{
-  poly_uint64 full_nelts = TYPE_VECTOR_SUBPARTS (type);
-  gcc_assert (known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t))));
-  unsigned int npatterns = VECTOR_CST_NPATTERNS (t);
-  unsigned int nelts_per_pattern = VECTOR_CST_NELTS_PER_PATTERN (t);
-  if (!allow_stepped_p && nelts_per_pattern > 2)
-    {
-      if (!full_nelts.is_constant ())
-	return false;
-      npatterns = full_nelts.to_constant ();
-      nelts_per_pattern = 1;
-    }
-  new_vector (type, npatterns, nelts_per_pattern);
-  return true;
-}
-
-/* Try to start building a new vector of type TYPE that holds the result of
-   a binary operation on VECTOR_CSTs T1 and T2.  ALLOW_STEPPED_P is true if
-   the operation can handle stepped encodings directly, without having to
-   expand the full sequence.
-
-   Return true if the operation is possible.  Leave the builder unchanged
-   otherwise.  */
-
-bool
-tree_vector_builder::new_binary_operation (tree type, tree t1, tree t2,
-					   bool allow_stepped_p)
-{
-  poly_uint64 full_nelts = TYPE_VECTOR_SUBPARTS (type);
-  gcc_assert (known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1)))
-	      && known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2))));
-  /* Conceptually we split the patterns in T1 and T2 until we have
-     an equal number for both.  Each split pattern requires the same
-     number of elements per pattern as the original.  E.g. splitting:
-
-       { 1, 2, 3, ... }
-
-     into two gives:
-
-       { 1, 3, 5, ... }
-       { 2, 4, 6, ... }
-
-     while splitting:
-
-       { 1, 0, ... }
-
-     into two gives:
-
-       { 1, 0, ... }
-       { 0, 0, ... }.  */
-  unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1),
-						  VECTOR_CST_NPATTERNS (t2));
-  unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1),
-					VECTOR_CST_NELTS_PER_PATTERN (t2));
-  if (!allow_stepped_p && nelts_per_pattern > 2)
-    {
-      if (!full_nelts.is_constant ())
-	return false;
-      npatterns = full_nelts.to_constant ();
-      nelts_per_pattern = 1;
-    }
-  new_vector (type, npatterns, nelts_per_pattern);
-  return true;
-}
-
-/* Return the number of elements that the caller needs to operate on in
-   order to handle a binary operation on VECTOR_CSTs T1 and T2.  This static
-   function is used instead of new_binary_operation if the result of the
-   operation is not a VECTOR_CST.  */
-
-unsigned int
-tree_vector_builder::binary_encoded_nelts (tree t1, tree t2)
-{
-  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1));
-  gcc_assert (known_eq (nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2))));
-  /* See new_binary_operation for details.  */
-  unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1),
-						  VECTOR_CST_NPATTERNS (t2));
-  unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1),
-					VECTOR_CST_NELTS_PER_PATTERN (t2));
-  unsigned HOST_WIDE_INT const_nelts;
-  if (nelts.is_constant (&const_nelts))
-    return MIN (npatterns * nelts_per_pattern, const_nelts);
-  return npatterns * nelts_per_pattern;
-}
-
 /* Return a vector element with the value BASE + FACTOR * STEP.  */
 
 tree
gcc/tree-vector-builder.h
@@ -24,10 +24,11 @@ along with GCC; see the file COPYING3.  If not see
 
 /* This class is used to build VECTOR_CSTs from a sequence of elements.
    See vector_builder for more details.  */
-class tree_vector_builder : public vector_builder<tree, tree_vector_builder>
+class tree_vector_builder : public vector_builder<tree, tree,
+						  tree_vector_builder>
 {
-  typedef vector_builder<tree, tree_vector_builder> parent;
-  friend class vector_builder<tree, tree_vector_builder>;
+  typedef vector_builder<tree, tree, tree_vector_builder> parent;
+  friend class vector_builder<tree, tree, tree_vector_builder>;
 
 public:
   tree_vector_builder () : m_type (0) {}
@@ -37,10 +38,6 @@ public:
   tree type () const { return m_type; }
 
   void new_vector (tree, unsigned int, unsigned int);
-  bool new_unary_operation (tree, tree, bool);
-  bool new_binary_operation (tree, tree, tree, bool);
-
-  static unsigned int binary_encoded_nelts (tree, tree);
 
 private:
   bool equal_p (const_tree, const_tree) const;
@@ -51,6 +48,15 @@ private:
   bool can_elide_p (const_tree) const;
   void note_representative (tree *, tree);
 
+  static poly_uint64 shape_nelts (const_tree t)
+  { return TYPE_VECTOR_SUBPARTS (t); }
+  static poly_uint64 nelts_of (const_tree t)
+  { return VECTOR_CST_NELTS (t); }
+  static unsigned int npatterns_of (const_tree t)
+  { return VECTOR_CST_NPATTERNS (t); }
+  static unsigned int nelts_per_pattern_of (const_tree t)
+  { return VECTOR_CST_NELTS_PER_PATTERN (t); }
+
   tree m_type;
 };
 
gcc/vector-builder.h
@@ -45,8 +45,11 @@ along with GCC; see the file COPYING3.  If not see
    variable-length vectors.  finalize () then canonicalizes the encoding
    to a simpler form if possible.
 
-   The derived class Derived provides this functionality for specific Ts.
-   Derived needs to provide the following interface:
+   Shape is the type that specifies the number of elements in the vector
+   and (where relevant) the type of each element.
+
+   The derived class Derived provides the functionality of this class
+   for specific Ts.  Derived needs to provide the following interface:
 
    bool equal_p (T elt1, T elt2) const;
 
@@ -82,9 +85,30 @@ along with GCC; see the file COPYING3.  If not see
 
      Record that ELT2 is being elided, given that ELT1_PTR points to
      the last encoded element for the containing pattern.  This is
-     again provided for TREE_OVERFLOW handling.  */
+     again provided for TREE_OVERFLOW handling.
+
+   static poly_uint64 shape_nelts (Shape shape);
+
+     Return the number of elements in SHAPE.
+
+   The class provides additional functionality for the case in which
+   T can describe a vector constant as well as an individual element.
+   This functionality requires:
+
+   static poly_uint64 nelts_of (T x);
+
+     Return the number of elements in vector constant X.
+
+   static unsigned int npatterns_of (T x);
+
+     Return the number of patterns used to encode vector constant X.
+
+   static unsigned int nelts_per_pattern_of (T x);
+
+     Return the number of elements used to encode each pattern
+     in vector constant X.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 class vector_builder : public auto_vec<T, 32>
 {
 public:
@@ -101,8 +125,13 @@ public:
   bool operator == (const Derived &) const;
   bool operator != (const Derived &x) const { return !operator == (x); }
 
+  bool new_unary_operation (Shape, T, bool);
+  bool new_binary_operation (Shape, T, T, bool);
+
   void finalize ();
 
+  static unsigned int binary_encoded_nelts (T, T);
+
 protected:
   void new_vector (poly_uint64, unsigned int, unsigned int);
   void reshape (unsigned int, unsigned int);
@@ -121,16 +150,16 @@ private:
   unsigned int m_nelts_per_pattern;
 };
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 inline const Derived *
-vector_builder<T, Derived>::derived () const
+vector_builder<T, Shape, Derived>::derived () const
 {
   return static_cast<const Derived *> (this);
 }
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 inline
-vector_builder<T, Derived>::vector_builder ()
+vector_builder<T, Shape, Derived>::vector_builder ()
   : m_full_nelts (0),
     m_npatterns (0),
     m_nelts_per_pattern (0)
@@ -140,18 +169,18 @@ vector_builder<T, Derived>::vector_builder ()
    starts with these explicitly-encoded elements and may contain additional
    elided elements.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 inline unsigned int
-vector_builder<T, Derived>::encoded_nelts () const
+vector_builder<T, Shape, Derived>::encoded_nelts () const
 {
   return m_npatterns * m_nelts_per_pattern;
 }
 
 /* Return true if every element of the vector is explicitly encoded.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 inline bool
-vector_builder<T, Derived>::encoded_full_vector_p () const
+vector_builder<T, Shape, Derived>::encoded_full_vector_p () const
 {
   return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts);
 }
@@ -159,11 +188,11 @@ vector_builder<T, Derived>::encoded_full_vector_p () const
 /* Start building a vector that has FULL_NELTS elements.  Initially
    encode it using NPATTERNS patterns with NELTS_PER_PATTERN each.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 void
-vector_builder<T, Derived>::new_vector (poly_uint64 full_nelts,
-					unsigned int npatterns,
-					unsigned int nelts_per_pattern)
+vector_builder<T, Shape, Derived>::new_vector (poly_uint64 full_nelts,
+					       unsigned int npatterns,
+					       unsigned int nelts_per_pattern)
 {
   m_full_nelts = full_nelts;
   m_npatterns = npatterns;
@@ -175,9 +204,9 @@ vector_builder<T, Derived>::new_vector (poly_uint64 full_nelts,
 /* Return true if this vector and OTHER have the same elements and
    are encoded in the same way.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 bool
-vector_builder<T, Derived>::operator == (const Derived &other) const
+vector_builder<T, Shape, Derived>::operator == (const Derived &other) const
 {
   if (maybe_ne (m_full_nelts, other.m_full_nelts)
       || m_npatterns != other.m_npatterns
@@ -195,9 +224,9 @@ vector_builder<T, Derived>::operator == (const Derived &other) const
 /* Return the value of vector element I, which might or might not be
    encoded explicitly.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 T
-vector_builder<T, Derived>::elt (unsigned int i) const
+vector_builder<T, Shape, Derived>::elt (unsigned int i) const
 {
   /* First handle elements that are already present in the underlying
      vector, regardless of whether they're part of the encoding or not.  */
@@ -225,12 +254,118 @@ vector_builder<T, Derived>::elt (unsigned int i) const
 			      derived ()->step (prev, final));
 }
 
+/* Try to start building a new vector of shape SHAPE that holds the result of
+   a unary operation on vector constant VEC.  ALLOW_STEPPED_P is true if the
+   operation can handle stepped encodings directly, without having to expand
+   the full sequence.
+
+   Return true if the operation is possible, which it always is when
+   ALLOW_STEPPED_P is true.  Leave the builder unchanged otherwise.  */
+
+template<typename T, typename Shape, typename Derived>
+bool
+vector_builder<T, Shape, Derived>::new_unary_operation (Shape shape, T vec,
+							bool allow_stepped_p)
+{
+  poly_uint64 full_nelts = Derived::shape_nelts (shape);
+  gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec)));
+  unsigned int npatterns = Derived::npatterns_of (vec);
+  unsigned int nelts_per_pattern = Derived::nelts_per_pattern_of (vec);
+  if (!allow_stepped_p && nelts_per_pattern > 2)
+    {
+      if (!full_nelts.is_constant ())
+	return false;
+      npatterns = full_nelts.to_constant ();
+      nelts_per_pattern = 1;
+    }
+  derived ()->new_vector (shape, npatterns, nelts_per_pattern);
+  return true;
+}
+
+/* Try to start building a new vector of shape SHAPE that holds the result of
+   a binary operation on vector constants VEC1 and VEC2.  ALLOW_STEPPED_P is
+   true if the operation can handle stepped encodings directly, without
+   having to expand the full sequence.
+
+   Return true if the operation is possible.  Leave the builder unchanged
+   otherwise.  */
+
+template<typename T, typename Shape, typename Derived>
+bool
+vector_builder<T, Shape, Derived>::new_binary_operation (Shape shape,
+							 T vec1, T vec2,
+							 bool allow_stepped_p)
+{
+  poly_uint64 full_nelts = Derived::shape_nelts (shape);
+  gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec1))
+	      && known_eq (full_nelts, Derived::nelts_of (vec2)));
+  /* Conceptually we split the patterns in VEC1 and VEC2 until we have
+     an equal number for both.  Each split pattern requires the same
+     number of elements per pattern as the original.  E.g. splitting:
+
+       { 1, 2, 3, ... }
+
+     into two gives:
+
+       { 1, 3, 5, ... }
+       { 2, 4, 6, ... }
+
+     while splitting:
+
+       { 1, 0, ... }
+
+     into two gives:
+
+       { 1, 0, ... }
+       { 0, 0, ... }.  */
+  unsigned int npatterns
+    = least_common_multiple (Derived::npatterns_of (vec1),
+			     Derived::npatterns_of (vec2));
+  unsigned int nelts_per_pattern
+    = MAX (Derived::nelts_per_pattern_of (vec1),
+	   Derived::nelts_per_pattern_of (vec2));
+  if (!allow_stepped_p && nelts_per_pattern > 2)
+    {
+      if (!full_nelts.is_constant ())
+	return false;
+      npatterns = full_nelts.to_constant ();
+      nelts_per_pattern = 1;
+    }
+  derived ()->new_vector (shape, npatterns, nelts_per_pattern);
+  return true;
+}
+
+/* Return the number of elements that the caller needs to operate on in
+   order to handle a binary operation on vector constants VEC1 and VEC2.
+   This static function is used instead of new_binary_operation if the
+   result of the operation is not a constant vector.  */
+
+template<typename T, typename Shape, typename Derived>
+unsigned int
+vector_builder<T, Shape, Derived>::binary_encoded_nelts (T vec1, T vec2)
+{
+  poly_uint64 nelts = Derived::nelts_of (vec1);
+  gcc_assert (known_eq (nelts, Derived::nelts_of (vec2)));
+  /* See new_binary_operation for details.  */
+  unsigned int npatterns
+    = least_common_multiple (Derived::npatterns_of (vec1),
+			     Derived::npatterns_of (vec2));
+  unsigned int nelts_per_pattern
+    = MAX (Derived::nelts_per_pattern_of (vec1),
+	   Derived::nelts_per_pattern_of (vec2));
+  unsigned HOST_WIDE_INT const_nelts;
+  if (nelts.is_constant (&const_nelts))
+    return MIN (npatterns * nelts_per_pattern, const_nelts);
+  return npatterns * nelts_per_pattern;
+}
+
 /* Return the number of leading duplicate elements in the range
    [START:END:STEP].  The value is always at least 1.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 unsigned int
-vector_builder<T, Derived>::count_dups (int start, int end, int step) const
+vector_builder<T, Shape, Derived>::count_dups (int start, int end,
+					       int step) const
 {
   gcc_assert ((end - start) % step == 0);
 
@@ -245,10 +380,10 @@ vector_builder<T, Derived>::count_dups (int start, int end, int step) const
 /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each,
    but without changing the underlying vector.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 void
-vector_builder<T, Derived>::reshape (unsigned int npatterns,
-				     unsigned int nelts_per_pattern)
+vector_builder<T, Shape, Derived>::reshape (unsigned int npatterns,
+					    unsigned int nelts_per_pattern)
 {
   unsigned int old_encoded_nelts = encoded_nelts ();
   unsigned int new_encoded_nelts = npatterns * nelts_per_pattern;
@@ -268,11 +403,11 @@ vector_builder<T, Derived>::reshape (unsigned int npatterns,
 /* Return true if elements [START, END) contain a repeating sequence of
    STEP elements.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 bool
-vector_builder<T, Derived>::repeating_sequence_p (unsigned int start,
-						  unsigned int end,
-						  unsigned int step)
+vector_builder<T, Shape, Derived>::repeating_sequence_p (unsigned int start,
+							 unsigned int end,
+							 unsigned int step)
 {
   for (unsigned int i = start; i < end - step; ++i)
     if (!derived ()->equal_p ((*this)[i], (*this)[i + step]))
@@ -283,11 +418,11 @@ vector_builder<T, Derived>::repeating_sequence_p (unsigned int start,
 /* Return true if elements [START, END) contain STEP interleaved linear
    series.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 bool
-vector_builder<T, Derived>::stepped_sequence_p (unsigned int start,
-						unsigned int end,
-						unsigned int step)
+vector_builder<T, Shape, Derived>::stepped_sequence_p (unsigned int start,
+						       unsigned int end,
+						       unsigned int step)
 {
   if (!derived ()->allow_steps_p ())
     return false;
@@ -316,9 +451,9 @@ vector_builder<T, Derived>::stepped_sequence_p (unsigned int start,
 /* Try to change the number of encoded patterns to NPATTERNS, returning
    true on success.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 bool
-vector_builder<T, Derived>::try_npatterns (unsigned int npatterns)
+vector_builder<T, Shape, Derived>::try_npatterns (unsigned int npatterns)
 {
   if (m_nelts_per_pattern == 1)
     {
@@ -369,9 +504,9 @@ vector_builder<T, Derived>::try_npatterns (unsigned int npatterns)
 
 /* Replace the current encoding with the canonical form.  */
 
-template<typename T, typename Derived>
+template<typename T, typename Shape, typename Derived>
 void
-vector_builder<T, Derived>::finalize ()
+vector_builder<T, Shape, Derived>::finalize ()
 {
   /* The encoding requires the same number of elements to come from each
      pattern.  */
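Editorial sketch (not part of the patch): the full hook set a Derived class
supplies to vector_builder<T, Shape, Derived> after this change, written out
for a hypothetical integer builder whose Shape is simply an element count.
The class name example_int_builder is invented; the hooks mirror
int_vector_builder above, and only the static shape_nelts hook (plus
nelts_of/npatterns_of/nelts_per_pattern_of, for builders whose T can also
name a whole vector constant) is new with this patch.

class example_int_builder
  : public vector_builder<int, poly_uint64, example_int_builder>
{
  typedef vector_builder<int, poly_uint64, example_int_builder> parent;
  friend class vector_builder<int, poly_uint64, example_int_builder>;

public:
  example_int_builder (poly_uint64 full_nelts, unsigned int npatterns,
                       unsigned int nelts_per_pattern)
  { new_vector (full_nelts, npatterns, nelts_per_pattern); }

private:
  /* Hooks that existed before this patch.  */
  bool equal_p (int x, int y) const { return x == y; }
  bool allow_steps_p () const { return true; }
  bool integral_p (int) const { return true; }
  int step (int x, int y) const { return y - x; }
  int apply_step (int base, unsigned int factor, int step) const
  { return base + (int) factor * step; }
  bool can_elide_p (int) const { return true; }
  void note_representative (int *, int) {}

  /* New with this patch: map the Shape argument of new_unary_operation
     and new_binary_operation to an element count.  */
  static poly_uint64 shape_nelts (poly_uint64 nelts) { return nelts; }
};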