re PR rtl-optimization/33721 ([meta-bug] Gcc can't properly align stack variable)
PR rtl-opt/33721
	* explow.c (allocate_dynamic_stack_space): Add REQUIRED_ALIGN parm,
	remove TARGET parm, convert KNOWN_ALIGN parm to SIZE_ALIGN.  Honor
	required_align, tidy the code a bit.  Emit split_stack code in the
	right place.  Mark the return value with the alignment properly.
	* expr.h (allocate_dynamic_stack_space): Update decl.
	* builtins.c (expand_builtin_apply): Update call to
	allocate_dynamic_stack_space.
	(expand_builtin_alloca): Likewise.  Remove TARGET parameter.
	* calls.c (initialize_argument_information): Update call to
	allocate_dynamic_stack_space.
	(expand_call): Likewise.
	* cfgexpand.c (get_decl_align_unit): Don't limit alignment.
	Don't update_stack_alignment here.
	(alloc_stack_frame_space): Make ALIGN unsigned.
	(stack_var_cmp): Sort by alignment too.
	(partition_stack_vars): Don't merge large and small alignment vars.
	(expand_one_stack_var_at): Add BASE and BASE_ALIGN parameters.
	Take care when BASE is not virtual_stack_vars_rtx.
	(expand_stack_vars): Allocate dynamic stack space for large
	alignment variables.
	(expand_one_stack_var): Update all to expand_one_stack_var_at.
	(defer_stack_allocation): True for large alignment vars.
	(update_stack_alignment): Merge into ...
	(expand_one_var): ... here.
	(gimple_expand_cfg): Place code from expand_stack_vars.

From-SVN: r165240
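The underlying bug: a local declared with alignment stricter than the target's stack boundary was silently under-aligned. A minimal reproducer in the spirit of the PR (a hypothetical test, not part of this commit; the 64-byte figure is illustrative):

/* Hypothetical PR 33721-style testcase.  Before this change a local
   requesting 64-byte alignment could end up merely frame-aligned; with
   it, such variables are carved from a dynamically aligned block and
   the assertion holds.  */
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  char buf[100] __attribute__ ((aligned (64)));
  assert (((uintptr_t) buf & 63) == 0);
  return 0;
}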
This commit is contained in:
parent a5f4f531fe
commit 3a42502df4
gcc/ChangeLog
@@ -1,3 +1,33 @@
+2010-10-09  Richard Henderson  <rth@redhat.com>
+
+	PR rtl-opt/33721
+	* explow.c (allocate_dynamic_stack_space): Add REQUIRED_ALIGN parm,
+	remove TARGET parm, convert KNOWN_ALIGN parm to SIZE_ALIGN.  Honor
+	required_align, tidy the code a bit.  Emit split_stack code in the
+	right place.  Mark the return value with the alignment properly.
+	* expr.h (allocate_dynamic_stack_space): Update decl.
+	* builtins.c (expand_builtin_apply): Update call to
+	allocate_dynamic_stack_space.
+	(expand_builtin_alloca): Likewise.  Remove TARGET parameter.
+	* calls.c (initialize_argument_information): Update call to
+	allocate_dynamic_stack_space.
+	(expand_call): Likewise.
+
+	* cfgexpand.c (get_decl_align_unit): Don't limit alignment.
+	Don't update_stack_alignment here.
+	(alloc_stack_frame_space): Make ALIGN unsigned.
+	(stack_var_cmp): Sort by alignment too.
+	(partition_stack_vars): Don't merge large and small alignment vars.
+	(expand_one_stack_var_at): Add BASE and BASE_ALIGN parameters.
+	Take care when BASE is not virtual_stack_vars_rtx.
+	(expand_stack_vars): Allocate dynamic stack space for large
+	alignment variables.
+	(expand_one_stack_var): Update all to expand_one_stack_var_at.
+	(defer_stack_allocation): True for large alignment vars.
+	(update_stack_alignment): Merge into ...
+	(expand_one_var): ... here.
+	(gimple_expand_cfg): Place code from expand_stack_vars.
+
 2010-10-09  Nathan Froyd  <froydnj@codesourcery.com>
 
 	* config/pdp11/pdp11.h (FUNCTION_ARG, FUNCTION_ARG_ADVANCE): Delete.
gcc/builtins.c
@@ -132,7 +132,7 @@ static rtx expand_builtin_memset (tree, rtx, enum machine_mode);
 static rtx expand_builtin_memset_args (tree, tree, tree, rtx, enum machine_mode, tree);
 static rtx expand_builtin_bzero (tree);
 static rtx expand_builtin_strlen (tree, rtx, enum machine_mode);
-static rtx expand_builtin_alloca (tree, rtx, bool);
+static rtx expand_builtin_alloca (tree, bool);
 static rtx expand_builtin_unop (enum machine_mode, tree, rtx, rtx, optab);
 static rtx expand_builtin_frame_address (tree, tree);
 static tree stabilize_va_list_loc (location_t, tree, int);
@@ -1572,7 +1572,7 @@ expand_builtin_apply (rtx function, rtx arguments, rtx argsize)
      arguments to the outgoing arguments address.  We can pass TRUE
      as the 4th argument because we just saved the stack pointer
      and will restore it right after the call.  */
-  allocate_dynamic_stack_space (argsize, 0, BITS_PER_UNIT, TRUE);
+  allocate_dynamic_stack_space (argsize, 0, BIGGEST_ALIGNMENT, true);
 
   /* Set DRAP flag to true, even though allocate_dynamic_stack_space
      may have already set current_function_calls_alloca to true.
@@ -4931,12 +4931,11 @@ expand_builtin_frame_address (tree fndecl, tree exp)
 }
 
 /* Expand EXP, a call to the alloca builtin.  Return NULL_RTX if we
-   failed and the caller should emit a normal call, otherwise try to
-   get the result in TARGET, if convenient.  CANNOT_ACCUMULATE is the
-   same as for allocate_dynamic_stack_space.  */
+   failed and the caller should emit a normal call.  CANNOT_ACCUMULATE
+   is the same as for allocate_dynamic_stack_space.  */
 
 static rtx
-expand_builtin_alloca (tree exp, rtx target, bool cannot_accumulate)
+expand_builtin_alloca (tree exp, bool cannot_accumulate)
 {
   rtx op0;
   rtx result;
@@ -4952,7 +4951,7 @@ expand_builtin_alloca (tree exp, rtx target, bool cannot_accumulate)
   op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
 
   /* Allocate the desired space.  */
-  result = allocate_dynamic_stack_space (op0, target, BITS_PER_UNIT,
+  result = allocate_dynamic_stack_space (op0, 0, BIGGEST_ALIGNMENT,
                                          cannot_accumulate);
   result = convert_memory_address (ptr_mode, result);
@@ -5997,7 +5996,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
     case BUILT_IN_ALLOCA:
       /* If the allocation stems from the declaration of a variable-sized
         object, it cannot accumulate.  */
-      target = expand_builtin_alloca (exp, target, ALLOCA_FOR_VAR_P (exp));
+      target = expand_builtin_alloca (exp, ALLOCA_FOR_VAR_P (exp));
       if (target)
        return target;
       break;
gcc/calls.c
@@ -1100,10 +1100,11 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,
              /* We can pass TRUE as the 4th argument because we just
                 saved the stack pointer and will restore it right after
                 the call.  */
-             copy = gen_rtx_MEM (BLKmode,
-                                 allocate_dynamic_stack_space
-                                 (size_rtx, NULL_RTX,
-                                  TYPE_ALIGN (type), TRUE));
+             copy = allocate_dynamic_stack_space (size_rtx,
+                                                  TYPE_ALIGN (type),
+                                                  TYPE_ALIGN (type),
+                                                  true);
+             copy = gen_rtx_MEM (BLKmode, copy);
              set_mem_attributes (copy, type, 1);
            }
          else
@@ -2664,8 +2665,8 @@ expand_call (tree exp, rtx target, int ignore)
          /* We can pass TRUE as the 4th argument because we just
             saved the stack pointer and will restore it right after
             the call.  */
-         allocate_dynamic_stack_space (push_size, NULL_RTX,
-                                       BITS_PER_UNIT, TRUE);
+         allocate_dynamic_stack_space (push_size, 0,
+                                       BIGGEST_ALIGNMENT, true);
        }
 
      /* If argument evaluation might modify the stack pointer,
gcc/cfgexpand.c
@@ -205,43 +205,13 @@ static bool has_protected_decls;
    smaller than our cutoff threshold.  Used for -Wstack-protector.  */
 static bool has_short_buffer;
 
-/* Update stack alignment requirement.  */
-
-static void
-update_stack_alignment (unsigned int align)
-{
-  if (SUPPORTS_STACK_ALIGNMENT)
-    {
-      if (crtl->stack_alignment_estimated < align)
-       {
-         gcc_assert(!crtl->stack_realign_processed);
-         crtl->stack_alignment_estimated = align;
-       }
-    }
-
-  /* stack_alignment_needed > PREFERRED_STACK_BOUNDARY is permitted.
-     So here we only make sure stack_alignment_needed >= align.  */
-  if (crtl->stack_alignment_needed < align)
-    crtl->stack_alignment_needed = align;
-  if (crtl->max_used_stack_slot_alignment < align)
-    crtl->max_used_stack_slot_alignment = align;
-}
-
 /* Discover the byte alignment to use for DECL.  Ignore alignment
    we can't do with expected alignment of the stack boundary.  */
 
 static unsigned int
 get_decl_align_unit (tree decl)
 {
-  unsigned int align;
-
-  align = LOCAL_DECL_ALIGNMENT (decl);
-
-  if (align > MAX_SUPPORTED_STACK_ALIGNMENT)
-    align = MAX_SUPPORTED_STACK_ALIGNMENT;
-
-  update_stack_alignment (align);
-
+  unsigned int align = LOCAL_DECL_ALIGNMENT (decl);
   return align / BITS_PER_UNIT;
 }
@@ -249,7 +219,7 @@ get_decl_align_unit (tree decl)
    Return the frame offset.  */
 
 static HOST_WIDE_INT
-alloc_stack_frame_space (HOST_WIDE_INT size, HOST_WIDE_INT align)
+alloc_stack_frame_space (HOST_WIDE_INT size, unsigned HOST_WIDE_INT align)
 {
   HOST_WIDE_INT offset, new_frame_offset;
@@ -402,26 +372,43 @@ add_alias_set_conflicts (void)
 }
 
 /* A subroutine of partition_stack_vars.  A comparison function for qsort,
-   sorting an array of indices by the size and type of the object.  */
+   sorting an array of indices by the properties of the object.  */
 
 static int
-stack_var_size_cmp (const void *a, const void *b)
+stack_var_cmp (const void *a, const void *b)
 {
-  HOST_WIDE_INT sa = stack_vars[*(const size_t *)a].size;
-  HOST_WIDE_INT sb = stack_vars[*(const size_t *)b].size;
-  tree decla, declb;
+  size_t ia = *(const size_t *)a;
+  size_t ib = *(const size_t *)b;
+  unsigned int aligna = stack_vars[ia].alignb;
+  unsigned int alignb = stack_vars[ib].alignb;
+  HOST_WIDE_INT sizea = stack_vars[ia].size;
+  HOST_WIDE_INT sizeb = stack_vars[ib].size;
+  tree decla = stack_vars[ia].decl;
+  tree declb = stack_vars[ib].decl;
+  bool largea, largeb;
   unsigned int uida, uidb;
 
-  if (sa < sb)
+  /* Primary compare on "large" alignment.  Large comes first.  */
+  largea = (aligna * BITS_PER_UNIT > MAX_SUPPORTED_STACK_ALIGNMENT);
+  largeb = (alignb * BITS_PER_UNIT > MAX_SUPPORTED_STACK_ALIGNMENT);
+  if (largea != largeb)
+    return (int)largeb - (int)largea;
+
+  /* Secondary compare on size, decreasing  */
+  if (sizea < sizeb)
     return -1;
-  if (sa > sb)
+  if (sizea > sizeb)
     return 1;
-  decla = stack_vars[*(const size_t *)a].decl;
-  declb = stack_vars[*(const size_t *)b].decl;
-  /* For stack variables of the same size use and id of the decls
-     to make the sort stable.  Two SSA names are compared by their
-     version, SSA names come before non-SSA names, and two normal
-     decls are compared by their DECL_UID.  */
+
+  /* Tertiary compare on true alignment, decreasing.  */
+  if (aligna < alignb)
+    return -1;
+  if (aligna > alignb)
+    return 1;
+
+  /* Final compare on ID for sort stability, increasing.
+     Two SSA names are compared by their version, SSA names come before
+     non-SSA names, and two normal decls are compared by their DECL_UID.  */
   if (TREE_CODE (decla) == SSA_NAME)
     {
       if (TREE_CODE (declb) == SSA_NAME)
@@ -434,9 +421,9 @@ stack_var_size_cmp (const void *a, const void *b)
   else
     uida = DECL_UID (decla), uidb = DECL_UID (declb);
   if (uida < uidb)
-    return -1;
-  if (uida > uidb)
     return 1;
+  if (uida > uidb)
+    return -1;
   return 0;
 }
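As a standalone reduction of the new sort order (plain C with an illustrative struct, not GCC's types or fields): entries whose alignment exceeds the supported stack alignment sort to the front, then size, then alignment, with an id as the final stable tie-break, following the comparator's return conventions above.

#include <stdio.h>
#include <stdlib.h>

struct var { int large; long size; unsigned align; unsigned id; };

static int
var_cmp (const void *pa, const void *pb)
{
  const struct var *a = pa, *b = pb;
  if (a->large != b->large)
    return b->large - a->large;          /* "large" alignment first */
  if (a->size != b->size)
    return a->size < b->size ? -1 : 1;   /* then by size */
  if (a->align != b->align)
    return a->align < b->align ? -1 : 1; /* then by alignment */
  return a->id < b->id ? 1 : a->id > b->id ? -1 : 0;  /* stable tie-break */
}

int
main (void)
{
  struct var v[] = { {0, 8, 16, 1}, {1, 4, 64, 2}, {0, 32, 8, 3} };
  qsort (v, 3, sizeof v[0], var_cmp);
  for (int i = 0; i < 3; i++)
    printf ("id %u\n", v[i].id);  /* id 2 first: it is "large" */
  return 0;
}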
@@ -634,12 +621,13 @@ partition_stack_vars (void)
   if (n == 1)
     return;
 
-  qsort (stack_vars_sorted, n, sizeof (size_t), stack_var_size_cmp);
+  qsort (stack_vars_sorted, n, sizeof (size_t), stack_var_cmp);
 
   for (si = 0; si < n; ++si)
     {
       size_t i = stack_vars_sorted[si];
       HOST_WIDE_INT isize = stack_vars[i].size;
+      unsigned int ialign = stack_vars[i].alignb;
       HOST_WIDE_INT offset = 0;
 
       for (sj = si; sj-- > 0; )
@@ -660,6 +648,12 @@ partition_stack_vars (void)
          if (stack_var_conflict_p (i, j))
            continue;
 
+         /* Do not mix objects of "small" (supported) alignment
+            and "large" (unsupported) alignment.  */
+         if ((ialign * BITS_PER_UNIT <= MAX_SUPPORTED_STACK_ALIGNMENT)
+             != (jalign * BITS_PER_UNIT <= MAX_SUPPORTED_STACK_ALIGNMENT))
+           continue;
+
          /* Refine the remaining space check to include alignment.  */
          if (offset & (jalign - 1))
            {
@@ -715,19 +709,19 @@ dump_stack_var_partition (void)
     }
 }
 
-/* Assign rtl to DECL at frame offset OFFSET.  */
+/* Assign rtl to DECL at BASE + OFFSET.  */
 
 static void
-expand_one_stack_var_at (tree decl, HOST_WIDE_INT offset)
+expand_one_stack_var_at (tree decl, rtx base, unsigned base_align,
+                        HOST_WIDE_INT offset)
 {
-  /* Alignment is unsigned.  */
-  unsigned HOST_WIDE_INT align, max_align;
+  unsigned align;
   rtx x;
 
   /* If this fails, we've overflowed the stack frame.  Error nicely?  */
   gcc_assert (offset == trunc_int_for_mode (offset, Pmode));
 
-  x = plus_constant (virtual_stack_vars_rtx, offset);
+  x = plus_constant (base, offset);
   x = gen_rtx_MEM (DECL_MODE (SSAVAR (decl)), x);
 
   if (TREE_CODE (decl) != SSA_NAME)
@@ -735,12 +729,16 @@ expand_one_stack_var_at (tree decl, HOST_WIDE_INT offset)
      /* Set alignment we actually gave this decl if it isn't an SSA name.
        If it is we generate stack slots only accidentally so it isn't as
        important, we'll simply use the alignment that is already set.  */
-  offset -= frame_phase;
+  if (base == virtual_stack_vars_rtx)
+    offset -= frame_phase;
   align = offset & -offset;
   align *= BITS_PER_UNIT;
-  max_align = crtl->max_used_stack_slot_alignment;
-  if (align == 0 || align > max_align)
-    align = max_align;
+  if (align == 0 || align > base_align)
+    align = base_align;
+
+  /* One would think that we could assert that we're not decreasing
+     alignment here, but (at least) the i386 port does exactly this
+     via the MINIMUM_ALIGNMENT hook.  */
 
   DECL_ALIGN (decl) = align;
   DECL_USER_ALIGN (decl) = 0;
@@ -758,9 +756,56 @@ static void
 expand_stack_vars (bool (*pred) (tree))
 {
   size_t si, i, j, n = stack_vars_num;
+  HOST_WIDE_INT large_size = 0, large_alloc = 0;
+  rtx large_base = NULL;
+  unsigned large_align = 0;
+  tree decl;
+
+  /* Determine if there are any variables requiring "large" alignment.
+     Since these are dynamically allocated, we only process these if
+     no predicate involved.  */
+  large_align = stack_vars[stack_vars_sorted[0]].alignb * BITS_PER_UNIT;
+  if (pred == NULL && large_align > MAX_SUPPORTED_STACK_ALIGNMENT)
+    {
+      /* Find the total size of these variables.  */
+      for (si = 0; si < n; ++si)
+       {
+         unsigned alignb;
+
+         i = stack_vars_sorted[si];
+         alignb = stack_vars[i].alignb;
+
+         /* Stop when we get to the first decl with "small" alignment.  */
+         if (alignb * BITS_PER_UNIT <= MAX_SUPPORTED_STACK_ALIGNMENT)
+           break;
+
+         /* Skip variables that aren't partition representatives.  */
+         if (stack_vars[i].representative != i)
+           continue;
+
+         /* Skip variables that have already had rtl assigned.  See also
+            add_stack_var where we perpetrate this pc_rtx hack.  */
+         decl = stack_vars[i].decl;
+         if ((TREE_CODE (decl) == SSA_NAME
+              ? SA.partition_to_pseudo[var_to_partition (SA.map, decl)]
+              : DECL_RTL (decl)) != pc_rtx)
+           continue;
+
+         large_size += alignb - 1;
+         large_size &= -(HOST_WIDE_INT)alignb;
+         large_size += stack_vars[i].size;
+       }
+
+      /* If there were any, allocate space.  */
+      if (large_size > 0)
+       large_base = allocate_dynamic_stack_space (GEN_INT (large_size), 0,
+                                                  large_align, true);
+    }
 
   for (si = 0; si < n; ++si)
     {
+      rtx base;
+      unsigned base_align, alignb;
       HOST_WIDE_INT offset;
 
       i = stack_vars_sorted[si];
@@ -771,18 +816,38 @@ expand_stack_vars (bool (*pred) (tree))
 
       /* Skip variables that have already had rtl assigned.  See also
         add_stack_var where we perpetrate this pc_rtx hack.  */
-      if ((TREE_CODE (stack_vars[i].decl) == SSA_NAME
-          ? SA.partition_to_pseudo[var_to_partition (SA.map, stack_vars[i].decl)]
-          : DECL_RTL (stack_vars[i].decl)) != pc_rtx)
+      decl = stack_vars[i].decl;
+      if ((TREE_CODE (decl) == SSA_NAME
+          ? SA.partition_to_pseudo[var_to_partition (SA.map, decl)]
+          : DECL_RTL (decl)) != pc_rtx)
        continue;
 
       /* Check the predicate to see whether this variable should be
         allocated in this pass.  */
-      if (pred && !pred (stack_vars[i].decl))
+      if (pred && !pred (decl))
        continue;
 
-      offset = alloc_stack_frame_space (stack_vars[i].size,
-                                       stack_vars[i].alignb);
+      alignb = stack_vars[i].alignb;
+      if (alignb * BITS_PER_UNIT <= MAX_SUPPORTED_STACK_ALIGNMENT)
+       {
+         offset = alloc_stack_frame_space (stack_vars[i].size, alignb);
+         base = virtual_stack_vars_rtx;
+         base_align = crtl->max_used_stack_slot_alignment;
+       }
+      else
+       {
+         /* Large alignment is only processed in the last pass.  */
+         if (pred)
+           continue;
+
+         large_alloc += alignb - 1;
+         large_alloc &= -(HOST_WIDE_INT)alignb;
+         offset = large_alloc;
+         large_alloc += stack_vars[i].size;
+
+         base = large_base;
+         base_align = large_align;
+       }
 
       /* Create rtl for each variable based on their location within the
         partition.  */
@@ -790,9 +855,12 @@ expand_stack_vars (bool (*pred) (tree))
        {
          gcc_assert (stack_vars[j].offset <= stack_vars[i].size);
          expand_one_stack_var_at (stack_vars[j].decl,
+                                  base, base_align,
                                   stack_vars[j].offset + offset);
        }
     }
+
+  gcc_assert (large_alloc == large_size);
 }
 
 /* Take into account all sizes of partitions and reset DECL_RTLs.  */
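The offset arithmetic above is the usual add-and-mask round-up. A standalone sketch of how the "large"-alignment block gets laid out (plain C, made-up sizes and alignments, not GCC internals):

/* Each over-aligned variable receives an offset rounded up within one
   block, and a single dynamic allocation aligned to the largest request
   backs them all; the final cursor is the block size.  */
#include <stdio.h>

int
main (void)
{
  long sizes[]  = { 100, 33, 8 };
  long aligns[] = { 64, 32, 32 };   /* bytes, all beyond the frame's limit */
  long alloc = 0;                   /* running cursor, like large_alloc */

  for (int i = 0; i < 3; i++)
    {
      alloc += aligns[i] - 1;       /* round the cursor up ... */
      alloc &= -aligns[i];          /* ... to this variable's alignment */
      printf ("var %d at block offset %ld\n", i, alloc);
      alloc += sizes[i];
    }
  printf ("block size to allocate: %ld\n", alloc);
  return 0;
}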
@@ -823,13 +891,19 @@ account_stack_vars (void)
 static void
 expand_one_stack_var (tree var)
 {
-  HOST_WIDE_INT size, offset, align;
+  HOST_WIDE_INT size, offset;
+  unsigned byte_align;
 
   size = tree_low_cst (DECL_SIZE_UNIT (SSAVAR (var)), 1);
-  align = get_decl_align_unit (SSAVAR (var));
-  offset = alloc_stack_frame_space (size, align);
+  byte_align = get_decl_align_unit (SSAVAR (var));
 
-  expand_one_stack_var_at (var, offset);
+  /* We handle highly aligned variables in expand_stack_vars.  */
+  gcc_assert (byte_align * BITS_PER_UNIT <= MAX_SUPPORTED_STACK_ALIGNMENT);
+
+  offset = alloc_stack_frame_space (size, byte_align);
+
+  expand_one_stack_var_at (var, virtual_stack_vars_rtx,
+                          crtl->max_used_stack_slot_alignment, offset);
 }
 
 /* A subroutine of expand_one_var.  Called to assign rtl to a VAR_DECL
@@ -898,6 +972,11 @@ defer_stack_allocation (tree var, bool toplevel)
   if (flag_stack_protect)
     return true;
 
+  /* We handle "large" alignment via dynamic allocation.  We want to handle
+     this extra complication in only one place, so defer them.  */
+  if (DECL_ALIGN (var) > MAX_SUPPORTED_STACK_ALIGNMENT)
+    return true;
+
   /* Variables in the outermost scope automatically conflict with
      every other variable.  The only reason to want to defer them
      at all is that, after sorting, we can more efficiently pack
@@ -927,15 +1006,13 @@ defer_stack_allocation (tree var, bool toplevel)
 static HOST_WIDE_INT
 expand_one_var (tree var, bool toplevel, bool really_expand)
 {
+  unsigned int align = BITS_PER_UNIT;
   tree origvar = var;
 
   var = SSAVAR (var);
 
-  if (SUPPORTS_STACK_ALIGNMENT
-      && TREE_TYPE (var) != error_mark_node
-      && TREE_CODE (var) == VAR_DECL)
+  if (TREE_TYPE (var) != error_mark_node && TREE_CODE (var) == VAR_DECL)
     {
-      unsigned int align;
-
       /* Because we don't know if VAR will be in register or on stack,
         we conservatively assume it will be on stack even if VAR is
         eventually put into register after RA pass.  For non-automatic
@@ -955,15 +1032,28 @@ expand_one_var (tree var, bool toplevel, bool really_expand)
       else
        align = MINIMUM_ALIGNMENT (var, DECL_MODE (var), DECL_ALIGN (var));
 
-      if (crtl->stack_alignment_estimated < align)
-       {
-         /* stack_alignment_estimated shouldn't change after stack
-            realign decision made */
-         gcc_assert(!crtl->stack_realign_processed);
-         crtl->stack_alignment_estimated = align;
-       }
+      /* If the variable alignment is very large we'll dynamicaly allocate
+        it, which means that in-frame portion is just a pointer.  */
+      if (align > MAX_SUPPORTED_STACK_ALIGNMENT)
+       align = POINTER_SIZE;
     }
 
+  if (SUPPORTS_STACK_ALIGNMENT
+      && crtl->stack_alignment_estimated < align)
+    {
+      /* stack_alignment_estimated shouldn't change after stack
+        realign decision made */
+      gcc_assert(!crtl->stack_realign_processed);
+      crtl->stack_alignment_estimated = align;
+    }
+
+  /* stack_alignment_needed > PREFERRED_STACK_BOUNDARY is permitted.
+     So here we only make sure stack_alignment_needed >= align.  */
+  if (crtl->stack_alignment_needed < align)
+    crtl->stack_alignment_needed = align;
+  if (crtl->max_used_stack_slot_alignment < align)
+    crtl->max_used_stack_slot_alignment = align;
+
   if (TREE_CODE (origvar) == SSA_NAME)
     {
       gcc_assert (TREE_CODE (var) != VAR_DECL
@@ -3787,6 +3877,7 @@ gimple_expand_cfg (void)
   sbitmap blocks;
   edge_iterator ei;
   edge e;
+  rtx var_seq;
   unsigned i;
 
   timevar_push (TV_OUT_OF_SSA);
@@ -3832,10 +3923,14 @@ gimple_expand_cfg (void)
   crtl->preferred_stack_boundary = STACK_BOUNDARY;
   cfun->cfg->max_jumptable_ents = 0;
 
-
   /* Expand the variables recorded during gimple lowering.  */
   timevar_push (TV_VAR_EXPAND);
+  start_sequence ();
+
   expand_used_vars ();
+
+  var_seq = get_insns ();
+  end_sequence ();
   timevar_pop (TV_VAR_EXPAND);
 
   /* Honor stack protection warnings.  */
@@ -3855,6 +3950,18 @@ gimple_expand_cfg (void)
   /* Set up parameters and prepare for return, for the function.  */
   expand_function_start (current_function_decl);
 
+  /* If we emitted any instructions for setting up the variables,
+     emit them before the FUNCTION_START note.  */
+  if (var_seq)
+    {
+      emit_insn_before (var_seq, parm_birth_insn);
+
+      /* In expand_function_end we'll insert the alloca save/restore
+        before parm_birth_insn.  We've just insertted an alloca call.
+        Adjust the pointer to match.  */
+      parm_birth_insn = var_seq;
+    }
+
   /* Now that we also have the parameter RTXs, copy them over to our
      partitions.  */
   for (i = 0; i < SA.map->num_partitions; i++)
gcc/explow.c
@@ -1123,15 +1123,18 @@ update_nonlocal_goto_save_area (void)
 }
 
 /* Return an rtx representing the address of an area of memory dynamically
-   pushed on the stack.  This region of memory is always aligned to
-   a multiple of BIGGEST_ALIGNMENT.
+   pushed on the stack.
 
    Any required stack pointer alignment is preserved.
 
    SIZE is an rtx representing the size of the area.
-   TARGET is a place in which the address can be placed.
 
-   KNOWN_ALIGN is the alignment (in bits) that we know SIZE has.
+   SIZE_ALIGN is the alignment (in bits) that we know SIZE has.  This
+   parameter may be zero.  If so, a proper value will be extracted
+   from SIZE if it is constant, otherwise BITS_PER_UNIT will be assumed.
+
+   REQUIRED_ALIGN is the alignment (in bits) required for the region
+   of memory.
 
    If CANNOT_ACCUMULATE is set to TRUE, the caller guarantees that the
    stack space allocated by the generated code cannot be added with itself
@@ -1141,12 +1144,12 @@ update_nonlocal_goto_save_area (void)
    loops to it executes the associated deallocation code.  */
 
 rtx
-allocate_dynamic_stack_space (rtx size, rtx target, int known_align,
-                             bool cannot_accumulate)
+allocate_dynamic_stack_space (rtx size, unsigned size_align,
+                             unsigned required_align, bool cannot_accumulate)
 {
   HOST_WIDE_INT stack_usage_size = -1;
-  bool known_align_valid = true;
-  rtx final_label, final_target;
+  rtx final_label, final_target, target;
+  bool must_align;
 
   /* If we're asking for zero bytes, it doesn't matter what we point
      to since we can't dereference it.  But return a reasonable
@@ -1192,6 +1195,23 @@ allocate_dynamic_stack_space (rtx size, rtx target, int known_align,
   if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode)
     size = convert_to_mode (Pmode, size, 1);
 
+  /* Adjust SIZE_ALIGN, if needed.  */
+  if (CONST_INT_P (size))
+    {
+      unsigned HOST_WIDE_INT lsb;
+
+      lsb = INTVAL (size);
+      lsb &= -lsb;
+
+      /* Watch out for overflow truncating to "unsigned".  */
+      if (lsb > UINT_MAX / BITS_PER_UNIT)
+       size_align = 1u << (HOST_BITS_PER_INT - 1);
+      else
+       size_align = (unsigned)lsb * BITS_PER_UNIT;
+    }
+  else if (size_align < BITS_PER_UNIT)
+    size_align = BITS_PER_UNIT;
+
   /* We can't attempt to minimize alignment necessary, because we don't
      know the final value of preferred_stack_boundary yet while executing
      this code.  */
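The CONST_INT_P branch infers SIZE_ALIGN from the lowest set bit of the constant size. A standalone demonstration of that bit trick, minus the overflow guard (plain C, example values):

/* size & -size isolates the lowest set bit, i.e. the largest power of
   two dividing size; multiplying by 8 converts bytes to bits, mirroring
   the BITS_PER_UNIT scaling above.  */
#include <assert.h>

int
main (void)
{
  unsigned long size = 48;                 /* 48 = 16 * 3 */
  unsigned long lsb = size & -size;        /* 16 */
  unsigned align_bits = (unsigned) (lsb * 8);

  assert (lsb == 16);
  assert (align_bits == 128);
  return 0;
}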
@@ -1199,35 +1219,43 @@ allocate_dynamic_stack_space (rtx size, rtx target, int known_align,
     crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
 
   /* We will need to ensure that the address we return is aligned to
-     BIGGEST_ALIGNMENT.  If STACK_DYNAMIC_OFFSET is defined, we don't
+     REQUIRED_ALIGN.  If STACK_DYNAMIC_OFFSET is defined, we don't
      always know its final value at this point in the compilation (it
      might depend on the size of the outgoing parameter lists, for
      example), so we must align the value to be returned in that case.
      (Note that STACK_DYNAMIC_OFFSET will have a default nonzero value if
      STACK_POINTER_OFFSET or ACCUMULATE_OUTGOING_ARGS are defined).
      We must also do an alignment operation on the returned value if
-     the stack pointer alignment is less strict that BIGGEST_ALIGNMENT.
+     the stack pointer alignment is less strict than REQUIRED_ALIGN.
 
      If we have to align, we must leave space in SIZE for the hole
      that might result from the alignment operation.  */
 
+  must_align = (crtl->preferred_stack_boundary < required_align);
 #if defined (STACK_DYNAMIC_OFFSET) || defined (STACK_POINTER_OFFSET)
-#define MUST_ALIGN 1
-#else
-#define MUST_ALIGN (crtl->preferred_stack_boundary < BIGGEST_ALIGNMENT)
+  must_align = true;
 #endif
 
-  if (MUST_ALIGN)
+  if (must_align)
    {
-      size
-       = force_operand (plus_constant (size,
-                                       BIGGEST_ALIGNMENT / BITS_PER_UNIT - 1),
-                        NULL_RTX);
+      unsigned extra, extra_align;
+
+      if (required_align > PREFERRED_STACK_BOUNDARY)
+       extra_align = PREFERRED_STACK_BOUNDARY;
+      else if (required_align > STACK_BOUNDARY)
+       extra_align = STACK_BOUNDARY;
+      else
+       extra_align = BITS_PER_UNIT;
+      extra = (required_align - extra_align) / BITS_PER_UNIT;
+
+      size = plus_constant (size, extra);
+      size = force_operand (size, NULL_RTX);
 
      if (flag_stack_usage)
-       stack_usage_size += BIGGEST_ALIGNMENT / BITS_PER_UNIT - 1;
+       stack_usage_size += extra;
 
-      known_align_valid = false;
+      if (extra && size_align > extra_align)
+       size_align = extra_align;
    }
 
 #ifdef SETJMP_VIA_SAVE_AREA
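The extra slack computed above covers the worst-case hole left by re-aligning: with a 64-byte REQUIRED_ALIGN and only a 16-byte guarantee, at most 64 - 16 = 48 bytes are skipped. A standalone check of that bound (plain C; the 64/16 figures are illustrative):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uintptr_t required = 64, guaranteed = 16;
  uintptr_t extra = required - guaranteed;   /* worst-case hole */

  /* Every base the weaker guarantee can produce stays within the slack
     once rounded up to the stricter alignment.  */
  for (uintptr_t base = 0; base < 4096; base += guaranteed)
    {
      uintptr_t aligned = (base + required - 1) & ~(required - 1);
      assert (aligned - base <= extra);
    }
  return 0;
}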
@@ -1257,7 +1285,8 @@ allocate_dynamic_stack_space (rtx size, rtx target, int known_align,
       if (flag_stack_usage)
        current_function_dynamic_alloc_count++;
 
-      known_align_valid = false;
+      /* ??? Can we infer a minimum of STACK_BOUNDARY here?  */
+      size_align = BITS_PER_UNIT;
     }
 #endif /* SETJMP_VIA_SAVE_AREA */
@@ -1274,7 +1303,7 @@ allocate_dynamic_stack_space (rtx size, rtx target, int known_align,
      insns.  Since this is an extremely rare event, we have no reliable
      way of knowing which systems have this problem.  So we avoid even
      momentarily mis-aligning the stack.  */
-  if (!known_align_valid || known_align % MAX_SUPPORTED_STACK_ALIGNMENT != 0)
+  if (size_align % MAX_SUPPORTED_STACK_ALIGNMENT != 0)
     {
       size = round_push (size);
@@ -1285,13 +1314,7 @@ allocate_dynamic_stack_space (rtx size, rtx target, int known_align,
        }
     }
 
-  /* Don't use a TARGET that isn't a pseudo or is the wrong mode.  */
-  if (target == 0 || !REG_P (target)
-      || REGNO (target) < FIRST_PSEUDO_REGISTER
-      || GET_MODE (target) != Pmode)
-    target = gen_reg_rtx (Pmode);
-
-  mark_reg_pointer (target, known_align);
+  target = gen_reg_rtx (Pmode);
 
   /* The size is supposed to be fully adjusted at this point so record it
      if stack usage info is requested.  */
@@ -1341,7 +1364,6 @@ allocate_dynamic_stack_space (rtx size, rtx target, int known_align,
     return space;
 
   final_target = gen_reg_rtx (Pmode);
-  mark_reg_pointer (final_target, known_align);
 
   emit_move_insn (final_target, space);
@@ -1440,26 +1462,6 @@ allocate_dynamic_stack_space (rtx size, rtx target, int known_align,
 #endif
     }
 
-  if (MUST_ALIGN)
-    {
-      /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
-        but we know it can't.  So add ourselves and then do
-        TRUNC_DIV_EXPR.  */
-      target = expand_binop (Pmode, add_optab, target,
-                            GEN_INT (BIGGEST_ALIGNMENT / BITS_PER_UNIT - 1),
-                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
-      target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target,
-                             GEN_INT (BIGGEST_ALIGNMENT / BITS_PER_UNIT),
-                             NULL_RTX, 1);
-      target = expand_mult (Pmode, target,
-                           GEN_INT (BIGGEST_ALIGNMENT / BITS_PER_UNIT),
-                           NULL_RTX, 1);
-    }
-
-  /* Record the new stack level for nonlocal gotos.  */
-  if (cfun->nonlocal_goto_save_area != 0)
-    update_nonlocal_goto_save_area ();
-
   /* Finish up the split stack handling.  */
   if (final_label != NULL_RTX)
     {
@@ -1469,6 +1471,29 @@ allocate_dynamic_stack_space (rtx size, rtx target, int known_align,
      target = final_target;
     }
 
+  if (must_align)
+    {
+      /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
+        but we know it can't.  So add ourselves and then do
+        TRUNC_DIV_EXPR.  */
+      target = expand_binop (Pmode, add_optab, target,
+                            GEN_INT (required_align / BITS_PER_UNIT - 1),
+                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
+      target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target,
+                             GEN_INT (required_align / BITS_PER_UNIT),
+                             NULL_RTX, 1);
+      target = expand_mult (Pmode, target,
+                           GEN_INT (required_align / BITS_PER_UNIT),
+                           NULL_RTX, 1);
+    }
+
+  /* Now that we've committed to a return value, mark its alignment.  */
+  mark_reg_pointer (target, required_align);
+
+  /* Record the new stack level for nonlocal gotos.  */
+  if (cfun->nonlocal_goto_save_area != 0)
+    update_nonlocal_goto_save_area ();
+
   return target;
 }
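The insns emitted above round the address with an add, a truncating divide, and a multiply, sidestepping the overflow worry of a ceiling divide. The same computation in plain C (an illustrative sketch, not GCC code):

#include <assert.h>
#include <stdint.h>

/* Round addr up to a multiple of align the way the generated insns do:
   add align-1, truncating-divide, then multiply back.  */
static uintptr_t
round_up (uintptr_t addr, uintptr_t align)
{
  return (addr + align - 1) / align * align;
}

int
main (void)
{
  assert (round_up (100, 64) == 128);
  assert (round_up (128, 64) == 128);   /* already aligned: unchanged */
  return 0;
}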
gcc/expr.h
@@ -642,7 +642,7 @@ extern void emit_stack_restore (enum save_level, rtx, rtx);
 extern void update_nonlocal_goto_save_area (void);
 
 /* Allocate some space on the stack dynamically and return its address.  */
-extern rtx allocate_dynamic_stack_space (rtx, rtx, int, bool);
+extern rtx allocate_dynamic_stack_space (rtx, unsigned, unsigned, bool);
 
 /* Emit one stack probe at ADDRESS, an address within the stack.  */
 extern void emit_stack_probe (rtx);
gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2010-10-09  Richard Henderson  <rth@redhat.com>
+
+	* lib/target-supports.exp
+	(check_effective_target_automatic_stack_alignment): Always true.
+
 2010-10-09  Richard Guenther  <rguenther@suse.de>
 
 	PR lto/45956
gcc/testsuite/lib/target-supports.exp
@@ -3551,12 +3551,8 @@ proc check_effective_target_4byte_wchar_t { } {
 # Return 1 if the target supports automatic stack alignment.
 
 proc check_effective_target_automatic_stack_alignment { } {
-    if { [istarget i?86*-*-*]
-        || [istarget x86_64-*-*] } then {
-       return 1
-    } else {
-       return 0
-    }
+    # Not "stack alignment" per se, but proper stack alignment of decls.
+    return 1;
 }
 
 # Return 1 if avx instructions can be compiled.