analyzer: fix ICE in print_mem_ref [PR98969]

PR analyzer/98969 and PR analyzer/99064 describes ICEs, in both cases
within print_mem_ref, when falsely reporting memory leaks - though it
is possible to generate the ICE on other diagnostics (which I added
in one of the test cases).

This patch fixes the ICE, leaving the fix for the leak false positives
as followup work.

The analyzer uses region_model::get_representative_path_var and
region_model::get_representative_tree to map back from its svalue
and region classes to the tree type used by the rest of the compiler,
and, in particular, for diagnostics.

The root cause of the ICE is sloppiness about types within those
functions; specifically when casts were stripped off svalues.  To
track these down I added wrapper functions that verify that the
types of the results are correct, and in doing so found various
other type-safety issues, which the patch also fixes.

Doing so led to various changes in diagnostics messages due to
more accurate types, but I felt that these changes weren't
desirable.
For example, the warning at CVE-2005-1689-minimal.c line 48
which expects:
  double-'free' of 'inbuf.data'
changed to
  double-'free' of '(char *)inbuf.data'

So I added stripping of top-level casts where necessary to avoid
cluttering diagnostics.

Finally, the more accurate types led to worse results from
readability_comparator, where e.g. the event message at line 50
of sensitive-1.c regressed from the precise:
  passing sensitive value 'password' in call to 'called_by_test_5' from 'test_5'
to the vaguer:
  calling 'called_by_test_5' from 'test_5'
This was due to erroneously picking the initial value of "password"
in the caller frame as the best value within the *callee* frame, due to
"char *" vs "const char *", which confuses the logic for tracking values
that pass along callgraph edges.  The patch fixes this by combining the
readability tests for tree and stack depth, rather than performing
them in sequence, so that it favors the value in the deepest frame.

As noted above, the patch fixes the ICEs, but does not fix the
leak false positives.

gcc/analyzer/ChangeLog:
	PR analyzer/98969
	* engine.cc (readability): Add names for the various arbitrary
	values.  Handle NOP_EXPR and INTEGER_CST.
	(readability_comparator): Combine the readability tests for
	tree and stack depth, rather than performing them sequentially.
	(impl_region_model_context::on_state_leak): Strip off top-level
	casts.
	* region-model.cc (region_model::get_representative_path_var): Add
	type-checking, moving the bulk of the implementation to...
	(region_model::get_representative_path_var_1): ...here.  Respect
	types in casts by recursing and re-adding the cast, rather than
	merely stripping them off.  Use the correct type when handling
	region_svalue.
	(region_model::get_representative_tree): Strip off any top-level
	cast.
	(region_model::get_representative_path_var): Add type-checking,
	moving the bulk of the implementation to...
	(region_model::get_representative_path_var_1): ...here.
	* region-model.h (region_model::get_representative_path_var_1):
New decl.
	(region_model::get_representative_path_var_1): New decl.
	* store.cc (append_pathvar_with_type): New.
	(binding_cluster::get_representative_path_vars): Cast path_vars
	to the correct type when adding them to *OUT_PVS.

gcc/testsuite/ChangeLog:
	PR analyzer/98969
	* g++.dg/analyzer/pr99064.C: New test.
	* gcc.dg/analyzer/pr98969.c: New test.
This commit is contained in:
David Malcolm 2021-02-11 20:31:28 -05:00
parent 0c5cdb31bd
commit 467a482052
6 changed files with 200 additions and 24 deletions

View File

@ -456,6 +456,9 @@ private:
static int
readability (const_tree expr)
{
/* Arbitrarily-chosen "high readability" value. */
const int HIGH_READABILITY = 65536;
gcc_assert (expr);
switch (TREE_CODE (expr))
{
@ -479,8 +482,7 @@ readability (const_tree expr)
case PARM_DECL:
case VAR_DECL:
if (DECL_NAME (expr))
/* Arbitrarily-chosen "high readability" value. */
return 65536;
return HIGH_READABILITY;
else
/* We don't want to print temporaries. For example, the C FE
prints them as e.g. "<Uxxxx>" where "xxxx" is the low 16 bits
@ -490,7 +492,17 @@ readability (const_tree expr)
case RESULT_DECL:
/* Printing "<return-value>" isn't ideal, but is less awful than
trying to print a temporary. */
return 32768;
return HIGH_READABILITY / 2;
case NOP_EXPR:
{
/* Impose a moderate readability penalty for casts. */
const int CAST_PENALTY = 32;
return readability (TREE_OPERAND (expr, 0)) - CAST_PENALTY;
}
case INTEGER_CST:
return HIGH_READABILITY;
default:
return 0;
@ -508,14 +520,25 @@ readability_comparator (const void *p1, const void *p2)
path_var pv1 = *(path_var const *)p1;
path_var pv2 = *(path_var const *)p2;
int r1 = readability (pv1.m_tree);
int r2 = readability (pv2.m_tree);
if (int cmp = r2 - r1)
return cmp;
const int tree_r1 = readability (pv1.m_tree);
const int tree_r2 = readability (pv2.m_tree);
/* Favor items that are deeper on the stack and hence more recent;
this also favors locals over globals. */
if (int cmp = pv2.m_stack_depth - pv1.m_stack_depth)
const int COST_PER_FRAME = 64;
const int depth_r1 = pv1.m_stack_depth * COST_PER_FRAME;
const int depth_r2 = pv2.m_stack_depth * COST_PER_FRAME;
/* Combine the scores from the tree and from the stack depth.
This e.g. lets us have a slightly penalized cast in the most
recent stack frame "beat" an uncast value in an older stack frame. */
const int sum_r1 = tree_r1 + depth_r1;
const int sum_r2 = tree_r2 + depth_r2;
if (int cmp = sum_r2 - sum_r1)
return cmp;
/* Otherwise, more readable trees win. */
if (int cmp = tree_r2 - tree_r1)
return cmp;
/* Otherwise, if they have the same readability, then impose an
@ -580,6 +603,10 @@ impl_region_model_context::on_state_leak (const state_machine &sm,
= m_old_state->m_region_model->get_representative_path_var (sval,
&visited);
/* Strip off top-level casts */
if (leaked_pv.m_tree && TREE_CODE (leaked_pv.m_tree) == NOP_EXPR)
leaked_pv.m_tree = TREE_OPERAND (leaked_pv.m_tree, 0);
/* This might be NULL; the pending_diagnostic subclasses need to cope
with this. */
tree leaked_tree = leaked_pv.m_tree;

View File

@ -2202,25 +2202,33 @@ region_model::eval_condition (tree lhs,
return eval_condition (get_rvalue (lhs, ctxt), op, get_rvalue (rhs, ctxt));
}
/* Attempt to return a path_var that represents SVAL, or return NULL_TREE.
/* Implementation of region_model::get_representative_path_var.
Attempt to return a path_var that represents SVAL, or return NULL_TREE.
Use VISITED to prevent infinite mutual recursion with the overload for
regions. */
path_var
region_model::get_representative_path_var (const svalue *sval,
svalue_set *visited) const
region_model::get_representative_path_var_1 (const svalue *sval,
svalue_set *visited) const
{
if (sval == NULL)
return path_var (NULL_TREE, 0);
if (const svalue *cast_sval = sval->maybe_undo_cast ())
sval = cast_sval;
gcc_assert (sval);
/* Prevent infinite recursion. */
if (visited->contains (sval))
return path_var (NULL_TREE, 0);
visited->add (sval);
/* Handle casts by recursion into get_representative_path_var. */
if (const svalue *cast_sval = sval->maybe_undo_cast ())
{
path_var result = get_representative_path_var (cast_sval, visited);
tree orig_type = sval->get_type ();
/* If necessary, wrap the result in a cast. */
if (result.m_tree && orig_type)
result.m_tree = build1 (NOP_EXPR, orig_type, result.m_tree);
return result;
}
auto_vec<path_var> pvs;
m_store.get_representative_path_vars (this, visited, sval, &pvs);
@ -2233,7 +2241,7 @@ region_model::get_representative_path_var (const svalue *sval,
const region *reg = ptr_sval->get_pointee ();
if (path_var pv = get_representative_path_var (reg, visited))
return path_var (build1 (ADDR_EXPR,
TREE_TYPE (sval->get_type ()),
sval->get_type (),
pv.m_tree),
pv.m_stack_depth);
}
@ -2261,16 +2269,54 @@ region_model::get_representative_path_var (const svalue *sval,
return pvs[0];
}
/* Attempt to return a tree that represents SVAL, or return NULL_TREE. */
/* Attempt to return a path_var that represents SVAL, or return NULL_TREE.
   Use VISITED to prevent infinite mutual recursion with the overload for
   regions.

   This function defers to get_representative_path_var_1 to do the work;
   it adds verification that get_representative_path_var_1 returned a tree
   of the correct type.  */

path_var
region_model::get_representative_path_var (const svalue *sval,
					   svalue_set *visited) const
{
  if (!sval)
    return path_var (NULL_TREE, 0);

  const path_var pv = get_representative_path_var_1 (sval, visited);

  /* Sanity-check the underlying implementation: if both SVAL and the
     result carry a type, they must agree.  */
  if (tree expected_type = sval->get_type ())
    if (pv.m_tree)
      gcc_assert (TREE_TYPE (pv.m_tree) == expected_type);

  return pv;
}
/* Attempt to return a tree that represents SVAL, or return NULL_TREE.

   Strip off any top-level cast, to avoid messages like
     double-free of '(void *)ptr'
   from analyzer diagnostics.  */

tree
region_model::get_representative_tree (const svalue *sval) const
{
  svalue_set visited;
  const path_var pv = get_representative_path_var (sval, &visited);
  tree expr = pv.m_tree;

  /* Remove a top-level cast, if present, for readability of
     diagnostics.  */
  if (expr != NULL_TREE && TREE_CODE (expr) == NOP_EXPR)
    expr = TREE_OPERAND (expr, 0);

  return expr;
}
/* Attempt to return a path_var that represents REG, or return
/* Implementation of region_model::get_representative_path_var.
Attempt to return a path_var that represents REG, or return
the NULL path_var.
For example, a region for a field of a local would be a path_var
wrapping a COMPONENT_REF.
@ -2278,8 +2324,8 @@ region_model::get_representative_tree (const svalue *sval) const
svalues. */
path_var
region_model::get_representative_path_var (const region *reg,
svalue_set *visited) const
region_model::get_representative_path_var_1 (const region *reg,
svalue_set *visited) const
{
switch (reg->get_kind ())
{
@ -2411,6 +2457,30 @@ region_model::get_representative_path_var (const region *reg,
}
}
/* Attempt to return a path_var that represents REG, or return
   the NULL path_var.
   For example, a region for a field of a local would be a path_var
   wrapping a COMPONENT_REF.
   Use VISITED to prevent infinite mutual recursion with the overload for
   svalues.

   This function defers to get_representative_path_var_1 to do the work;
   it adds verification that get_representative_path_var_1 returned a tree
   of the correct type.  */

path_var
region_model::get_representative_path_var (const region *reg,
					   svalue_set *visited) const
{
  const path_var pv = get_representative_path_var_1 (reg, visited);

  /* Check the result's type against REG's type, when both are
     available.  */
  tree reg_type = reg->get_type ();
  if (pv.m_tree && reg_type)
    gcc_assert (TREE_TYPE (pv.m_tree) == reg_type);

  return pv;
}
/* Update this model for any phis in SNODE, assuming we came from
LAST_CFG_SUPEREDGE. */

View File

@ -581,6 +581,13 @@ class region_model
const region *get_lvalue_1 (path_var pv, region_model_context *ctxt);
const svalue *get_rvalue_1 (path_var pv, region_model_context *ctxt);
path_var
get_representative_path_var_1 (const svalue *sval,
svalue_set *visited) const;
path_var
get_representative_path_var_1 (const region *reg,
svalue_set *visited) const;
void add_any_constraints_from_ssa_def_stmt (tree lhs,
enum tree_code op,
tree rhs,

View File

@ -1375,6 +1375,21 @@ binding_cluster::redundant_p () const
&& !m_touched);
}
/* Add PV to OUT_PVS, casting it to TYPE if it is not already of that
   type.  */

static void
append_pathvar_with_type (path_var pv,
			  tree type,
			  auto_vec<path_var> *out_pvs)
{
  gcc_assert (pv.m_tree);

  /* Express any needed conversion as an explicit NOP_EXPR wrapper, so
     that the pushed path_var has exactly TYPE.  */
  tree cur_type = TREE_TYPE (pv.m_tree);
  if (cur_type != type)
    pv.m_tree = build1 (NOP_EXPR, type, pv.m_tree);

  out_pvs->safe_push (pv);
}
/* Find representative path_vars for SVAL within this binding of BASE_REG,
appending the results to OUT_PVS. */
@ -1411,7 +1426,7 @@ binding_cluster::get_representative_path_vars (const region_model *model,
if (path_var pv
= model->get_representative_path_var (subregion,
visited))
out_pvs->safe_push (pv);
append_pathvar_with_type (pv, sval->get_type (), out_pvs);
}
}
else
@ -1420,7 +1435,7 @@ binding_cluster::get_representative_path_vars (const region_model *model,
if (path_var pv
= model->get_representative_path_var (skey->get_region (),
visited))
out_pvs->safe_push (pv);
append_pathvar_with_type (pv, sval->get_type (), out_pvs);
}
}
}

View File

@ -0,0 +1,39 @@
// { dg-do compile { target c++17 } }
// { dg-additional-options "-Wno-analyzer-too-complex" } */
// Reduced testcase for PR analyzer/99064: the analyzer ICE'd in
// print_mem_ref while reporting a (bogus) memory leak here.
// Minimal stand-ins for libstdc++'s iterator/allocator machinery follow.
template <typename> struct iterator_traits;
template <typename _Tp> struct iterator_traits<_Tp *> {
typedef _Tp &reference;
};
template <typename _Iterator> struct __normal_iterator {
_Iterator _M_current;
__normal_iterator(_Iterator &__i) : _M_current(__i) {}
typename iterator_traits<_Iterator>::reference operator*() {
return *_M_current;
}
};
template <typename> struct allocator;
template <typename> struct allocator_traits;
template <typename _Tp> struct allocator_traits<allocator<_Tp>> {
using pointer = _Tp *;
};
struct TPkcs11Token;
struct __alloc_traits : allocator_traits<allocator<TPkcs11Token>> {};
struct _Vector_base {
typedef __alloc_traits::pointer pointer;
struct {
pointer _M_start;
} _M_impl;
};
struct : _Vector_base {
__normal_iterator<pointer> begin() { return _M_impl._M_start; }
} list_tokens_token_list;
struct TPkcs11Token {
int *add_info;
};
// Storing the allocation through the iterator falsely looks like a leak;
// the fix stops the ICE, the false positive itself is xfailed below.
void list_tokens() {
for (__normal_iterator base = list_tokens_token_list.begin();;) {
int *add_info = new int;
(*base).add_info = add_info; // { dg-bogus "leak" "PR analyzer/98969" { xfail *-*-* } }
}
}

View File

@ -0,0 +1,18 @@
/* Reduced testcase for PR analyzer/98969: the analyzer ICE'd in
   print_mem_ref on diagnostics involving a cast of an integer to a
   pointer-to-struct.  */
struct foo
{
char *expr;
};
/* Writing through the casted pointer falsely looks like a leak; the
   fix stops the ICE, the false positive itself remains xfailed.  */
void
test_1 (long int i)
{
struct foo *f = (struct foo *)i;
f->expr = __builtin_malloc (1024);
} /* { dg-bogus "leak" "PR analyzer/98969" { xfail *-*-* } } */
/* The double-free diagnostic must print the casted expression without
   crashing.  */
void
test_2 (long int i)
{
__builtin_free (((struct foo *)i)->expr);
__builtin_free (((struct foo *)i)->expr); /* { dg-warning "double-'free' of '\\*\\(\\(struct foo \\*\\)i\\)\\.expr'" } */
}