Make strcmp_iw NOT ignore whitespace in the middle of tokens

Currently, "b func tion" manages to set a breakpoint at "function"!

All these years I had never noticed this, but now that the linespec
completer actually works, this easily happens by accident, with:

  "b func t<tab>"

expecting to get "thread", but getting instead:

  "b func tion"

...

Also, this:

  "b rettypefunc<int>"

manages to set a breakpoint on "rettype func<int>()".

These things happen due to strcmp_iw "magic".

Fix it by teaching strcmp_iw about when it can skip whitespace.  This
required handling user-defined operators, and scope operators,
complicating the code a bit, unfortunately.  I added unit tests for
all the corner cases I stumbled on, as I was developing this, and then
in the end wrote a testsuite testcase covering many of the same things
and more (to be added later).

gdb/ChangeLog:
2017-11-24  Pedro Alves  <palves@redhat.com>

	* cp-support.c (cp_symbol_name_matches_1): New, factored out from
	cp_fq_symbol_name_matches.  Pass language_cplus to
	strncmp_iw_with_mode.
	(cp_fq_symbol_name_matches): Call cp_symbol_name_matches_1.
	(selftests::test_cp_symbol_name_matches): New.
	(_initialize_cp_support): Register "cp_symbol_name_matches"
	selftests.
	* language.c (default_symbol_name_matcher): Pass language_minimal
	to strncmp_iw_with_mode.
	* utils.c: Include "cp-support.h" and <algorithm>.
	(valid_identifier_name_char, cp_skip_operator_token, skip_ws)
	(cp_is_operator): New functions.
	(strncmp_iw_with_mode): Use them.  Add language parameter.  Don't
	skip whitespace in the symbol name when the lookup name doesn't
	have spaces, and vice versa.
	(strncmp_iw, strcmp_iw): Pass language to strncmp_iw_with_mode.
	* utils.h (strncmp_iw_with_mode): Add language parameter.
This commit is contained in:
Pedro Alves 2017-11-24 23:30:04 +00:00
parent 276da9b31b
commit 0662b6a7c1
5 changed files with 429 additions and 22 deletions

View File

@ -1,3 +1,23 @@
2017-11-24 Pedro Alves <palves@redhat.com>
* cp-support.c (cp_symbol_name_matches_1): New, factored out from
cp_fq_symbol_name_matches. Pass language_cplus to
strncmp_iw_with_mode.
(cp_fq_symbol_name_matches): Call cp_symbol_name_matches_1.
(selftests::test_cp_symbol_name_matches): New.
(_initialize_cp_support): Register "cp_symbol_name_matches"
selftests.
* language.c (default_symbol_name_matcher): Pass language_minimal
to strncmp_iw_with_mode.
* utils.c: Include "cp-support.h" and <algorithm>.
(valid_identifier_name_char, cp_skip_operator_token, skip_ws)
(cp_is_operator): New functions.
(strncmp_iw_with_mode): Use them. Add language parameter. Don't
skip whitespace in the symbol name when the lookup name doesn't
have spaces, and vice versa.
(strncmp_iw, strcmp_iw): Pass language to strncmp_iw_with_mode.
* utils.h (strncmp_iw_with_mode): Add language parameter.
2017-11-24 Joel Brobecker <brobecker@adacore.com>
* ada-lang.c (ada_exception_message_1, ada_exception_message):

View File

@ -1617,6 +1617,39 @@ gdb_sniff_from_mangled_name (const char *mangled, char **demangled)
/* C++ symbol_name_matcher_ftype implementation. */
/* Worker for cp_fq_symbol_name_matches (the C++
   symbol_name_matcher_ftype implementation), kept as a separate
   function so that unit tests can call it directly.

   See symbol_name_matcher_ftype for the description of
   SYMBOL_SEARCH_NAME and of the completion match result, which is
   passed here as MATCH and may be NULL.

   LOOKUP_NAME/LOOKUP_NAME_LEN is the name being looked up.

   See strncmp_iw_with_mode for the description of MODE.

   Returns true if SYMBOL_SEARCH_NAME matches, false otherwise.  On a
   match, and only when MATCH is non-NULL, SYMBOL_SEARCH_NAME is
   recorded in MATCH.  */

static bool
cp_symbol_name_matches_1 (const char *symbol_search_name,
			  const char *lookup_name,
			  size_t lookup_name_len,
			  strncmp_iw_mode mode,
			  completion_match *match)
{
  /* The comparison is always done with C++ rules.  */
  const bool names_differ
    = strncmp_iw_with_mode (symbol_search_name,
			    lookup_name, lookup_name_len,
			    mode, language_cplus) != 0;

  if (names_differ)
    return false;

  /* The match result is optional; only fill it in on request.  */
  if (match != NULL)
    match->set_match (symbol_search_name);

  return true;
}
/* C++ symbol_name_matcher_ftype implementation. */
static bool
cp_fq_symbol_name_matches (const char *symbol_search_name,
const lookup_name_info &lookup_name,
@ -1629,16 +1662,9 @@ cp_fq_symbol_name_matches (const char *symbol_search_name,
? strncmp_iw_mode::NORMAL
: strncmp_iw_mode::MATCH_PARAMS);
if (strncmp_iw_with_mode (symbol_search_name,
name.c_str (), name.size (),
mode) == 0)
{
if (match != NULL)
match->set_match (symbol_search_name);
return true;
}
return false;
return cp_symbol_name_matches_1 (symbol_search_name,
name.c_str (), name.size (),
mode, match);
}
/* See cp-support.h. */
@ -1653,6 +1679,136 @@ cp_get_symbol_name_matcher (const lookup_name_info &lookup_name)
namespace selftests {
/* Self-test for cp_symbol_name_matches_1.  Each check below passes a
   SYMBOL (the symbol search name) and an INPUT (the lookup name) and
   asserts whether they match.  The helper macros apply sizeof to
   INPUT to obtain its length, so INPUT must be a string literal.  */
void
test_cp_symbol_name_matches ()
{
/* Assert that the whole of INPUT matches SYMBOL in MATCH_PARAMS
   mode.  */
#define CHECK_MATCH(SYMBOL, INPUT) \
SELF_CHECK (cp_symbol_name_matches_1 (SYMBOL, \
INPUT, sizeof (INPUT) - 1, \
strncmp_iw_mode::MATCH_PARAMS, \
NULL))
/* Assert that the whole of INPUT does NOT match SYMBOL in
   MATCH_PARAMS mode.  */
#define CHECK_NOT_MATCH(SYMBOL, INPUT) \
SELF_CHECK (!cp_symbol_name_matches_1 (SYMBOL, \
INPUT, sizeof (INPUT) - 1, \
strncmp_iw_mode::MATCH_PARAMS, \
NULL))
/* Like CHECK_MATCH, and also check that INPUT (and all substrings
that start at index 0) completes to SYMBOL. */
/* The loop covers prefix lengths 0 up to, but not including, the
   full length of INPUT; each prefix is compared in NORMAL mode.  */
#define CHECK_MATCH_C(SYMBOL, INPUT) \
do \
{ \
CHECK_MATCH (SYMBOL, INPUT); \
for (size_t i = 0; i < sizeof (INPUT) - 1; i++) \
SELF_CHECK (cp_symbol_name_matches_1 (SYMBOL, INPUT, i, \
strncmp_iw_mode::NORMAL, \
NULL)); \
} while (0)
/* Like CHECK_NOT_MATCH, and also check that INPUT does NOT complete
to SYMBOL. */
/* Unlike CHECK_MATCH_C, only the full INPUT is checked in NORMAL
   mode, not its prefixes.  */
#define CHECK_NOT_MATCH_C(SYMBOL, INPUT) \
do \
{ \
CHECK_NOT_MATCH (SYMBOL, INPUT); \
SELF_CHECK (!cp_symbol_name_matches_1 (SYMBOL, INPUT, \
sizeof (INPUT) - 1, \
strncmp_iw_mode::NORMAL, \
NULL)); \
} while (0)
/* Lookup name without parens matches all overloads. */
CHECK_MATCH_C ("function()", "function");
CHECK_MATCH_C ("function(int)", "function");
/* Check whitespace around parameters is ignored. */
CHECK_MATCH_C ("function()", "function ()");
CHECK_MATCH_C ("function ( )", "function()");
CHECK_MATCH_C ("function ()", "function( )");
CHECK_MATCH_C ("func(int)", "func( int )");
CHECK_MATCH_C ("func(int)", "func ( int ) ");
CHECK_MATCH_C ("func ( int )", "func( int )");
CHECK_MATCH_C ("func ( int )", "func ( int ) ");
/* Check symbol name prefixes aren't incorrectly matched. */
CHECK_NOT_MATCH ("func", "function");
CHECK_NOT_MATCH ("function", "func");
CHECK_NOT_MATCH ("function()", "func");
/* Check that if the lookup name includes parameters, only the right
overload matches. */
CHECK_MATCH_C ("function(int)", "function(int)");
CHECK_NOT_MATCH_C ("function(int)", "function()");
/* Check that whitespace within symbol names is not ignored. */
CHECK_NOT_MATCH_C ("function", "func tion");
CHECK_NOT_MATCH_C ("func__tion", "func_ _tion");
CHECK_NOT_MATCH_C ("func11tion", "func1 1tion");
/* Check the converse, which can happen with template function,
where the return type is part of the demangled name. */
CHECK_NOT_MATCH_C ("func tion", "function");
CHECK_NOT_MATCH_C ("func1 1tion", "func11tion");
CHECK_NOT_MATCH_C ("func_ _tion", "func__tion");
/* Within parameters too. */
CHECK_NOT_MATCH_C ("func(param)", "func(par am)");
/* Check handling of whitespace around C++ operators. */
/* Whitespace may separate "operator" from the operator token, but
   must not split either of them.  */
CHECK_NOT_MATCH_C ("operator<<", "opera tor<<");
CHECK_NOT_MATCH_C ("operator<<", "operator< <");
CHECK_NOT_MATCH_C ("operator<<", "operator < <");
CHECK_NOT_MATCH_C ("operator==", "operator= =");
CHECK_NOT_MATCH_C ("operator==", "operator = =");
CHECK_MATCH_C ("operator<<", "operator <<");
CHECK_MATCH_C ("operator<<()", "operator <<");
CHECK_NOT_MATCH_C ("operator<<()", "operator<<(int)");
CHECK_NOT_MATCH_C ("operator<<(int)", "operator<<()");
CHECK_MATCH_C ("operator==", "operator ==");
CHECK_MATCH_C ("operator==()", "operator ==");
CHECK_MATCH_C ("operator <<", "operator<<");
CHECK_MATCH_C ("operator ==", "operator==");
CHECK_MATCH_C ("operator bool", "operator bool");
CHECK_MATCH_C ("operator bool ()", "operator bool");
/* "operatorX" and "Xoperator" are ordinary identifiers, not the
   operator keyword, so the template-bracket whitespace rules
   apply.  */
CHECK_MATCH_C ("operatorX<<", "operatorX < <");
CHECK_MATCH_C ("Xoperator<<", "Xoperator < <");
CHECK_MATCH_C ("operator()(int)", "operator()(int)");
CHECK_MATCH_C ("operator()(int)", "operator ( ) ( int )");
CHECK_MATCH_C ("operator()<long>(int)", "operator ( ) < long > ( int )");
/* The first "()" is not the parameter list. */
CHECK_NOT_MATCH ("operator()(int)", "operator");
/* Misc user-defined operator tests. */
CHECK_NOT_MATCH_C ("operator/=()", "operator ^=");
/* Same length at end of input. */
CHECK_NOT_MATCH_C ("operator>>", "operator[]");
/* Same length but not at end of input. */
CHECK_NOT_MATCH_C ("operator>>()", "operator[]()");
/* Conversion operators.  */
CHECK_MATCH_C ("base::operator char*()", "base::operator char*()");
CHECK_MATCH_C ("base::operator char*()", "base::operator char * ()");
CHECK_MATCH_C ("base::operator char**()", "base::operator char * * ()");
CHECK_MATCH ("base::operator char**()", "base::operator char * *");
CHECK_MATCH_C ("base::operator*()", "base::operator*()");
CHECK_NOT_MATCH_C ("base::operator char*()", "base::operatorc");
CHECK_NOT_MATCH ("base::operator char*()", "base::operator char");
CHECK_NOT_MATCH ("base::operator char*()", "base::operat");
/* Check handling of whitespace around C++ scope operators. */
CHECK_NOT_MATCH_C ("foo::bar", "foo: :bar");
CHECK_MATCH_C ("foo::bar", "foo :: bar");
CHECK_MATCH_C ("foo :: bar", "foo::bar");
CHECK_MATCH_C ("abc::def::ghi()", "abc::def::ghi()");
CHECK_MATCH_C ("abc::def::ghi ( )", "abc::def::ghi()");
CHECK_MATCH_C ("abc::def::ghi()", "abc::def::ghi ( )");
CHECK_MATCH_C ("function()", "function()");
CHECK_MATCH_C ("bar::function()", "bar::function()");
}
/* If non-NULL, return STR wrapped in quotes. Otherwise, return a
"<null>" string (with no quotes). */
@ -1856,6 +2012,8 @@ display the offending symbol."),
#endif
#if GDB_SELF_TEST
selftests::register_test ("cp_symbol_name_matches",
selftests::test_cp_symbol_name_matches);
selftests::register_test ("cp_remove_params",
selftests::test_cp_remove_params);
#endif

View File

@ -713,7 +713,7 @@ default_symbol_name_matcher (const char *symbol_search_name,
: strncmp_iw_mode::MATCH_PARAMS);
if (strncmp_iw_with_mode (symbol_search_name, name.c_str (), name.size (),
mode) == 0)
mode, language_minimal) == 0)
{
if (match != NULL)
match->set_match (symbol_search_name);

View File

@ -68,6 +68,8 @@
#include "job-control.h"
#include "common/selftest.h"
#include "common/gdb_optional.h"
#include "cp-support.h"
#include <algorithm>
#if !HAVE_DECL_MALLOC
extern PTR malloc (); /* ARI: PTR */
@ -2156,22 +2158,233 @@ fprintf_symbol_filtered (struct ui_file *stream, const char *name,
}
}
/* True if CH is a character that can be part of a symbol name.  I.e.,
   either a number, a letter, or a '_'.

   CH is typically a plain 'char' read from a string, so it may be
   negative where 'char' is signed.  */

static bool
valid_identifier_name_char (int ch)
{
  /* The <ctype.h> classifiers are only defined for values
     representable as unsigned char (or EOF); convert first so that
     bytes >= 0x80 do not invoke undefined behavior.  */
  return isalnum ((unsigned char) ch) || ch == '_';
}
/* Skip to end of token, or to END, whatever comes first.  Input is
   assumed to be a C++ operator name.

   TOKEN points at the start of the token and END is one past the last
   character that may be examined.  Returns a pointer just past the
   token:

   - TOKEN itself, if TOKEN is at END, at whitespace, or at '('.
   - The end of the identifier, if TOKEN starts with an identifier
     character.
   - The end of the punctuation operator that starts at TOKEN.  If
     END cuts the input short, a prefix of an operator is accepted as
     a token too.
   - TOKEN + 1 for any other character.  */

static const char *
cp_skip_operator_token (const char *token, const char *end)
{
  const char *p = token;

  /* Cast before calling isspace: passing a negative 'char' value to a
     <ctype.h> function is undefined behavior.  */
  if (p == end || isspace ((unsigned char) *p) || *p == '(')
    return p;

  if (valid_identifier_name_char (*p))
    {
      while (p != end && valid_identifier_name_char (*p))
	p++;
      return p;
    }

  /* Note, ordered such that among ops that share a prefix, longer
     comes first.  This is so that the loop below can bail on first
     match.  */
  static const char *const ops[] =
    {
      "[",
      "]",
      "~",
      ",",
      "-=", "--", "->", "-",
      "+=", "++", "+",
      "*=", "*",
      "/=", "/",
      "%=", "%",
      "|=", "||", "|",
      "&=", "&&", "&",
      "^=", "^",
      "!=", "!",
      "<<=", "<=", "<<", "<",
      ">>=", ">=", ">>", ">",
      "==", "=",
    };

  for (const char *op : ops)
    {
      size_t oplen = strlen (op);
      /* Never compare past END.  */
      size_t lencmp = std::min<size_t> (oplen, end - p);

      if (strncmp (p, op, lencmp) == 0)
	return p + lencmp;
    }

  /* Some unidentified character.  Return it.  */
  return p + 1;
}
/* Advance STRING1/STRING2 past whitespace.

   STRING1 is assumed to be NUL-terminated; scanning it stops at the
   first non-whitespace character (the terminating NUL counts as
   one).  STRING2 is bounded by END_STR2 and is never advanced beyond
   it.  Both pointers are updated in place.  */

static void
skip_ws (const char *&string1, const char *&string2, const char *end_str2)
{
  /* Cast to unsigned char: passing a negative 'char' value to isspace
     is undefined behavior.  */
  while (isspace ((unsigned char) *string1))
    string1++;
  while (string2 < end_str2 && isspace ((unsigned char) *string2))
    string2++;
}
/* Return true if STRING points at the start of a C++ operator name,
   i.e., at CP_OPERATOR_STR (presumably the "operator" keyword; see
   cp-support.h) appearing as a whole word.  START is the beginning of
   the buffer STRING points into; no character before START is ever
   read when looking backwards.  */

static bool
cp_is_operator (const char *string, const char *start)
{
  /* Preceded by an identifier character: STRING is in the middle of
     a longer identifier.  */
  if (string != start && valid_identifier_name_char (string[-1]))
    return false;

  if (strncmp (string, CP_OPERATOR_STR, CP_OPERATOR_LEN) != 0)
    return false;

  /* Followed by an identifier character: this is a longer identifier
     that merely begins with the keyword.  */
  return !valid_identifier_name_char (string[CP_OPERATOR_LEN]);
}
/* See utils.h. */
int
strncmp_iw_with_mode (const char *string1, const char *string2,
size_t string2_len, strncmp_iw_mode mode)
size_t string2_len, strncmp_iw_mode mode,
enum language language)
{
const char *string1_start = string1;
const char *end_str2 = string2 + string2_len;
bool skip_spaces = true;
bool have_colon_op = (language == language_cplus
|| language == language_rust
|| language == language_fortran);
while (1)
{
while (isspace (*string1))
string1++;
while (string2 < end_str2 && isspace (*string2))
string2++;
if (skip_spaces
|| ((isspace (*string1) && !valid_identifier_name_char (*string2))
|| (isspace (*string2) && !valid_identifier_name_char (*string1))))
{
skip_ws (string1, string2, end_str2);
skip_spaces = false;
}
if (*string1 == '\0' || string2 == end_str2)
break;
/* Handle the :: operator. */
if (have_colon_op && string1[0] == ':' && string1[1] == ':')
{
if (*string2 != ':')
return 1;
string1++;
string2++;
if (string2 == end_str2)
break;
if (*string2 != ':')
return 1;
string1++;
string2++;
while (isspace (*string1))
string1++;
while (string2 < end_str2 && isspace (*string2))
string2++;
continue;
}
/* Handle C++ user-defined operators. */
else if (language == language_cplus
&& *string1 == 'o')
{
if (cp_is_operator (string1, string1_start))
{
/* An operator name in STRING1. Check STRING2. */
size_t cmplen
= std::min<size_t> (CP_OPERATOR_LEN, end_str2 - string2);
if (strncmp (string1, string2, cmplen) != 0)
return 1;
string1 += cmplen;
string2 += cmplen;
if (string2 != end_str2)
{
/* Check for "operatorX" in STRING2. */
if (valid_identifier_name_char (*string2))
return 1;
skip_ws (string1, string2, end_str2);
}
/* Handle operator(). */
if (*string1 == '(')
{
if (string2 == end_str2)
{
if (mode == strncmp_iw_mode::NORMAL)
return 0;
else
{
/* Don't break for the regular return at the
bottom, because "operator" should not
match "operator()", since this open
parentheses is not the parameter list
start. */
return *string1 != '\0';
}
}
if (*string1 != *string2)
return 1;
string1++;
string2++;
}
while (1)
{
skip_ws (string1, string2, end_str2);
/* Skip to end of token, or to END, whatever comes
first. */
const char *end_str1 = string1 + strlen (string1);
const char *p1 = cp_skip_operator_token (string1, end_str1);
const char *p2 = cp_skip_operator_token (string2, end_str2);
cmplen = std::min (p1 - string1, p2 - string2);
if (p2 == end_str2)
{
if (strncmp (string1, string2, cmplen) != 0)
return 1;
}
else
{
if (p1 - string1 != p2 - string2)
return 1;
if (strncmp (string1, string2, cmplen) != 0)
return 1;
}
string1 += cmplen;
string2 += cmplen;
if (*string1 == '\0' || string2 == end_str2)
break;
if (*string1 == '(' || *string2 == '(')
break;
}
continue;
}
}
if (case_sensitivity == case_sensitive_on && *string1 != *string2)
break;
if (case_sensitivity == case_sensitive_off
@ -2179,6 +2392,12 @@ strncmp_iw_with_mode (const char *string1, const char *string2,
!= tolower ((unsigned char) *string2)))
break;
/* If we see any non-whitespace, non-identifier-name character
(any of "()<>*&" etc.), then skip spaces the next time
around. */
if (!isspace (*string1) && !valid_identifier_name_char (*string1))
skip_spaces = true;
string1++;
string2++;
}
@ -2200,7 +2419,7 @@ int
strncmp_iw (const char *string1, const char *string2, size_t string2_len)
{
return strncmp_iw_with_mode (string1, string2, string2_len,
strncmp_iw_mode::NORMAL);
strncmp_iw_mode::NORMAL, language_minimal);
}
/* See utils.h. */
@ -2209,7 +2428,7 @@ int
strcmp_iw (const char *string1, const char *string2)
{
return strncmp_iw_with_mode (string1, string2, strlen (string2),
strncmp_iw_mode::MATCH_PARAMS);
strncmp_iw_mode::MATCH_PARAMS, language_minimal);
}
/* This is like strcmp except that it ignores whitespace and treats

View File

@ -48,17 +48,24 @@ enum class strncmp_iw_mode
/* Helper for strcmp_iw and strncmp_iw. Exported so that languages
can implement both NORMAL and MATCH_PARAMS variants in a single
function and defer part of the work to strncmp_iw_with_mode. */
function and defer part of the work to strncmp_iw_with_mode.
LANGUAGE is used to implement some context-sensitive
language-specific comparisons. For example, for C++,
"string1=operator()" should not match "string2=operator" even in
MATCH_PARAMS mode. */
extern int strncmp_iw_with_mode (const char *string1,
const char *string2,
size_t string2_len,
strncmp_iw_mode mode);
strncmp_iw_mode mode,
enum language language);
/* Do a strncmp() type operation on STRING1 and STRING2, ignoring any
differences in whitespace. STRING2_LEN is STRING2's length.
Returns 0 if STRING1 matches STRING2_LEN characters of STRING2,
non-zero otherwise (slightly different than strncmp()'s range of
return values). */
return values). Note: passes language_minimal to
strncmp_iw_with_mode, and should therefore be avoided if a more
suitable language is available. */
extern int strncmp_iw (const char *string1, const char *string2,
size_t string2_len);
@ -70,7 +77,10 @@ extern int strncmp_iw (const char *string1, const char *string2,
As an extra hack, string1=="FOO(ARGS)" matches string2=="FOO".
This "feature" is useful when searching for matching C++ function
names (such as if the user types 'break FOO', where FOO is a
mangled C++ function). */
mangled C++ function).
Note: passes language_minimal to strncmp_iw_with_mode, and should
therefore be avoided if a more suitable language is available. */
extern int strcmp_iw (const char *string1, const char *string2);
extern int strcmp_iw_ordered (const char *, const char *);