Use safe-ctype.h (ISSPACE etc.) in symbol parsing & comparison

This patch avoids depending on the current locale when parsing &
comparing symbol names, by using libiberty's safe-ctype.h uppercase
TOLOWER, ISXDIGIT, etc. macros instead of the standard ctype.h
tolower, isxdigit, etc. macros/functions.

This commit:

 commit b1b60145ae
 Author:     Pedro Alves <palves@redhat.com>
 AuthorDate: Tue May 22 17:35:38 2018 +0100

    Support UTF-8 identifiers in C/C++ expressions (PR gdb/22973)

did something similar, except in the expression parser.

This can improve GDB's symbol loading performance significantly.
Currently strcmp_iw_ordered can show up high on profiles (called from
sort_pst_symbols -> std::sort) because of the isspace and tolower
functions.  Hannes mentions seeing it as high as in ~24% of the
profiling samples on Windows
(https://sourceware.org/pipermail/gdb-patches/2020-May/168858.html).

I tested GDB's performance (built with "-g -O2") loading a "-g -O0"
build of gdb.

I ran GDB 10 times like:

	  /bin/time -f %e \
		    ./gdb/gdb --data-directory ./gdb/data-directory -nx \
		    -batch /tmp/gdb-g-O0

Then I computed the mean time.

The baseline mean time was

 gdb    2.515

This patch brings the number down to

 gdb    2.096

Which is an around 16% improvement.

gdb/ChangeLog:
2020-05-23  Pedro Alves  <palves@redhat.com>

	* utils.c: Include "gdbsupport/gdb-safe-ctype.h".
	(parse_escape): Use ISDIGIT instead of isdigit.
	(puts_debug): Use gdb_isprint instead of isprint.
	(fprintf_symbol_filtered): Use ISALNUM instead of isalnum.
	(cp_skip_operator_token, skip_ws, strncmp_iw_with_mode): Use
	ISSPACE instead of isspace.
	(strncmp_iw_with_mode): Use TOLOWER instead of tolower and ISSPACE
	instead of isspace.
	(strcmp_iw_ordered): Use ISSPACE instead of isspace.
	(string_to_core_addr): Use TOLOWER instead of tolower, ISXDIGIT
	instead of isxdigit and ISDIGIT instead of isdigit.

gdbsupport/ChangeLog:
2020-05-23  Pedro Alves  <palves@redhat.com>

	* gdb-safe-ctype.h: New.
This commit is contained in:
Pedro Alves 2020-05-23 12:46:37 +01:00
parent c892b44730
commit 51e2cfa2dc
4 changed files with 88 additions and 23 deletions

View File

@ -1,3 +1,17 @@
2020-05-23 Pedro Alves <palves@redhat.com>
* utils.c: Include "gdbsupport/gdb-safe-ctype.h".
(parse_escape): Use ISDIGIT instead of isdigit.
(puts_debug): Use gdb_isprint instead of isprint.
(fprintf_symbol_filtered): Use ISALNUM instead of isalnum.
(cp_skip_operator_token, skip_ws, strncmp_iw_with_mode): Use
ISSPACE instead of isspace.
(strncmp_iw_with_mode): Use TOLOWER instead of tolower and ISSPACE
instead of isspace.
(strcmp_iw_ordered): Use ISSPACE instead of isspace.
(string_to_core_addr): Use TOLOWER instead of tolower, ISXDIGIT
instead of isxdigit and ISDIGIT instead of isdigit.
2020-05-22 Simon Marchi <simon.marchi@efficios.com> 2020-05-22 Simon Marchi <simon.marchi@efficios.com>
* gdbtypes.h (struct type) <field>: New method. * gdbtypes.h (struct type) <field>: New method.

View File

@ -74,6 +74,7 @@
#include "gdbsupport/scope-exit.h" #include "gdbsupport/scope-exit.h"
#include "gdbarch.h" #include "gdbarch.h"
#include "cli-out.h" #include "cli-out.h"
#include "gdbsupport/gdb-safe-ctype.h"
void (*deprecated_error_begin_hook) (void); void (*deprecated_error_begin_hook) (void);
@ -1025,7 +1026,7 @@ parse_escape (struct gdbarch *gdbarch, const char **string_ptr)
while (++count < 3) while (++count < 3)
{ {
c = (**string_ptr); c = (**string_ptr);
if (isdigit (c) && c != '8' && c != '9') if (ISDIGIT (c) && c != '8' && c != '9')
{ {
(*string_ptr)++; (*string_ptr)++;
i *= 8; i *= 8;
@ -1995,7 +1996,7 @@ puts_debug (char *prefix, char *string, char *suffix)
switch (ch) switch (ch)
{ {
default: default:
if (isprint (ch)) if (gdb_isprint (ch))
fputc_unfiltered (ch, gdb_stdlog); fputc_unfiltered (ch, gdb_stdlog);
else else
@ -2316,7 +2317,7 @@ fprintf_symbol_filtered (struct ui_file *stream, const char *name,
static bool static bool
valid_identifier_name_char (int ch) valid_identifier_name_char (int ch)
{ {
return (isalnum (ch) || ch == '_'); return (ISALNUM (ch) || ch == '_');
} }
/* Skip to end of token, or to END, whatever comes first. Input is /* Skip to end of token, or to END, whatever comes first. Input is
@ -2326,7 +2327,7 @@ static const char *
cp_skip_operator_token (const char *token, const char *end) cp_skip_operator_token (const char *token, const char *end)
{ {
const char *p = token; const char *p = token;
while (p != end && !isspace (*p) && *p != '(') while (p != end && !ISSPACE (*p) && *p != '(')
{ {
if (valid_identifier_name_char (*p)) if (valid_identifier_name_char (*p))
{ {
@ -2380,9 +2381,9 @@ cp_skip_operator_token (const char *token, const char *end)
static void static void
skip_ws (const char *&string1, const char *&string2, const char *end_str2) skip_ws (const char *&string1, const char *&string2, const char *end_str2)
{ {
while (isspace (*string1)) while (ISSPACE (*string1))
string1++; string1++;
while (string2 < end_str2 && isspace (*string2)) while (string2 < end_str2 && ISSPACE (*string2))
string2++; string2++;
} }
@ -2444,8 +2445,8 @@ strncmp_iw_with_mode (const char *string1, const char *string2,
while (1) while (1)
{ {
if (skip_spaces if (skip_spaces
|| ((isspace (*string1) && !valid_identifier_name_char (*string2)) || ((ISSPACE (*string1) && !valid_identifier_name_char (*string2))
|| (isspace (*string2) && !valid_identifier_name_char (*string1)))) || (ISSPACE (*string2) && !valid_identifier_name_char (*string1))))
{ {
skip_ws (string1, string2, end_str2); skip_ws (string1, string2, end_str2);
skip_spaces = false; skip_spaces = false;
@ -2478,7 +2479,7 @@ strncmp_iw_with_mode (const char *string1, const char *string2,
if (match_for_lcd != NULL && abi_start != string1) if (match_for_lcd != NULL && abi_start != string1)
match_for_lcd->mark_ignored_range (abi_start, string1); match_for_lcd->mark_ignored_range (abi_start, string1);
while (isspace (*string1)) while (ISSPACE (*string1))
string1++; string1++;
} }
@ -2503,9 +2504,9 @@ strncmp_iw_with_mode (const char *string1, const char *string2,
string1++; string1++;
string2++; string2++;
while (isspace (*string1)) while (ISSPACE (*string1))
string1++; string1++;
while (string2 < end_str2 && isspace (*string2)) while (string2 < end_str2 && ISSPACE (*string2))
string2++; string2++;
continue; continue;
} }
@ -2599,14 +2600,14 @@ strncmp_iw_with_mode (const char *string1, const char *string2,
if (case_sensitivity == case_sensitive_on && *string1 != *string2) if (case_sensitivity == case_sensitive_on && *string1 != *string2)
break; break;
if (case_sensitivity == case_sensitive_off if (case_sensitivity == case_sensitive_off
&& (tolower ((unsigned char) *string1) && (TOLOWER ((unsigned char) *string1)
!= tolower ((unsigned char) *string2))) != TOLOWER ((unsigned char) *string2)))
break; break;
/* If we see any non-whitespace, non-identifier-name character /* If we see any non-whitespace, non-identifier-name character
(any of "()<>*&" etc.), then skip spaces the next time (any of "()<>*&" etc.), then skip spaces the next time
around. */ around. */
if (!isspace (*string1) && !valid_identifier_name_char (*string1)) if (!ISSPACE (*string1) && !valid_identifier_name_char (*string1))
skip_spaces = true; skip_spaces = true;
string1++; string1++;
@ -2727,16 +2728,16 @@ strcmp_iw_ordered (const char *string1, const char *string2)
while (*string1 != '\0' && *string2 != '\0') while (*string1 != '\0' && *string2 != '\0')
{ {
while (isspace (*string1)) while (ISSPACE (*string1))
string1++; string1++;
while (isspace (*string2)) while (ISSPACE (*string2))
string2++; string2++;
switch (case_pass) switch (case_pass)
{ {
case case_sensitive_off: case case_sensitive_off:
c1 = tolower ((unsigned char) *string1); c1 = TOLOWER ((unsigned char) *string1);
c2 = tolower ((unsigned char) *string2); c2 = TOLOWER ((unsigned char) *string2);
break; break;
case case_sensitive_on: case case_sensitive_on:
c1 = *string1; c1 = *string1;
@ -2924,17 +2925,17 @@ string_to_core_addr (const char *my_string)
{ {
CORE_ADDR addr = 0; CORE_ADDR addr = 0;
if (my_string[0] == '0' && tolower (my_string[1]) == 'x') if (my_string[0] == '0' && TOLOWER (my_string[1]) == 'x')
{ {
/* Assume that it is in hex. */ /* Assume that it is in hex. */
int i; int i;
for (i = 2; my_string[i] != '\0'; i++) for (i = 2; my_string[i] != '\0'; i++)
{ {
if (isdigit (my_string[i])) if (ISDIGIT (my_string[i]))
addr = (my_string[i] - '0') + (addr * 16); addr = (my_string[i] - '0') + (addr * 16);
else if (isxdigit (my_string[i])) else if (ISXDIGIT (my_string[i]))
addr = (tolower (my_string[i]) - 'a' + 0xa) + (addr * 16); addr = (TOLOWER (my_string[i]) - 'a' + 0xa) + (addr * 16);
else else
error (_("invalid hex \"%s\""), my_string); error (_("invalid hex \"%s\""), my_string);
} }
@ -2946,7 +2947,7 @@ string_to_core_addr (const char *my_string)
for (i = 0; my_string[i] != '\0'; i++) for (i = 0; my_string[i] != '\0'; i++)
{ {
if (isdigit (my_string[i])) if (ISDIGIT (my_string[i]))
addr = (my_string[i] - '0') + (addr * 10); addr = (my_string[i] - '0') + (addr * 10);
else else
error (_("invalid decimal \"%s\""), my_string); error (_("invalid decimal \"%s\""), my_string);

View File

@ -1,3 +1,7 @@
2020-05-23 Pedro Alves <palves@redhat.com>
* gdb-safe-ctype.h: New.
2020-05-16 Tankut Baris Aktemur <tankut.baris.aktemur@intel.com> 2020-05-16 Tankut Baris Aktemur <tankut.baris.aktemur@intel.com>
* linux-ia64-low.cc (ia64_target::sw_breakpoint_from_kind): * linux-ia64-low.cc (ia64_target::sw_breakpoint_from_kind):

View File

@ -0,0 +1,46 @@
/* Wrapper around libiberty's safe-ctype.h for GDB, the GNU debugger.
Copyright (C) 2019-2020 Free Software Foundation, Inc.
This file is part of GDB.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#ifndef GDB_SAFE_CTYPE_H
#define GDB_SAFE_CTYPE_H
/* After safe-ctype.h is included, we can no longer use the host's
ctype routines. Trying to do so results in compile errors. Code
that uses safe-ctype.h that wants to refer to the locale-dependent
ctype functions must call these wrapper versions instead. */
static inline int
gdb_isprint (int ch)
{
return isprint (ch);
}
/* readline.h defines these symbols too, but we want libiberty's
versions. */
#undef ISALPHA
#undef ISALNUM
#undef ISDIGIT
#undef ISLOWER
#undef ISPRINT
#undef ISUPPER
#undef ISXDIGIT
#include "safe-ctype.h"
#endif