Use safe-ctype.h (ISSPACE etc.) in symbol parsing & comparison
This patch avoids depending on the current locale when parsing &
comparing symbol names, by using libiberty's safe-ctype.h uppercase
TOLOWER, ISXDIGIT, etc. macros instead of the standard ctype.h
tolower, isxdigit, etc. macros/functions.
This commit:
commit b1b60145ae
Author: Pedro Alves <palves@redhat.com>
AuthorDate: Tue May 22 17:35:38 2018 +0100
Support UTF-8 identifiers in C/C++ expressions (PR gdb/22973)
did something similar, except in the expression parser.
This can improve GDB's symbol loading performance significantly.
Currently strcmp_iw_ordered can show up high on profiles (called from
sort_pst_symbols -> std::sort) because of the isspace and tolower
functions. Hannes mentions seeing it as high as in ~24% of the
profiling samples on Windows
(https://sourceware.org/pipermail/gdb-patches/2020-May/168858.html).
I tested GDB's performance (built with "-g -O2") loading a "-g -O0"
build of gdb.
I ran GDB 10 times like:
/bin/time -f %e \
./gdb/gdb --data-directory ./gdb/data-directory -nx \
-batch /tmp/gdb-g-O0
Then I computed the mean time.
The baseline mean time was
gdb 2.515
This patch brings the number down to
gdb 2.096
Which is an around 16% improvement.
gdb/ChangeLog:
2020-05-23 Pedro Alves <palves@redhat.com>
* utils.c: Include "gdbsupport/gdb-safe-ctype.h".
(parse_escape): Use ISDIGIT instead of isdigit.
(puts_debug): Use gdb_isprint instead of isprint.
(fprintf_symbol_filtered): Use ISALNUM instead of isalnum.
(cp_skip_operator_token, skip_ws, strncmp_iw_with_mode): Use
ISSPACE instead of isspace.
(strncmp_iw_with_mode): Use TOLOWER instead of tolower and ISSPACE
instead of isspace.
(strcmp_iw_ordered): Use ISSPACE instead of isspace.
(string_to_core_addr): Use TOLOWER instead of tolower, ISXDIGIT
instead of isxdigit and ISDIGIT instead of isdigit.
gdbsupport/ChangeLog:
2020-05-23 Pedro Alves <palves@redhat.com>
* gdb-safe-ctype.h: New.
This commit is contained in:
parent
c892b44730
commit
51e2cfa2dc
@ -1,3 +1,17 @@
|
||||
2020-05-23 Pedro Alves <palves@redhat.com>
|
||||
|
||||
* utils.c: Include "gdbsupport/gdb-safe-ctype.h".
|
||||
(parse_escape): Use ISDIGIT instead of isdigit.
|
||||
(puts_debug): Use gdb_isprint instead of isprint.
|
||||
(fprintf_symbol_filtered): Use ISALNUM instead of isalnum.
|
||||
(cp_skip_operator_token, skip_ws, strncmp_iw_with_mode): Use
|
||||
ISSPACE instead of isspace.
|
||||
(strncmp_iw_with_mode): Use TOLOWER instead of tolower and ISSPACE
|
||||
instead of isspace.
|
||||
(strcmp_iw_ordered): Use ISSPACE instead of isspace.
|
||||
(string_to_core_addr): Use TOLOWER instead of tolower, ISXDIGIT
|
||||
instead of isxdigit and ISDIGIT instead of isdigit.
|
||||
|
||||
2020-05-22 Simon Marchi <simon.marchi@efficios.com>
|
||||
|
||||
* gdbtypes.h (struct type) <field>: New method.
|
||||
|
47
gdb/utils.c
47
gdb/utils.c
@ -74,6 +74,7 @@
|
||||
#include "gdbsupport/scope-exit.h"
|
||||
#include "gdbarch.h"
|
||||
#include "cli-out.h"
|
||||
#include "gdbsupport/gdb-safe-ctype.h"
|
||||
|
||||
void (*deprecated_error_begin_hook) (void);
|
||||
|
||||
@ -1025,7 +1026,7 @@ parse_escape (struct gdbarch *gdbarch, const char **string_ptr)
|
||||
while (++count < 3)
|
||||
{
|
||||
c = (**string_ptr);
|
||||
if (isdigit (c) && c != '8' && c != '9')
|
||||
if (ISDIGIT (c) && c != '8' && c != '9')
|
||||
{
|
||||
(*string_ptr)++;
|
||||
i *= 8;
|
||||
@ -1995,7 +1996,7 @@ puts_debug (char *prefix, char *string, char *suffix)
|
||||
switch (ch)
|
||||
{
|
||||
default:
|
||||
if (isprint (ch))
|
||||
if (gdb_isprint (ch))
|
||||
fputc_unfiltered (ch, gdb_stdlog);
|
||||
|
||||
else
|
||||
@ -2316,7 +2317,7 @@ fprintf_symbol_filtered (struct ui_file *stream, const char *name,
|
||||
static bool
|
||||
valid_identifier_name_char (int ch)
|
||||
{
|
||||
return (isalnum (ch) || ch == '_');
|
||||
return (ISALNUM (ch) || ch == '_');
|
||||
}
|
||||
|
||||
/* Skip to end of token, or to END, whatever comes first. Input is
|
||||
@ -2326,7 +2327,7 @@ static const char *
|
||||
cp_skip_operator_token (const char *token, const char *end)
|
||||
{
|
||||
const char *p = token;
|
||||
while (p != end && !isspace (*p) && *p != '(')
|
||||
while (p != end && !ISSPACE (*p) && *p != '(')
|
||||
{
|
||||
if (valid_identifier_name_char (*p))
|
||||
{
|
||||
@ -2380,9 +2381,9 @@ cp_skip_operator_token (const char *token, const char *end)
|
||||
static void
|
||||
skip_ws (const char *&string1, const char *&string2, const char *end_str2)
|
||||
{
|
||||
while (isspace (*string1))
|
||||
while (ISSPACE (*string1))
|
||||
string1++;
|
||||
while (string2 < end_str2 && isspace (*string2))
|
||||
while (string2 < end_str2 && ISSPACE (*string2))
|
||||
string2++;
|
||||
}
|
||||
|
||||
@ -2444,8 +2445,8 @@ strncmp_iw_with_mode (const char *string1, const char *string2,
|
||||
while (1)
|
||||
{
|
||||
if (skip_spaces
|
||||
|| ((isspace (*string1) && !valid_identifier_name_char (*string2))
|
||||
|| (isspace (*string2) && !valid_identifier_name_char (*string1))))
|
||||
|| ((ISSPACE (*string1) && !valid_identifier_name_char (*string2))
|
||||
|| (ISSPACE (*string2) && !valid_identifier_name_char (*string1))))
|
||||
{
|
||||
skip_ws (string1, string2, end_str2);
|
||||
skip_spaces = false;
|
||||
@ -2478,7 +2479,7 @@ strncmp_iw_with_mode (const char *string1, const char *string2,
|
||||
if (match_for_lcd != NULL && abi_start != string1)
|
||||
match_for_lcd->mark_ignored_range (abi_start, string1);
|
||||
|
||||
while (isspace (*string1))
|
||||
while (ISSPACE (*string1))
|
||||
string1++;
|
||||
}
|
||||
|
||||
@ -2503,9 +2504,9 @@ strncmp_iw_with_mode (const char *string1, const char *string2,
|
||||
string1++;
|
||||
string2++;
|
||||
|
||||
while (isspace (*string1))
|
||||
while (ISSPACE (*string1))
|
||||
string1++;
|
||||
while (string2 < end_str2 && isspace (*string2))
|
||||
while (string2 < end_str2 && ISSPACE (*string2))
|
||||
string2++;
|
||||
continue;
|
||||
}
|
||||
@ -2599,14 +2600,14 @@ strncmp_iw_with_mode (const char *string1, const char *string2,
|
||||
if (case_sensitivity == case_sensitive_on && *string1 != *string2)
|
||||
break;
|
||||
if (case_sensitivity == case_sensitive_off
|
||||
&& (tolower ((unsigned char) *string1)
|
||||
!= tolower ((unsigned char) *string2)))
|
||||
&& (TOLOWER ((unsigned char) *string1)
|
||||
!= TOLOWER ((unsigned char) *string2)))
|
||||
break;
|
||||
|
||||
/* If we see any non-whitespace, non-identifier-name character
|
||||
(any of "()<>*&" etc.), then skip spaces the next time
|
||||
around. */
|
||||
if (!isspace (*string1) && !valid_identifier_name_char (*string1))
|
||||
if (!ISSPACE (*string1) && !valid_identifier_name_char (*string1))
|
||||
skip_spaces = true;
|
||||
|
||||
string1++;
|
||||
@ -2727,16 +2728,16 @@ strcmp_iw_ordered (const char *string1, const char *string2)
|
||||
|
||||
while (*string1 != '\0' && *string2 != '\0')
|
||||
{
|
||||
while (isspace (*string1))
|
||||
while (ISSPACE (*string1))
|
||||
string1++;
|
||||
while (isspace (*string2))
|
||||
while (ISSPACE (*string2))
|
||||
string2++;
|
||||
|
||||
switch (case_pass)
|
||||
{
|
||||
case case_sensitive_off:
|
||||
c1 = tolower ((unsigned char) *string1);
|
||||
c2 = tolower ((unsigned char) *string2);
|
||||
c1 = TOLOWER ((unsigned char) *string1);
|
||||
c2 = TOLOWER ((unsigned char) *string2);
|
||||
break;
|
||||
case case_sensitive_on:
|
||||
c1 = *string1;
|
||||
@ -2924,17 +2925,17 @@ string_to_core_addr (const char *my_string)
|
||||
{
|
||||
CORE_ADDR addr = 0;
|
||||
|
||||
if (my_string[0] == '0' && tolower (my_string[1]) == 'x')
|
||||
if (my_string[0] == '0' && TOLOWER (my_string[1]) == 'x')
|
||||
{
|
||||
/* Assume that it is in hex. */
|
||||
int i;
|
||||
|
||||
for (i = 2; my_string[i] != '\0'; i++)
|
||||
{
|
||||
if (isdigit (my_string[i]))
|
||||
if (ISDIGIT (my_string[i]))
|
||||
addr = (my_string[i] - '0') + (addr * 16);
|
||||
else if (isxdigit (my_string[i]))
|
||||
addr = (tolower (my_string[i]) - 'a' + 0xa) + (addr * 16);
|
||||
else if (ISXDIGIT (my_string[i]))
|
||||
addr = (TOLOWER (my_string[i]) - 'a' + 0xa) + (addr * 16);
|
||||
else
|
||||
error (_("invalid hex \"%s\""), my_string);
|
||||
}
|
||||
@ -2946,7 +2947,7 @@ string_to_core_addr (const char *my_string)
|
||||
|
||||
for (i = 0; my_string[i] != '\0'; i++)
|
||||
{
|
||||
if (isdigit (my_string[i]))
|
||||
if (ISDIGIT (my_string[i]))
|
||||
addr = (my_string[i] - '0') + (addr * 10);
|
||||
else
|
||||
error (_("invalid decimal \"%s\""), my_string);
|
||||
|
@ -1,3 +1,7 @@
|
||||
2020-05-23 Pedro Alves <palves@redhat.com>
|
||||
|
||||
* gdb-safe-ctype.h: New.
|
||||
|
||||
2020-05-16 Tankut Baris Aktemur <tankut.baris.aktemur@intel.com>
|
||||
|
||||
* linux-ia64-low.cc (ia64_target::sw_breakpoint_from_kind):
|
||||
|
46
gdbsupport/gdb-safe-ctype.h
Normal file
46
gdbsupport/gdb-safe-ctype.h
Normal file
@ -0,0 +1,46 @@
|
||||
/* Wrapper around libiberty's safe-ctype.h for GDB, the GNU debugger.
|
||||
|
||||
Copyright (C) 2019-2020 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GDB.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef GDB_SAFE_CTYPE_H
|
||||
#define GDB_SAFE_CTYPE_H
|
||||
|
||||
/* After safe-ctype.h is included, we can no longer use the host's
|
||||
ctype routines. Trying to do so results in compile errors. Code
|
||||
that uses safe-ctype.h that wants to refer to the locale-dependent
|
||||
ctype functions must call these wrapper versions instead. */
|
||||
|
||||
static inline int
|
||||
gdb_isprint (int ch)
|
||||
{
|
||||
return isprint (ch);
|
||||
}
|
||||
|
||||
/* readline.h defines these symbols too, but we want libiberty's
|
||||
versions. */
|
||||
#undef ISALPHA
|
||||
#undef ISALNUM
|
||||
#undef ISDIGIT
|
||||
#undef ISLOWER
|
||||
#undef ISPRINT
|
||||
#undef ISUPPER
|
||||
#undef ISXDIGIT
|
||||
|
||||
#include "safe-ctype.h"
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user