From a04c0734e366a167a55f635b7d09de09b5013051 Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Wed, 19 Jun 2019 19:08:24 +0000 Subject: [PATCH] PR translation/90156 - add linter check suggesting to replace %<%s%> with %qs gcc/c-family/ChangeLog: PR translation/90156 * c-format.c (function_format_info::format_type): Adjust type. (function_format_info::is_raw): New member. (decode_format_type): Adjust signature. Handle "raw" diag attributes. (decode_format_attr): Adjust call to decode_format_type. Avoid a redundant call to convert_format_name_to_system_name. Avoid abbreviating the word "arguments" in a diagnostic. (format_warning_substr): New function. (avoid_dollar_number): Quote dollar sign in a diagnostic. (finish_dollar_format_checking): Same. (check_format_info): Same. (struct baltoks_t): New. (c_opers, c_keywords, cxx_keywords, badwords, contrs): New arrays. (maybe_diag_unbalanced_tokens, check_tokens, check_plain): New functions. (check_format_info_main): Call check_plain. Use baltoks_t. Call maybe_diag_unbalanced_tokens. (handle_format_attribute): Spell out the word "arguments" in a diagnostic. gcc/testsuite/ChangeLog: PR translation/90156 * gcc.dg/format/gcc_diag-11.c: Enable. From-SVN: r272483 --- gcc/c-family/ChangeLog | 22 + gcc/c-family/c-format.c | 1051 ++++++++++++++++++++- gcc/testsuite/ChangeLog | 5 + gcc/testsuite/gcc.dg/format/gcc_diag-11.c | 3 +- 4 files changed, 1044 insertions(+), 37 deletions(-) diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 42247182f11..f64b749e86b 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,25 @@ +2019-06-19 Martin Sebor + + PR translation/90156 + * c-format.c (function_format_info::format_type): Adjust type. + (function_format_info::is_raw): New member. + (decode_format_type): Adjust signature. Handle "raw" diag attributes. + (decode_format_attr): Adjust call to decode_format_type. + Avoid a redundant call to convert_format_name_to_system_name. + Avoid abbreviating the word "arguments" in a diagnostic. + (format_warning_substr): New function. + (avoid_dollar_number): Quote dollar sign in a diagnostic. + (finish_dollar_format_checking): Same. + (check_format_info): Same. + (struct baltoks_t): New. + (c_opers, c_keywords, cxx_keywords, badwords, contrs): New arrays. + (maybe_diag_unbalanced_tokens, check_tokens, check_plain): New + functions. + (check_format_info_main): Call check_plain. Use baltoks_t. Call + maybe_diag_unbalanced_tokens. + (handle_format_attribute): Spell out the word "arguments" in + a diagnostic. + 2019-06-11 Matthew Beliveau PR c++/90449 - add -Winaccessible-base option. diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c index b10599f9982..a790f3c0394 100644 --- a/gcc/c-family/c-format.c +++ b/gcc/c-family/c-format.c @@ -52,7 +52,13 @@ enum format_type { printf_format_type, asm_fprintf_format_type, struct function_format_info { - int format_type; /* type of format (printf, scanf, etc.) */ + enum format_type format_type; /* type of format (printf, scanf, etc.) */ + /* IS_RAW is relevant only for GCC diagnostic format functions. + It is set for "raw" formatting functions like pp_printf that + are not intended to produce complete diagnostics according to + GCC guidelines, and clear for others like error and warning + whose format string is checked for proper quoting and spelling. */ + bool is_raw; unsigned HOST_WIDE_INT format_num; /* number of format argument */ unsigned HOST_WIDE_INT first_arg_num; /* number of first arg (zero for varargs) */ }; @@ -65,7 +71,7 @@ static GTY(()) tree locus; static bool decode_format_attr (const_tree, tree, tree, function_format_info *, bool); -static int decode_format_type (const char *); +static format_type decode_format_type (const char *, bool * = NULL); static bool check_format_string (const_tree argument, unsigned HOST_WIDE_INT format_num, @@ -111,6 +117,32 @@ format_warning_at_char (location_t fmt_string_loc, tree format_string_cst, return warned; } + +/* Emit a warning as per format_warning_va, but construct the substring_loc + for the substring at offset (POS1, POS2 - 1) within a string constant + FORMAT_STRING_CST at FMT_STRING_LOC. */ + +ATTRIBUTE_GCC_DIAG (6,7) +static bool +format_warning_substr (location_t fmt_string_loc, tree format_string_cst, + int pos1, int pos2, int opt, const char *gmsgid, ...) +{ + va_list ap; + va_start (ap, gmsgid); + tree string_type = TREE_TYPE (format_string_cst); + + pos2 -= 1; + + substring_loc fmt_loc (fmt_string_loc, string_type, pos1, pos1, pos2); + format_string_diagnostic_t diag (fmt_loc, NULL, UNKNOWN_LOCATION, NULL, + NULL); + bool warned = diag.emit_warning_va (opt, gmsgid, &ap); + va_end (ap); + + return warned; +} + + /* Check that we have a pointer to a string suitable for use as a format. The default is to check for a char type. For objective-c dialects, this is extended to include references to string @@ -320,10 +352,8 @@ decode_format_attr (const_tree fntype, tree atname, tree args, { const char *p = IDENTIFIER_POINTER (format_type_id); - p = convert_format_name_to_system_name (p); + info->format_type = decode_format_type (p, &info->is_raw); - info->format_type = decode_format_type (p); - if (!c_dialect_objc () && info->format_type == gcc_objc_string_format_type) { @@ -359,7 +389,7 @@ decode_format_attr (const_tree fntype, tree atname, tree args, if (info->first_arg_num != 0 && info->first_arg_num <= info->format_num) { gcc_assert (!validated_p); - error ("format string argument follows the args to be formatted"); + error ("format string argument follows the arguments to be formatted"); return false; } @@ -1067,27 +1097,55 @@ static void format_type_warning (const substring_loc &fmt_loc, char conversion_char); /* Decode a format type from a string, returning the type, or - format_type_error if not valid, in which case the caller should print an - error message. */ -static int -decode_format_type (const char *s) + format_type_error if not valid, in which case the caller should + print an error message. On success, when IS_RAW is non-null, set + *IS_RAW when the format type corresponds to a GCC "raw" diagnostic + formatting function and clear it otherwise. */ +static format_type +decode_format_type (const char *s, bool *is_raw /* = NULL */) { - int i; - int slen; + bool is_raw_buf; + + if (!is_raw) + is_raw = &is_raw_buf; + + *is_raw = false; s = convert_format_name_to_system_name (s); - slen = strlen (s); - for (i = 0; i < n_format_types; i++) + + size_t slen = strlen (s); + for (int i = 0; i < n_format_types; i++) { - int alen; + /* Check for a match with no underscores. */ if (!strcmp (s, format_types[i].name)) - return i; - alen = strlen (format_types[i].name); + return static_cast (i); + + /* Check for leading and trailing underscores. */ + size_t alen = strlen (format_types[i].name); if (slen == alen + 4 && s[0] == '_' && s[1] == '_' && s[slen - 1] == '_' && s[slen - 2] == '_' && !strncmp (s + 2, format_types[i].name, alen)) - return i; + return static_cast(i); + + /* Check for the "_raw" suffix and no leading underscores. */ + if (slen == alen + 4 + && !strncmp (s, format_types[i].name, alen) + && !strcmp (s + alen, "_raw")) + { + *is_raw = true; + return static_cast(i); + } + + /* Check for the "_raw__" suffix and leading underscores. */ + if (slen == alen + 8 && s[0] == '_' && s[1] == '_' + && !strncmp (s + 2, format_types[i].name, alen) + && !strcmp (s + 2 + alen, "_raw__")) + { + *is_raw = true; + return static_cast(i); + } } + return format_type_error; } @@ -1350,7 +1408,8 @@ avoid_dollar_number (const char *format) format++; if (*format == '$') { - warning (OPT_Wformat_, "$ operand number used after format without operand number"); + warning (OPT_Wformat_, + "%<$%>operand number used after format without operand number"); return true; } return false; @@ -1381,7 +1440,8 @@ finish_dollar_format_checking (format_check_results *res, int pointer_gap_ok) found_pointer_gap = true; else warning_at (res->format_string_loc, OPT_Wformat_, - "format argument %d unused before used argument %d in $-style format", + "format argument %d unused before used argument %d " + "in %<$%>-style format", i + 1, dollar_max_arg_used); } } @@ -1525,7 +1585,8 @@ check_format_info (function_format_info *info, tree params, } if (res.number_dollar_extra_args > 0 && res.number_non_literal == 0 && res.number_other == 0) - warning_at (loc, OPT_Wformat_extra_args, "unused arguments in $-style format"); + warning_at (loc, OPT_Wformat_extra_args, + "unused arguments in %<$%>-style format"); if (res.number_empty > 0 && res.number_non_literal == 0 && res.number_other == 0) warning_at (loc, OPT_Wformat_zero_length, "zero-length %s format string", @@ -2789,6 +2850,907 @@ check_argument_type (const format_char_info *fci, return true; } +/* Describes "paired tokens" within the format string that are + expected to be balanced. */ + +struct baltoks_t +{ + baltoks_t (): singlequote (), doublequote () { } + + typedef auto_vec balanced_tokens_t; + /* Vectors of pointers to opening brackets ('['), curly brackets ('{'), + quoting directives (like GCC "%<"), parentheses, and angle brackets + ('<'). Used to detect unbalanced tokens. */ + balanced_tokens_t brackets; + balanced_tokens_t curly; + balanced_tokens_t quotdirs; + balanced_tokens_t parens; + balanced_tokens_t pointy; + /* Pointer to the last opening quote. */ + const char *singlequote; + const char *doublequote; +}; + +/* Describes a keyword, operator, or other name. */ + +struct token_t +{ + const char *name; /* Keyword/operator name. */ + unsigned char len; /* Its length. */ + const char *alt; /* Alternate spelling. */ +}; + +/* Helper for initializing global token_t arrays below. */ +#define NAME(name) { name, sizeof name - 1, NULL } + +/* C/C++ operators that are expected to be quoted within the format + string. */ + +static const token_t c_opers[] = + { + NAME ("!="), NAME ("%="), NAME ("&&"), NAME ("&="), NAME ("*="), + NAME ("++"), NAME ("+="), NAME ("--"), NAME ("-="), NAME ("->"), + NAME ("/="), NAME ("<<"), NAME ("<<="), NAME ("<="), NAME ("=="), + NAME (">="), NAME (">>="), NAME (">>"), NAME ("?:"), NAME ("^="), + NAME ("|="), NAME ("||") + }; + +static const token_t cxx_opers[] = + { + NAME ("->*"), NAME (".*"), NAME ("::"), NAME ("<=>") + }; + +/* Common C/C++ keywords that are expected to be quoted within the format + string. Keywords like auto, inline, or volatile are exccluded because + they are sometimes used in common terms like /auto variables/, /inline + function/, or /volatile access/ where they should not be quoted. */ + +static const token_t c_keywords[] = + { +#undef NAME +#define NAME(name, alt) { name, sizeof name - 1, alt } + + NAME ("alignas", NULL), + NAME ("alignof", NULL), + NAME ("asm", NULL), + NAME ("bool", NULL), + NAME ("char", NULL), + NAME ("const %", NULL), + NAME ("const-qualified", "%-qualified"), + NAME ("float", NULL), + NAME ("ifunc", NULL), + NAME ("int", NULL), + NAME ("long double", NULL), + NAME ("long int", NULL), + NAME ("long long", NULL), + NAME ("malloc", NULL), + NAME ("noclone", NULL), + NAME ("noinline", NULL), + NAME ("nonnull", NULL), + NAME ("noreturn", NULL), + NAME ("nothrow", NULL), + NAME ("offsetof", NULL), + NAME ("readonly", "read-only"), + NAME ("readwrite", "read-write"), + NAME ("restrict %", NULL), + NAME ("restrict-qualified", "%-qualified"), + NAME ("short int", NULL), + NAME ("signed char", NULL), + NAME ("signed int", NULL), + NAME ("signed long", NULL), + NAME ("signed short", NULL), + NAME ("sizeof", NULL), + NAME ("typeof", NULL), + NAME ("unsigned char", NULL), + NAME ("unsigned int", NULL), + NAME ("unsigned long", NULL), + NAME ("unsigned short", NULL), + NAME ("volatile %", NULL), + NAME ("volatile-qualified", "%-qualified"), + NAME ("weakref", NULL), + }; + +static const token_t cxx_keywords[] = + { + /* C++ only keywords and operators. */ + NAME ("catch", NULL), + NAME ("constexpr if", NULL), + NAME ("constexpr", NULL), + NAME ("consteval", NULL), + NAME ("decltype", NULL), + NAME ("nullptr", NULL), + NAME ("operator delete", NULL), + NAME ("operator new", NULL), + NAME ("typeid", NULL), + NAME ("typeinfo", NULL) + }; + +/* Blacklisted words such as misspellings that should be avoided in favor + of the specified alternatives. */ +static const struct +{ + const char *name; /* Bad word. */ + unsigned char len; /* Its length. */ + const char *alt; /* Preferred alternative. */ +} badwords[] = + { + NAME ("arg", "argument"), + NAME ("bitfield", "bit-field"), + NAME ("builtin function", "built-in function"), + NAME ("can not", "cannot"), + NAME ("commandline option", "command-line option"), + NAME ("commandline", "command line"), + NAME ("command line option", "command-line option"), + NAME ("decl", "declaration"), + NAME ("enumeral", "enumerated"), + NAME ("floating point", "floating-point"), + NAME ("non-zero", "nonzero"), + NAME ("reg", "register"), + NAME ("stmt", "statement"), + }; + +/* Common contractions that should be avoided in favor of the specified + alternatives. */ + +static const struct +{ + const char *name; /* Contraction. */ + unsigned char len; /* Its length. */ + const char *alt; /* Preferred alternative. */ +} contrs[] = + { + NAME ("can't", "cannot"), + NAME ("didn't", "did not"), + /* These are commonly abused. Avoid diagnosing them for now. + NAME ("isn't", "is not"), + NAME ("don't", "is not"), + */ + NAME ("mustn't", "must not"), + NAME ("needn't", "need not"), + NAME ("should't", "should not"), + NAME ("that's", "that is"), + NAME ("there's", "there is"), + NAME ("they're", "they are"), + NAME ("what's", "what is"), + NAME ("won't", "will not") + }; + +/* Check for unquoted TOKENS. FORMAT_STRING_LOC is the location of + the format string, FORMAT_STRING_CST the format string itself (as + a tree), ORIG_FORMAT_CHARS and FORMAT_CHARS are pointers to + the beginning of the format string and the character currently + being processed, and BALTOKS describes paired "tokens" within + the format string that are expected to be balanced. + Returns a pointer to the last processed character or null when + nothing was done. */ + +static const char* +check_tokens (const token_t *tokens, unsigned ntoks, + location_t format_string_loc, tree format_string_cst, + const char *orig_format_chars, const char *format_chars, + baltoks_t &baltoks) +{ + /* For brevity. */ + const int opt = OPT_Wformat_diag; + /* Zero-based starting position of a problem sequence. */ + int fmtchrpos = format_chars - orig_format_chars; + + /* For identifier-like "words," set to the word length. */ + unsigned wlen = 0; + /* Set for an operator, clear for an identifier/word. */ + bool is_oper = false; + bool underscore = false; + + if (format_chars[0] == '_' || ISALPHA (format_chars[0])) + { + while (format_chars[wlen] == '_' || ISALNUM (format_chars[wlen])) + { + underscore |= format_chars[wlen] == '_'; + ++wlen; + } + } + else + is_oper = true; + + for (unsigned i = 0; i != ntoks; ++i) + { + unsigned toklen = tokens[i].len; + + if (toklen < wlen + || strncmp (format_chars, tokens[i].name, toklen)) + continue; + + if (toklen == 2 + && format_chars - orig_format_chars > 0 + && (TOUPPER (format_chars[-1]) == 'C' + || TOUPPER (format_chars[-1]) == 'G')) + return format_chars + toklen - 1; /* Reference to C++ or G++. */ + + if (ISPUNCT (format_chars[toklen - 1])) + { + if (format_chars[toklen - 1] == format_chars[toklen]) + return NULL; /* Operator followed by another punctuator. */ + } + else if (ISALNUM (format_chars[toklen])) + return NULL; /* Keyword prefix for a longer word. */ + + if (toklen == 2 + && format_chars[0] == '-' + && format_chars[1] == '-' + && ISALNUM (format_chars[2])) + return NULL; /* Probably option like --help. */ + + /* Allow this ugly warning for the time being. */ + if (toklen == 2 + && format_chars - orig_format_chars > 6 + && !strncmp (format_chars - 7, " count >= width of ", 19)) + return format_chars + 10; + + /* The token is a type if it ends in an alphabetic character. */ + bool is_type = (ISALPHA (tokens[i].name[toklen - 1]) + && strchr (tokens[i].name, ' ')); + + /* Backtrack to the last alphabetic character (for tokens whose + names end in '%'). */ + if (!is_oper) + while (!ISALPHA (tokens[i].name[toklen - 1])) + --toklen; + + if (format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + toklen, opt, + (is_type + ? G_("unquoted type name %<%.*s%> in format") + : (is_oper + ? G_("unquoted operator %<%.*s%> in format") + : G_("unquoted keyword %<%.*s%> in format"))), + toklen, format_chars) + && tokens[i].alt) + inform (format_string_loc, "use %qs instead", tokens[i].alt); + + return format_chars + toklen - 1; + } + + /* Diagnose unquoted __attribute__. Consider any parenthesized + argument to the attribute to avoid redundant warnings for + the double parentheses that might follow. */ + if (!strncmp (format_chars, "__attribute", sizeof "__attribute" - 1)) + { + unsigned nchars = sizeof "__attribute" - 1; + while ('_' == format_chars[nchars]) + ++nchars; + + for (int i = nchars; format_chars[i]; ++i) + if (' ' != format_chars[i]) + { + nchars = i; + break; + } + + if (format_chars[nchars] == '(') + { + baltoks.parens.safe_push (format_chars + nchars); + + ++nchars; + bool close = false; + if (format_chars[nchars] == '(') + { + baltoks.parens.safe_push (format_chars + nchars); + close = true; + ++nchars; + } + for (int i = nchars; format_chars[i]; ++i) + if (')' == format_chars[i]) + { + if (baltoks.parens.length () > 0) + baltoks.parens.pop (); + nchars = i + 1; + break; + } + + if (close && format_chars[nchars] == ')') + { + if (baltoks.parens.length () > 0) + baltoks.parens.pop (); + ++nchars; + } + } + + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted attribute in format"); + return format_chars + nchars - 1; + } + + /* Diagnose unquoted built-ins. */ + if (format_chars[0] == '_' + && format_chars[1] == '_' + && (!strncmp (format_chars + 2, "atomic", sizeof "atomic" - 1) + || !strncmp (format_chars + 2, "builtin", sizeof "builtin" - 1) + || !strncmp (format_chars + 2, "sync", sizeof "sync" - 1))) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + wlen, opt, + "unquoted name of built-in function %<%.*s%> " + "in format", + wlen, format_chars); + return format_chars + wlen - 1; + } + + /* Diagnose unquoted substrings of alphanumeric characters containing + underscores. They most likely refer to identifiers and should be + quoted. */ + if (underscore) + format_warning_substr (format_string_loc, format_string_cst, + format_chars - orig_format_chars, + format_chars + wlen - orig_format_chars, + opt, + "unquoted identifier or keyword %<%.*s%> in format", + wlen, format_chars); + else + { + /* Diagnose some common missspellings. */ + for (unsigned i = 0; i != sizeof badwords / sizeof *badwords; ++i) + { + unsigned badwlen = strspn (badwords[i].name, " -"); + if (wlen >= badwlen + && (wlen <= badwords[i].len + || (wlen == badwords[i].len + 1U + && TOUPPER (format_chars[wlen - 1]) == 'S')) + && !strncasecmp (format_chars, badwords[i].name, badwords[i].len)) + { + /* Handle singular as well as plural forms of all bad words + even though the latter don't necessarily make sense for + all of the former (like "can nots"). */ + badwlen = badwords[i].len; + const char *plural = ""; + if (TOUPPER (format_chars[badwlen]) == 'S') + { + ++badwlen; + plural = "s"; + } + + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + badwords[i].len, + opt, + "misspelled term %<%.*s%> in format; " + "use %<%s%s%> instead", + badwlen, format_chars, + badwords[i].alt, plural); + + return format_chars + badwords[i].len - 1; + } + } + + /* Skip C++/G++. */ + if (!strncasecmp (format_chars, "c++", 3) + || !strncasecmp (format_chars, "g++", 3)) + return format_chars + 2; + } + + return wlen ? format_chars + wlen - 1 : NULL; +} + +/* Check plain text in a format string of a GCC diagnostic function + for common quoting, punctuation, and spelling mistakes, and issue + -Wformat-diag warnings if they are found. FORMAT_STRING_LOC is + the location of the format string, FORMAT_STRING_CST the format + string itself (as a tree), ORIG_FORMAT_CHARS and FORMAT_CHARS are + pointers to the beginning of the format string and the character + currently being processed, and BALTOKS describes paired "tokens" + within the format string that are expected to be balanced. + Returns a pointer to the last processed character. */ + +static const char* +check_plain (location_t format_string_loc, tree format_string_cst, + const char *orig_format_chars, const char *format_chars, + baltoks_t &baltoks) +{ + /* For brevity. */ + const int opt = OPT_Wformat_diag; + /* Zero-based starting position of a problem sequence. */ + int fmtchrpos = format_chars - orig_format_chars; + + if (*format_chars == '%') + { + /* Diagnose %<%s%> and suggest using %qs instead. */ + if (!strncmp (format_chars, "%<%s%>", 6)) + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + 6, opt, + "quoted %qs directive in format; " + "use %qs instead", "%s", "%qs"); + else if (format_chars - orig_format_chars > 2 + && !strncasecmp (format_chars - 3, "can%'t", 5) + && !ISALPHA (format_chars[1])) + format_warning_substr (format_string_loc, + format_string_cst, + fmtchrpos - 3, fmtchrpos + 3, opt, + "contraction %<%.*s%> in format; " + "use %qs instead", + 6, format_chars - 3, "cannot"); + + return format_chars; + } + + if (baltoks.quotdirs.length ()) + { + /* Skip over all plain text within a quoting directive until + the next directive. */ + while (*format_chars && '%' != *format_chars) + ++format_chars; + + return format_chars; + } + + /* The length of the problem sequence. */ + int nchars = 0; + + /* Diagnose any whitespace characters other than but only + leading, trailing, and two or more consecutive s. Do + this before diagnosing control characters because whitespace + is a subset of controls. */ + const char *other_than_space = NULL; + while (ISSPACE (format_chars[nchars])) + { + if (format_chars[nchars] != ' ' && !other_than_space) + other_than_space = format_chars + nchars; + ++nchars; + } + + if (nchars) + { + /* This is the most common problem: go the extra mile to describe + the problem in as much helpful detail as possible. */ + if (other_than_space) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted whitespace character %qc in format", + *other_than_space); + return format_chars + nchars - 1; + } + + if (fmtchrpos == 0) + /* Accept strings of leading spaces with no warning. */ + return format_chars + nchars - 1; + + if (!format_chars[nchars]) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "spurious trailing space in format"); + return format_chars + nchars - 1; + } + + if (nchars > 1) + { + if (nchars == 2 + && orig_format_chars < format_chars + && format_chars[-1] == '.' + && format_chars[0] == ' ' + && format_chars[1] == ' ') + { + /* A period followed by two spaces. */ + if (ISUPPER (*orig_format_chars)) + { + /* If the part before the period is a capitalized + sentence check to make sure that what follows + is also capitalized. */ + if (ISLOWER (format_chars[2])) + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "inconsistent capitalization in " + "format"); + } + } + else + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted sequence of %i consecutive " + "space characters in format", nchars); + return format_chars + nchars - 1; + } + + format_chars += nchars; + nchars = 0; + } + + fmtchrpos = format_chars - orig_format_chars; + + /* Diagnose any unquoted control characters other than the terminating + NUL. */ + while (format_chars[nchars] && ISCNTRL (format_chars[nchars])) + ++nchars; + + if (nchars > 1) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted control characters in format"); + return format_chars + nchars - 1; + } + if (nchars) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted control character %qc in format", + *format_chars); + return format_chars + nchars - 1; + } + + if (ISPUNCT (format_chars[0])) + { + size_t nelts = sizeof c_opers / sizeof *c_opers; + if (const char *ret = check_tokens (c_opers, nelts, + format_string_loc, format_string_cst, + orig_format_chars, format_chars, + baltoks)) + return ret; + + nelts = c_dialect_cxx () ? sizeof cxx_opers / sizeof *cxx_opers : 0; + if (const char *ret = check_tokens (cxx_opers, nelts, + format_string_loc, format_string_cst, + orig_format_chars, format_chars, + baltoks)) + return ret; + } + + if (ISALPHA (format_chars[0])) + { + size_t nelts = sizeof c_keywords / sizeof *c_keywords; + if (const char *ret = check_tokens (c_keywords, nelts, + format_string_loc, format_string_cst, + orig_format_chars, format_chars, + baltoks)) + return ret; + + nelts = c_dialect_cxx () ? sizeof cxx_keywords / sizeof *cxx_keywords : 0; + if (const char *ret = check_tokens (cxx_keywords, nelts, + format_string_loc, format_string_cst, + orig_format_chars, format_chars, + baltoks)) + return ret; + } + + nchars = 0; + + /* Diagnose unquoted options. */ + if ((format_chars == orig_format_chars + || format_chars[-1] == ' ') + && format_chars[0] == '-' + && ((format_chars[1] == '-' + && ISALPHA (format_chars[2])) + || ISALPHA (format_chars[1]))) + { + nchars = 1; + while (ISALNUM (format_chars[nchars]) + || '_' == format_chars[nchars] + || '-' == format_chars[nchars] + || '+' == format_chars[nchars]) + ++nchars; + + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted option name %<%.*s%> in format", + nchars, format_chars); + return format_chars + nchars - 1; + } + + /* Diagnose leading, trailing, and two or more consecutive punctuation + characters. */ + const char *unbalanced = NULL; + while ('%' != format_chars[nchars] + && ISPUNCT (format_chars[nchars]) + && !unbalanced) + { + switch (format_chars[nchars]) + { + case '[': + baltoks.brackets.safe_push (format_chars + nchars); + break; + case '{': + baltoks.curly.safe_push (format_chars + nchars); + break; + case '(': + baltoks.parens.safe_push (format_chars + nchars); + break; + case '<': + baltoks.pointy.safe_push (format_chars + nchars); + break; + + case ']': + if (baltoks.brackets.length () > 0) + baltoks.brackets.pop (); + else + unbalanced = format_chars + nchars; + break; + case '}': + if (baltoks.curly.length () > 0) + baltoks.curly.pop (); + else + unbalanced = format_chars + nchars; + break; + case ')': + if (baltoks.parens.length () > 0) + baltoks.parens.pop (); + else + unbalanced = format_chars + nchars; + break; + case '>': + if (baltoks.pointy.length () > 0) + baltoks.pointy.pop (); + else + unbalanced = format_chars + nchars; + break; + } + + ++nchars; + } + + if (unbalanced) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unbalanced punctuation character %qc in format", + *unbalanced); + return format_chars + nchars - 1; + } + + if (nchars) + { + /* Consider any identifier that follows the pound ('#') sign + a preprocessing directive. */ + if (nchars == 1 + && format_chars[0] == '#' + && ISALPHA (format_chars[1])) + { + while (ISALNUM (format_chars[nchars]) + || format_chars[nchars] == '_') + ++nchars; + + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted preprocessing directive %<%.*s%> " + "in format", nchars, format_chars); + return format_chars + nchars - 1; + } + + /* Diagnose a bare single quote. */ + if (nchars == 1 + && format_chars[0] == '\'' + && format_chars - orig_format_chars + && ISALPHA (format_chars[-1]) + && ISALPHA (format_chars[1])) + { + /* Diagnose a subset of contractions that are best avoided. */ + for (unsigned i = 0; i != sizeof contrs / sizeof *contrs; ++i) + { + const char *apos = strchr (contrs[i].name, '\''); + gcc_assert (apos != NULL); + int off = apos - contrs[i].name; + + if (format_chars - orig_format_chars >= off + && !strncmp (format_chars - off, + contrs[i].name, contrs[i].len)) + { + format_warning_substr (format_string_loc, + format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "contraction %<%.*s%> in format; " + "use %qs instead", + contrs[i].len, contrs[i].name, + contrs[i].alt); + return format_chars + nchars - 1; + } + } + + if (format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "bare apostrophe %<'%> in format")) + inform (format_string_loc, + "if avoiding the apostrophe is not feasible, enclose " + "it in a pair of %qs and %qs directives instead", + "%<", "%>"); + return format_chars + nchars - 1; + } + + /* Diagnose a backtick (grave accent). */ + if (nchars == 1 + && format_chars[0] == '`') + { + if (format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "grave accent %<`%> in format")) + inform (format_string_loc, + "use the apostrophe directive %qs instead", "%'"); + return format_chars + nchars - 1; + } + + /* Diagnose a punctuation character after a space. */ + if (nchars == 1 + && format_chars - orig_format_chars + && format_chars[-1] == ' ' + && strspn (format_chars, "!?:;.,") == 1) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos - 1, fmtchrpos, opt, + "space followed by punctuation character " + "%<%c%>", format_chars[0]); + return format_chars; + } + + if (nchars == 1) + { + if (!strncmp (format_chars, "\"%s\"", 4)) + { + if (format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + 4, opt, + "quoted %qs directive in format", + "%s")) + inform (format_string_loc, "if using %qs is not feasible, " + "use %qs instead", "%qs", "\"%-s\""); + } + + if (format_chars[0] == '"') + { + baltoks.doublequote = baltoks.doublequote ? NULL : format_chars; + return format_chars + nchars - 1; + } + if (format_chars[0] == '\'') + { + baltoks.singlequote = baltoks.singlequote ? NULL : format_chars; + return format_chars + nchars - 1; + } + } + + if (fmtchrpos == 0) + { + if (nchars == 1 + && format_chars[0] == '(') + ; /* Text beginning in an open parenthesis. */ + else if (nchars == 3 + && !strncmp (format_chars, "...", 3) + && format_chars[3]) + ; /* Text beginning in an ellipsis. */ + else + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "spurious leading punctuation sequence " + "%<%.*s%> in format", + nchars, format_chars); + return format_chars + nchars - 1; + } + } + else if (!format_chars[nchars]) + { + if (nchars == 1 + && (format_chars[nchars - 1] == ':' + || format_chars[nchars - 1] == ')')) + ; /* Text ending in a colon or a closing parenthesis. */ + else if (nchars == 1 + && ((ISUPPER (*orig_format_chars) + && format_chars[nchars - 1] == '.') + || strspn (format_chars + nchars - 1, "?])") == 1)) + ; /* Capitalized sentence terminated by a single period, + or text ending in a question mark, closing bracket, + or parenthesis. */ + else if (nchars == 2 + && format_chars[0] == '?' + && format_chars[1] == ')') + ; /* A question mark after a closing parenthetical note. */ + else if (nchars == 2 + && format_chars[0] == ')' + && (format_chars[1] == '?' + || format_chars[1] == ';' + || format_chars[1] == ':' + || (ISUPPER (*orig_format_chars) + && format_chars[1] == '.'))) + ; /* Closing parenthetical note followed by a question mark, + semicolon, or colon at the end of the string, or by + a period at the end of a capitalized sentence. */ + else if (nchars == 3 + && format_chars - orig_format_chars > 0 + && !strncmp (format_chars, "...", 3)) + ; /* Text ending in the ellipsis. */ + else + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "spurious trailing punctuation sequence " + "%<%.*s%> in format", + nchars, format_chars); + + return format_chars + nchars - 1; + } + else if (nchars == 2 + && format_chars[0] == ')' + && (format_chars[1] == ':' + || format_chars[1] == ';' + || format_chars[1] == ',') + && format_chars[2] == ' ') + ; /* Closing parenthetical note followed by a colon, semicolon + or a comma followed by a space in the middle of the string. */ + else if (nchars > 1) + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted sequence of %i consecutive " + "punctuation characters %q.*s in format", + nchars, nchars, format_chars); + return format_chars + nchars - 1; + } + + nchars = 0; + + /* Finally, diagnose any unquoted non-graph, non-punctuation characters + other than the terminating NUL. */ + while (format_chars[nchars] + && '%' != format_chars[nchars] + && !ISPUNCT (format_chars[nchars]) + && !ISGRAPH (format_chars[nchars])) + ++nchars; + + if (nchars > 1) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted non-graph characters in format"); + return format_chars + nchars - 1; + } + if (nchars) + { + format_warning_substr (format_string_loc, format_string_cst, + fmtchrpos, fmtchrpos + nchars, opt, + "unquoted non-graph character %qc in format", + *format_chars); + return format_chars + nchars - 1; + } + + return format_chars; +} + +/* Diagnose unbalanced tokens described by BALTOKS in format string + ORIG_FORMAT_CHARS and the corresponding FORMAT_STRING_CST. */ + +static void +maybe_diag_unbalanced_tokens (location_t format_string_loc, + const char *orig_format_chars, + tree format_string_cst, + baltoks_t &baltoks) +{ + const char *unbalanced = NULL; + + if (baltoks.brackets.length ()) + unbalanced = baltoks.brackets.pop (); + else if (baltoks.curly.length ()) + unbalanced = baltoks.curly.pop (); + else if (baltoks.parens.length ()) + unbalanced = baltoks.parens.pop (); + else if (baltoks.pointy.length ()) + unbalanced = baltoks.pointy.pop (); + + if (unbalanced) + format_warning_at_char (format_string_loc, format_string_cst, + unbalanced - orig_format_chars + 1, + OPT_Wformat_diag, + "unbalanced punctuation character %<%c%> in format", + *unbalanced); + + if (baltoks.quotdirs.length ()) + format_warning_at_char (format_string_loc, format_string_cst, + baltoks.quotdirs.pop () - orig_format_chars, + OPT_Wformat_, + "unterminated quoting directive"); + + const char *quote + = baltoks.singlequote ? baltoks.singlequote : baltoks.doublequote; + + if (quote) + format_warning_at_char (format_string_loc, format_string_cst, + quote - orig_format_chars + 1, + OPT_Wformat_diag, + "unterminated quote character %<%c%> in format", + *quote); +} + /* Do the main part of checking a call to a format function. FORMAT_CHARS is the NUL-terminated format string (which at this point may contain internal NUL characters); FORMAT_LENGTH is its length (excluding the @@ -2816,8 +3778,10 @@ check_format_info_main (format_check_results *res, and it didn't use $; 1 if $ formats are in use. */ int has_operand_number = -1; - /* Vector of pointers to opening quoting directives (like GCC "%<"). */ - auto_vec quotdirs; + /* Vectors of pointers to opening quoting directives (like GCC "%<"), + opening braces, brackets, and parentheses. Used to detect unbalanced + tokens. */ + baltoks_t baltoks; /* Pointers to the most recent color directives (like GCC's "%r or %R"). A starting color directive much be terminated before the end of @@ -2828,10 +3792,26 @@ check_format_info_main (format_check_results *res, init_dollar_format_checking (info->first_arg_num, first_fillin_param); + /* In GCC diagnostic functions check plain directives (substrings within + the format string that don't start with %) for quoting and punctuations + problems. */ + bool ck_plain = (!info->is_raw + && (info->format_type == gcc_diag_format_type + || info->format_type == gcc_tdiag_format_type + || info->format_type == gcc_cdiag_format_type + || info->format_type == gcc_cxxdiag_format_type)); + while (*format_chars != 0) { - if (*format_chars++ != '%') + if (ck_plain) + format_chars = check_plain (format_string_loc, + format_string_cst, + orig_format_chars, format_chars, + baltoks); + + if (*format_chars == 0 || *format_chars++ != '%') continue; + if (*format_chars == 0) { format_warning_at_char (format_string_loc, format_string_cst, @@ -2846,6 +3826,8 @@ check_format_info_main (format_check_results *res, continue; } + /* ARGUMENT_PARSER ctor takes FORMAT_CHARS by reference and calls + to ARG_PARSER members may modify the variable. */ flag_chars_t flag_chars; argument_parser arg_parser (info, format_chars, format_string_cst, orig_format_chars, format_string_loc, @@ -2908,7 +3890,7 @@ check_format_info_main (format_check_results *res, flag_chars.validate (fki, fci, flag_specs, format_chars, format_string_cst, format_string_loc, orig_format_chars, format_char, - quotdirs.length () > 0); + baltoks.quotdirs.length () > 0); const int alloc_flag = flag_chars.get_alloc_flag (fki); const bool suppressed = flag_chars.assignment_suppression_p (fki); @@ -2920,17 +3902,17 @@ check_format_info_main (format_check_results *res, if (quot_begin_p && !quot_end_p) { - if (quotdirs.length ()) + if (baltoks.quotdirs.length ()) format_warning_at_char (format_string_loc, format_string_cst, format_chars - orig_format_chars, OPT_Wformat_, "nested quoting directive"); - quotdirs.safe_push (format_chars); + baltoks.quotdirs.safe_push (format_chars); } else if (!quot_begin_p && quot_end_p) { - if (quotdirs.length ()) - quotdirs.pop (); + if (baltoks.quotdirs.length ()) + baltoks.quotdirs.pop (); else format_warning_at_char (format_string_loc, format_string_cst, format_chars - orig_format_chars, @@ -2962,7 +3944,7 @@ check_format_info_main (format_check_results *res, /* Diagnose directives that shouldn't appear in a quoted sequence. (They are denoted by a double quote in FLAGS2.) */ - if (quotdirs.length ()) + if (baltoks.quotdirs.length ()) { if (strchr (fci->flags2, '"')) format_warning_at_char (format_string_loc, format_string_cst, @@ -3018,10 +4000,9 @@ check_format_info_main (format_check_results *res, if (has_operand_number > 0) finish_dollar_format_checking (res, fki->flags & (int) FMT_FLAG_DOLLAR_GAP_POINTER_OK); - if (quotdirs.length ()) - format_warning_at_char (format_string_loc, format_string_cst, - quotdirs.pop () - orig_format_chars, - OPT_Wformat_, "unterminated quoting directive"); + maybe_diag_unbalanced_tokens (format_string_loc, orig_format_chars, + format_string_cst, baltoks); + if (color_begin && !color_end) format_warning_at_char (format_string_loc, format_string_cst, color_begin - orig_format_chars, @@ -4199,7 +5180,7 @@ handle_format_attribute (tree *node, tree atname, tree args, if (arg_num != info.first_arg_num) { if (!(flags & (int) ATTR_FLAG_BUILT_IN)) - error ("args to be formatted is not %<...%>"); + error ("argument to be formatted is not %<...%>"); *no_add_attrs = true; return NULL_TREE; } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index bb2e848a743..678de635777 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2019-06-19 Martin Sebor + + PR translation/90156 + * gcc.dg/format/gcc_diag-11.c: Enable. + 2019-06-19 Steven G. Kargl PR fortran/69499 diff --git a/gcc/testsuite/gcc.dg/format/gcc_diag-11.c b/gcc/testsuite/gcc.dg/format/gcc_diag-11.c index cc5cfd368cf..a976c7aa519 100644 --- a/gcc/testsuite/gcc.dg/format/gcc_diag-11.c +++ b/gcc/testsuite/gcc.dg/format/gcc_diag-11.c @@ -1,8 +1,7 @@ /* Test warnings for common punctuation, quoting, and spelling issues in GCC diagnostics. { dg-do compile } - { dg-options "-Wformat -Wformat-diag" } - { dg-skip-if "-Wformat-diag not available yet" { *-*-* } } + { dg-options "-Wformat -Wformat-diag" } */ /* Magic identifiers must be set before the attribute is used. */