libcpp: escape non-ASCII source bytes in -Wbidi-chars= [PR103026]

This flags rich_locations associated with -Wbidi-chars= so that
non-ASCII bytes will be escaped when printing the source lines
(using the diagnostics support I added in
r12-4825-gbd5e882cf6e0def3dd1bc106075d59a303fe0d1e).

In particular, this ensures that the printed source lines will
be pure ASCII, and thus the visual ordering of the characters
will be the same as the logical ordering.

Before:

  Wbidi-chars-1.c: In function ‘main’:
  Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
      6 |     /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
        |                                           ^
  Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
      9 |     /* end admins only ‮ { ⁦*/
        |                            ^

  Wbidi-chars-11.c:6:15: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
      6 | int LRE_‪_PDF_\u202c;
        |               ^
  Wbidi-chars-11.c:8:19: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
      8 | int LRE_\u202a_PDF_‬_;
        |                   ^
  Wbidi-chars-11.c:10:28: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
     10 | const char *s1 = "LRE_‪_PDF_\u202c";
        |                            ^
  Wbidi-chars-11.c:12:33: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
     12 | const char *s2 = "LRE_\u202a_PDF_‬";
        |                                 ^

After:

  Wbidi-chars-1.c: In function ‘main’:
  Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
      6 |     /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */
        |                                                                           ^
  Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
      9 |     /* end admins only <U+202E> { <U+2066>*/
        |                                            ^

  Wbidi-chars-11.c:6:15: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
      6 | int LRE_<U+202A>_PDF_\u202c;
        |                       ^
  Wbidi-chars-11.c:8:19: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
      8 | int LRE_\u202a_PDF_<U+202C>_;
        |                   ^
  Wbidi-chars-11.c:10:28: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
     10 | const char *s1 = "LRE_<U+202A>_PDF_\u202c";
        |                                    ^
  Wbidi-chars-11.c:12:33: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
     12 | const char *s2 = "LRE_\u202a_PDF_<U+202C>";
        |                                 ^

libcpp/ChangeLog:
	PR preprocessor/103026
	* lex.c (maybe_warn_bidi_on_close): Use a rich_location
	and call set_escape_on_output (true) on it.
	(maybe_warn_bidi_on_char): Likewise.

Signed-off-by: David Malcolm <dmalcolm@redhat.com>
This commit is contained in:
David Malcolm 2021-11-02 09:54:32 -04:00
parent ea9e0d6c27
commit 1a7f2c0774
1 changed files with 17 additions and 12 deletions

View File

@ -1427,9 +1427,11 @@ maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
const location_t loc
= linemap_position_for_column (pfile->line_table,
CPP_BUF_COLUMN (pfile->buffer, p));
cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
"unpaired UTF-8 bidirectional control character "
"detected");
rich_location rich_loc (pfile->line_table, loc);
rich_loc.set_escape_on_output (true);
cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
"unpaired UTF-8 bidirectional control character "
"detected");
}
/* We're done with this context. */
bidi::on_close ();
@ -1454,6 +1456,9 @@ maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
const location_t loc
= linemap_position_for_column (pfile->line_table,
CPP_BUF_COLUMN (pfile->buffer, p));
rich_location rich_loc (pfile->line_table, loc);
rich_loc.set_escape_on_output (true);
/* It seems excessive to warn about a PDI/PDF that is closing
an opened context because we've already warned about the
opening character. Except warn when we have a UCN x UTF-8
@ -1462,20 +1467,20 @@ maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
{
if (warn_bidi == bidirectional_unpaired
&& bidi::current_ctx_ucn_p () != ucn_p)
cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
"UTF-8 vs UCN mismatch when closing "
"a context by \"%s\"", bidi::to_str (kind));
cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
"UTF-8 vs UCN mismatch when closing "
"a context by \"%s\"", bidi::to_str (kind));
}
else if (warn_bidi == bidirectional_any)
{
if (kind == bidi::kind::PDF || kind == bidi::kind::PDI)
cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
"\"%s\" is closing an unopened context",
bidi::to_str (kind));
cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
"\"%s\" is closing an unopened context",
bidi::to_str (kind));
else
cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
"found problematic Unicode character \"%s\"",
bidi::to_str (kind));
cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
"found problematic Unicode character \"%s\"",
bidi::to_str (kind));
}
}
/* We're done with this context. */