diagnostics: Make line-ending logic consistent with libcpp [PR91733]

libcpp recognizes a lone \r as a valid line ending, so the infrastructure
for retrieving source lines to be output in diagnostics needs to do the
same. This patch fixes file_cache_slot::get_next_line() accordingly so that
diagnostics display the correct part of the source when \r line endings are in
use.

gcc/ChangeLog:

	PR preprocessor/91733
	* input.cc (find_end_of_line): New helper function.
	(file_cache_slot::get_next_line): Recognize \r as a line ending.
	* diagnostic-show-locus.cc (test_escaping_bytes_1): Adapt selftest
	since \r will now be interpreted as a line-ending.

gcc/testsuite/ChangeLog:

	PR preprocessor/91733
	* c-c++-common/pr91733.c: New test.
This commit is contained in:
Lewis Hyatt 2022-07-07 13:59:27 -04:00
parent 6da7f7c5ac
commit 2bd15617e7
3 changed files with 76 additions and 22 deletions

View File

@ -5533,7 +5533,7 @@ test_tab_expansion (const line_table_case &case_)
static void
test_escaping_bytes_1 (const line_table_case &case_)
{
const char content[] = "before\0\1\2\3\r\x80\xff""after\n";
const char content[] = "before\0\1\2\3\v\x80\xff""after\n";
const size_t sz = sizeof (content);
temp_source_file tmp (SELFTEST_LOCATION, ".c", content, sz);
line_table_test ltt (case_);
@ -5548,18 +5548,18 @@ test_escaping_bytes_1 (const line_table_case &case_)
if (finish > LINE_MAP_MAX_LOCATION_WITH_COLS)
return;
/* Locations of the NUL and \r bytes. */
/* Locations of the NUL and \v bytes. */
location_t nul_loc
= linemap_position_for_line_and_column (line_table, ord_map, 1, 7);
location_t r_loc
location_t v_loc
= linemap_position_for_line_and_column (line_table, ord_map, 1, 11);
gcc_rich_location richloc (nul_loc);
richloc.add_range (r_loc);
richloc.add_range (v_loc);
{
test_diagnostic_context dc;
diagnostic_show_locus (&dc, &richloc, DK_ERROR);
ASSERT_STREQ (" before \1\2\3 \x80\xff""after\n"
ASSERT_STREQ (" before \1\2\3\v\x80\xff""after\n"
" ^ ~\n",
pp_formatted_text (dc.printer));
}
@ -5569,7 +5569,7 @@ test_escaping_bytes_1 (const line_table_case &case_)
dc.escape_format = DIAGNOSTICS_ESCAPE_FORMAT_UNICODE;
diagnostic_show_locus (&dc, &richloc, DK_ERROR);
ASSERT_STREQ
(" before<U+0000><U+0001><U+0002><U+0003><U+000D><80><ff>after\n"
(" before<U+0000><U+0001><U+0002><U+0003><U+000B><80><ff>after\n"
" ^~~~~~~~ ~~~~~~~~\n",
pp_formatted_text (dc.printer));
}
@ -5577,7 +5577,7 @@ test_escaping_bytes_1 (const line_table_case &case_)
test_diagnostic_context dc;
dc.escape_format = DIAGNOSTICS_ESCAPE_FORMAT_BYTES;
diagnostic_show_locus (&dc, &richloc, DK_ERROR);
ASSERT_STREQ (" before<00><01><02><03><0d><80><ff>after\n"
ASSERT_STREQ (" before<00><01><02><03><0b><80><ff>after\n"
" ^~~~ ~~~~\n",
pp_formatted_text (dc.printer));
}

View File

@ -646,6 +646,37 @@ file_cache_slot::maybe_read_data ()
return read_data ();
}
/* Helper function for file_cache_slot::get_next_line (), to find the end of
the next line. Returns with the memchr convention, i.e. nullptr if a line
terminator was not found. We need to determine line endings in the same
manner that libcpp does: any of \n, \r\n, or \r is a line ending. */
static char *
find_end_of_line (char *s, size_t len)
{
for (const auto end = s + len; s != end; ++s)
{
if (*s == '\n')
return s;
if (*s == '\r')
{
const auto next = s + 1;
if (next == end)
{
/* Don't find the line ending if \r is the very last character
in the buffer; we do not know if it's the end of the file or
just the end of what has been read so far, and we wouldn't
want to break in the middle of what's actually a \r\n
sequence. Instead, we will handle the case of a file ending
in a \r later. */
break;
}
return (*next == '\n' ? next : s);
}
}
return nullptr;
}
/* Read a new line from file FP, using C as a cache for the data
coming from the file. Upon successful completion, *LINE is set to
the beginning of the line found. *LINE points directly in the
@ -671,17 +702,16 @@ file_cache_slot::get_next_line (char **line, ssize_t *line_len)
char *next_line_start = NULL;
size_t len = 0;
char *line_end = (char *) memchr (line_start, '\n', remaining_size);
char *line_end = find_end_of_line (line_start, remaining_size);
if (line_end == NULL)
{
/* We haven't found the end-of-line delimiter in the cache.
Fill the cache with more data from the file and look for the
'\n'. */
/* We haven't found an end-of-line delimiter in the cache.
Fill the cache with more data from the file and look again. */
while (maybe_read_data ())
{
line_start = m_data + m_line_start_idx;
remaining_size = m_nb_read - m_line_start_idx;
line_end = (char *) memchr (line_start, '\n', remaining_size);
line_end = find_end_of_line (line_start, remaining_size);
if (line_end != NULL)
{
next_line_start = line_end + 1;
@ -690,14 +720,22 @@ file_cache_slot::get_next_line (char **line, ssize_t *line_len)
}
if (line_end == NULL)
{
/* We've loadded all the file into the cache and still no
'\n'. Let's say the line ends up at one byte passed the
/* We've loaded all the file into the cache and still no
terminator. Let's say the line ends up at one byte past the
end of the file. This is to stay consistent with the case
of when the line ends up with a '\n' and line_end points to
that terminal '\n'. That consistency is useful below in
the len calculation. */
line_end = m_data + m_nb_read ;
m_missing_trailing_newline = true;
of when the line ends up with a terminator and line_end points to
that. That consistency is useful below in the len calculation.
If the file ends in a \r, we didn't identify it as a line
terminator above, so do that now instead. */
line_end = m_data + m_nb_read;
if (m_nb_read && line_end[-1] == '\r')
{
--line_end;
m_missing_trailing_newline = false;
}
else
m_missing_trailing_newline = true;
}
else
m_missing_trailing_newline = false;
@ -711,9 +749,8 @@ file_cache_slot::get_next_line (char **line, ssize_t *line_len)
if (m_fp && ferror (m_fp))
return false;
/* At this point, we've found the end of the of line. It either
points to the '\n' or to one byte after the last byte of the
file. */
/* At this point, we've found the end of the of line. It either points to
the line terminator or to one byte after the last byte of the file. */
gcc_assert (line_end != NULL);
len = line_end - line_start;

View File

@ -0,0 +1,17 @@
/* { dg-do preprocess } */
/* { dg-additional-options "-fdiagnostics-show-caret" } */
const char *s = " ";
/* { dg-warning "missing terminating \"" "test1" { target *-*-* } 4 } */
/* { dg-warning "missing terminating \"" "test2" { target *-*-* } 5 } */
/* { dg-begin-multiline-output "test3" }
const char *s = "
^
{ dg-end-multiline-output "test3" } */
/* { dg-begin-multiline-output "test4" }
";
^
{ dg-end-multiline-output "test4" } */