Fix locations within raw strings

Whilst investigating PR preprocessor/78324 I noticed that the
substring location code currently doesn't handle raw strings
correctly: it fails to skip the 'R', opening quote, delimiter
and opening parenthesis.

For example, an attempt to underline chars 4-7 with caret at 6 of
this raw string yields this erroneous output:
   __emit_string_literal_range (R"foo(0123456789)foo",
                                    ~~^~

With the patch, the correct range/caret is printed:

   __emit_string_literal_range (R"foo(0123456789)foo",
                                          ~~^~

gcc/ChangeLog:
	* input.c (selftest::test_lexer_string_locations_raw_string_one_line):
	New function.
	(selftest::test_lexer_string_locations_raw_string_multiline): New
	function.
	(selftest::input_c_tests): Call the new functions, via
	for_each_line_table_case.

gcc/testsuite/ChangeLog:
	* gcc.dg/plugin/diagnostic-test-string-literals-1.c
	(test_raw_string_one_liner): New function.
	(test_raw_string_multiline): New function.

libcpp/ChangeLog:
	* charset.c (cpp_interpret_string_1): Skip locations from
	loc_reader when advancing 'p' when handling raw strings.

From-SVN: r242552
This commit is contained in:
David Malcolm 2016-11-17 15:55:26 +00:00 committed by David Malcolm
parent 141a3ccff1
commit b8f564124e
6 changed files with 139 additions and 1 deletions

View File

@ -1,3 +1,12 @@
2016-11-17 David Malcolm <dmalcolm@redhat.com>
* input.c (selftest::test_lexer_string_locations_raw_string_one_line):
New function.
(selftest::test_lexer_string_locations_raw_string_multiline): New
function.
(selftest::input_c_tests): Call the new functions, via
for_each_line_table_case.
2016-11-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/aarch64.md (mov<mode>): Call

View File

@ -3156,6 +3156,78 @@ test_lexer_string_locations_long_line (const line_table_case &case_)
i, 2, 7 + i, 7 + i);
}
/* Test of locations within a raw string that doesn't contain a newline. */
static void
test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
{
/* .....................00.0000000111111111122.
.....................12.3456789012345678901. */
const char *content = ("R\"foo(0123456789)foo\"\n");
lexer_test test (case_, content, NULL);
/* Verify that we get the expected token back. */
const cpp_token *tok = test.get_token ();
ASSERT_EQ (tok->type, CPP_STRING);
/* Verify that cpp_interpret_string works. */
cpp_string dst_string;
const enum cpp_ttype type = CPP_STRING;
bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
&dst_string, type);
ASSERT_TRUE (result);
ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
free (const_cast <unsigned char *> (dst_string.text));
if (!should_have_column_data_p (line_table->highest_location))
return;
/* 0-9, plus the nil terminator. */
ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
for (int i = 0; i < 11; i++)
ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
i, 1, 7 + i, 7 + i);
}
/* Test of locations within a raw string that contains a newline. */
static void
test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
{
/* .....................00.0000.
.....................12.3456. */
const char *content = ("R\"foo(\n"
/* .....................00000.
.....................12345. */
"hello\n"
"world\n"
/* .....................00000.
.....................12345. */
")foo\"\n");
lexer_test test (case_, content, NULL);
/* Verify that we get the expected token back. */
const cpp_token *tok = test.get_token ();
ASSERT_EQ (tok->type, CPP_STRING);
/* Verify that cpp_interpret_string works. */
cpp_string dst_string;
const enum cpp_ttype type = CPP_STRING;
bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
&dst_string, type);
ASSERT_TRUE (result);
ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
free (const_cast <unsigned char *> (dst_string.text));
if (!should_have_column_data_p (line_table->highest_location))
return;
/* Currently we don't support locations within raw strings that
contain newlines. */
ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
"range endpoints are on different lines");
}
/* Test of lexing char constants. */
static void
@ -3297,6 +3369,8 @@ input_c_tests ()
for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
for_each_line_table_case (test_lexer_string_locations_non_string);
for_each_line_table_case (test_lexer_string_locations_long_line);
for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
for_each_line_table_case (test_lexer_char_constants);
test_reading_source_line ();

View File

@ -1,3 +1,9 @@
2016-11-17 David Malcolm <dmalcolm@redhat.com>
* gcc.dg/plugin/diagnostic-test-string-literals-1.c
(test_raw_string_one_liner): New function.
(test_raw_string_multiline): New function.
2016-11-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/aarch64/store_repeating_constant_1.c: New test.

View File

@ -193,6 +193,39 @@ test_L (void)
{ dg-end-multiline-output "" } */
}
/* Range within a raw string literal that fits on one line.  The call
   passes 6, 4, 7 after the literal; per the commit description this
   requests an underline of chars 4-7 with the caret at 6 (argument
   order presumed from that description -- confirm against the plugin).
   A warning matching "range" is expected on the line of the call,
   together with the underlined source quoted below.  */
void
test_raw_string_one_liner (void)
{
/* Digits 0-9. */
__emit_string_literal_range (R"foo(0123456789)foo", /* { dg-warning "range" } */
6, 4, 7);
/* { dg-begin-multiline-output "" }
__emit_string_literal_range (R"foo(0123456789)foo",
~~^~
{ dg-end-multiline-output "" } */
}
/* Range within a raw string literal that spans several lines.  The same
   6, 4, 7 arguments are passed as in the one-line case, but here an
   error ("unable to read substring location: range endpoints are on
   different lines") is expected instead of a range.  The error
   directive uses a relative line offset (.-5), which points back at the
   line where the call starts, so no lines may be added or removed
   between the call and that directive.  */
void
test_raw_string_multiline (void)
{
__emit_string_literal_range (R"foo(
hello
world
)foo",
6, 4, 7);
/* { dg-error "unable to read substring location: range endpoints are on different lines" "" { target *-*-* } .-5 } */
/* { dg-begin-multiline-output "" }
__emit_string_literal_range (R"foo(
^~~~~~
hello
~~~~~
world
~~~~~
)foo",
~~~~~
{ dg-end-multiline-output "" } */
}
void
test_macro (void)
{

View File

@ -1,3 +1,8 @@
2016-11-17 David Malcolm <dmalcolm@redhat.com>
* charset.c (cpp_interpret_string_1): Skip locations from
loc_reader when advancing 'p' when handling raw strings.
2016-11-16 Jakub Jelinek <jakub@redhat.com>
PR bootstrap/72823

View File

@ -1564,10 +1564,21 @@ cpp_interpret_string_1 (cpp_reader *pfile, const cpp_string *from, size_t count,
/* Skip over 'R"'. */
p += 2;
if (loc_reader)
{
loc_reader->get_next ();
loc_reader->get_next ();
}
prefix = p;
while (*p != '(')
p++;
{
p++;
if (loc_reader)
loc_reader->get_next ();
}
p++;
if (loc_reader)
loc_reader->get_next ();
limit = from[i].text + from[i].len;
if (limit >= p + (p - prefix) + 1)
limit -= (p - prefix) + 1;