* read.c (read_symbol_name): New function. Reads a symbol name.

Allows escape codes in names.
	(s_comm_internal): Use read_symbol_name.
	(s_globl, s_lsym, s_set, s_weakref): Likewise.
	* doc/as.texinfo: Document support for multibyte characters in
	symbol names.

	* gas/elf/syms.s: New test - checks the generation of multibyte
	symbol names.
	* gas/elf/syms.d: New file - expected readelf output.
	* gas/elf/elf.exp: Add syms.

	* readelf.c (print_symbol): Display multibyte characters in symbol
	names.
	(process_section_headers): Use print_symbol.

	* ld-ifunc/ifunc-13a-i386.s: Fix use of .global directive.
	* ld-ifunc/ifunc-15a-i386.s: Likewise.
This commit is contained in:
Nick Clifton 2012-05-28 14:20:19 +00:00
parent e54e67a9e9
commit 7bfd842d05
12 changed files with 260 additions and 172 deletions

View File

@ -1,3 +1,9 @@
2012-05-28 Nick Clifton <nickc@redhat.com>
* readelf.c (print_symbol): Display multibyte characters in symbol
names.
(process_section_headers): Use print_symbol.
2012-05-18 Andreas Schwab <schwab@linux-m68k.org>
* aclocal.m4: Regenerate.

View File

@ -48,6 +48,7 @@
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif
#include <wchar.h>
#if __GNUC__ >= 2
/* Define BFD64 here, even if our default architecture is 32 bit ELF
@ -383,93 +384,89 @@ print_vma (bfd_vma vma, print_mode mode)
return 0;
}
/* Display a symbol on stdout. Handles the display of non-printing characters.
/* Display a symbol on stdout. Handles the display of control characters and
multibyte characters.
If DO_WIDE is not true then format the symbol to be at most WIDTH characters,
truncating as necessary. If WIDTH is negative then format the string to be
exactly - WIDTH characters, truncating or padding as necessary.
Display at most abs(WIDTH) characters, truncating as necessary, unless do_wide is true.
If WIDTH is negative then ensure that the output is at least (- WIDTH) characters,
padding as necessary.
Returns the number of emitted characters. */
static unsigned int
print_symbol (int width, const char *symbol)
{
const char *c;
bfd_boolean extra_padding = FALSE;
unsigned int num_printed = 0;
int num_printed = 0;
mbstate_t state;
int width_remaining;
if (do_wide)
{
/* Set the width to a very large value. This simplifies the
code below. */
width = INT_MAX;
}
else if (width < 0)
if (width < 0)
{
/* Keep the width positive. This also helps. */
width = - width;
extra_padding = TRUE;
}
}
while (width)
if (do_wide)
/* Set the remaining width to a very large value.
This simplifies the code below. */
width_remaining = INT_MAX;
else
width_remaining = width;
/* Initialise the multibyte conversion state. */
memset (& state, 0, sizeof (state));
while (width_remaining)
{
int len;
size_t n;
wchar_t w;
const char c = *symbol++;
c = symbol;
/* Look for non-printing symbols inside the symbol's name.
This test is triggered in particular by the names generated
by the assembler for local labels. */
while (ISPRINT (*c))
c++;
len = c - symbol;
if (len)
{
if (len > width)
len = width;
printf ("%.*s", len, symbol);
width -= len;
num_printed += len;
}
if (*c == 0 || width == 0)
if (c == 0)
break;
/* Now display the non-printing character, if
there is room left in which to display it. */
if ((unsigned char) *c < 32)
/* Do not print control characters directly as they can affect terminal
settings. Such characters usually appear in the names generated
by the assembler for local labels. */
if (ISCNTRL (c))
{
if (width < 2)
if (width_remaining < 2)
break;
printf ("^%c", *c + 0x40);
width -= 2;
printf ("^%c", c + 0x40);
width_remaining -= 2;
num_printed += 2;
}
else if (ISPRINT (c))
{
putchar (c);
width_remaining --;
num_printed ++;
}
else
{
if (width < 6)
break;
/* Let printf do the hard work of displaying multibyte characters. */
printf ("%.1s", symbol - 1);
width_remaining --;
num_printed ++;
printf ("<0x%.2x>", (unsigned char) *c);
width -= 6;
num_printed += 6;
/* Try to find out how many bytes made up the character that was
just printed. Advance the symbol pointer past the bytes that
were displayed. */
n = mbrtowc (& w, symbol - 1, MB_CUR_MAX, & state);
if (n != (size_t) -1 && n != (size_t) -2 && n > 0)
symbol += (n - 1);
}
symbol = c + 1;
}
if (extra_padding && width > 0)
if (extra_padding && num_printed < width)
{
/* Fill in the remaining spaces. */
printf ("%-*s", width, " ");
num_printed += 2;
printf ("%-*s", width - num_printed, " ");
num_printed = width;
}
return num_printed;
@ -4737,21 +4734,21 @@ process_section_headers (FILE * file)
i < elf_header.e_shnum;
i++, section++)
{
printf (" [%2u] ", i);
if (do_section_details)
{
printf (" [%2u] %s\n",
i,
SECTION_NAME (section));
print_symbol (INT_MAX, SECTION_NAME (section));
putchar ('\n');
if (is_32bit_elf || do_wide)
printf (" %-15.15s ",
get_section_type_name (section->sh_type));
}
else
printf ((do_wide ? " [%2u] %-17s %-15s "
: " [%2u] %-17.17s %-15.15s "),
i,
SECTION_NAME (section),
get_section_type_name (section->sh_type));
{
print_symbol (-17, SECTION_NAME (section));
printf (" %-15.15s ",
get_section_type_name (section->sh_type));
}
if (is_32bit_elf)
{

View File

@ -1,3 +1,12 @@
2012-05-28 Nick Clifton <nickc@redhat.com>
* read.c (read_symbol_name): New function. Reads a symbol name.
Allows escape codes in names.
(s_comm_internal): Use read_symbol_name.
(s_globl, s_lsym, s_set, s_weakref): Likewise.
* doc/as.texinfo: Document support for multibyte characters in
symbol names.
2012-05-21 Mike Frysinger <vapier@gentoo.org>
* config/tc-mips.c (mips_after_parse_args): Assert that arch_info

View File

@ -2485,10 +2485,10 @@ On most machines, you can also use @code{$} in symbol names; exceptions
are noted in @ref{Machine Dependencies}.
@end ifset
No symbol may begin with a digit. Case is significant.
There is no length limit: all characters are significant. Symbols are
delimited by characters not in that set, or by the beginning of a file
(since the source program must end with a newline, the end of a file is
not a possible symbol delimiter). @xref{Symbols}.
There is no length limit: all characters are significant. Multibyte characters
are supported. Symbols are delimited by characters not in that set, or by the
beginning of a file (since the source program must end with a newline, the end
of a file is not a possible symbol delimiter). @xref{Symbols}.
@cindex length of symbols
@node Statements
@ -3414,6 +3414,11 @@ on the H8/300), and underscores.
Case of letters is significant: @code{foo} is a different symbol name
than @code{Foo}.
Multibyte characters are supported. To generate a symbol name containing
multibyte characters enclose it within double quotes and use escape codes.
@xref{Strings}. Generating a multibyte symbol name from a label is not
currently supported.
Each symbol has exactly one name. Each name in an assembly language program
refers to exactly one symbol. You may use that symbol name any number of times
in a program.

View File

@ -40,6 +40,7 @@
#include "obstack.h"
#include "ecoff.h"
#include "dw2gencfi.h"
#include "wchar.h"
#ifndef TC_START_LABEL
#define TC_START_LABEL(x,y,z) (x == ':')
@ -1583,13 +1584,106 @@ s_altmacro (int on)
macro_set_alternate (on);
}
/* Read a symbol name from input_line_pointer.

   The name is either a double-quoted string, whose characters are
   fetched with next_char_of_string, or a bare name that starts with
   a name-beginner character (or \001) and continues while
   is_part_of_name holds.

   The name is copied into a buffer and a pointer to this buffer is
   returned.  The buffer is xmalloc'ed.  It is the caller's
   responsibility to free this buffer.  The name is not left in the
   input_line_pointer buffer as it may need processing to handle
   escape characters.

   Advances input_line_pointer to the next non-whitespace character.

   If a symbol name could not be read, the routine issues an error
   message, skips to the end of the line and returns NULL.  */

static char *
read_symbol_name (void)
{
  char * name;
  char * start;
  char c;

  c = *input_line_pointer++;

  if (c == '"')
    {
#define SYM_NAME_CHUNK_LEN 128
      ptrdiff_t len = SYM_NAME_CHUNK_LEN;
      char * name_end;
      unsigned int C;

      /* START is the beginning of the buffer, NAME the write cursor.
         One extra byte is always reserved for the terminating NUL.  */
      start = name = xmalloc (len + 1);
      name_end = name + SYM_NAME_CHUNK_LEN;

      while (is_a_char (C = next_char_of_string ()))
        {
          if (name >= name_end)
            {
              /* Buffer full: grow it by another chunk and re-base the
                 cursor, since xrealloc may move the storage.  */
              ptrdiff_t sofar;

              sofar = name - start;
              len += SYM_NAME_CHUNK_LEN;
              start = xrealloc (start, len + 1);
              name_end = start + len;
              name = start + sofar;
            }

          *name++ = (char) C;
        }
      *name = 0;

      /* Since quoted symbol names can contain non-ASCII characters,
         check the string and warn if it cannot be recognised by the
         current character set.  The check must start at START: NAME
         now points at the terminating NUL, so checking NAME would
         only ever validate an empty string.  */
      if (mbstowcs (NULL, start, len) == (size_t) -1)
        as_warn (_("symbol name not recognised in the current locale"));
    }
  else if (is_name_beginner (c) || c == '\001')
    {
      ptrdiff_t len;

      name = input_line_pointer - 1;

      /* We accept \001 in a name in case this is
         being called with a constructed string.  */
      while (is_part_of_name (c = *input_line_pointer++)
             || c == '\001')
        ;

      len = (input_line_pointer - name) - 1;
      start = xmalloc (len + 1);

      memcpy (start, name, len);
      start[len] = 0;

      /* Skip a name ender char if one is present.  */
      if (! is_name_ender (c))
        --input_line_pointer;
    }
  else
    name = start = NULL;

  /* NAME == START means that no name was found: either nothing that
     looks like a name was present (both are NULL), or the quoted
     string was empty (the write cursor never moved).  */
  if (name == start)
    {
      as_bad (_("expected symbol name"));
      ignore_rest_of_line ();
      /* Release the buffer allocated for an empty quoted name;
         free (NULL) is harmless in the other case.  */
      free (start);
      return NULL;
    }

  SKIP_WHITESPACE ();

  return start;
}
symbolS *
s_comm_internal (int param,
symbolS *(*comm_parse_extra) (int, symbolS *, addressT))
{
char *name;
char c;
char *p;
offsetT temp, size;
symbolS *symbolP = NULL;
char *stop = NULL;
@ -1599,20 +1693,8 @@ s_comm_internal (int param,
if (flag_mri)
stop = mri_comment_field (&stopc);
name = input_line_pointer;
c = get_symbol_end ();
/* Just after name is now '\0'. */
p = input_line_pointer;
*p = c;
if (name == p)
{
as_bad (_("expected symbol name"));
ignore_rest_of_line ();
goto out;
}
SKIP_WHITESPACE ();
if ((name = read_symbol_name ()) == NULL)
goto out;
/* Accept an optional comma after the name. The comma used to be
required, but Irix 5 cc does not generate it for .lcomm. */
@ -1635,7 +1717,6 @@ s_comm_internal (int param,
goto out;
}
*p = 0;
symbolP = symbol_find_or_make (name);
if ((S_IS_DEFINED (symbolP) || symbol_equated_p (symbolP))
&& !S_IS_COMMON (symbolP))
@ -1644,7 +1725,6 @@ s_comm_internal (int param,
{
symbolP = NULL;
as_bad (_("symbol `%s' is already defined"), name);
*p = c;
ignore_rest_of_line ();
goto out;
}
@ -1662,7 +1742,6 @@ s_comm_internal (int param,
as_warn (_("size of \"%s\" is already %ld; not changing to %ld"),
name, (long) size, (long) temp);
*p = c;
if (comm_parse_extra != NULL)
symbolP = (*comm_parse_extra) (param, symbolP, size);
else
@ -1676,6 +1755,8 @@ s_comm_internal (int param,
out:
if (flag_mri)
mri_comment_end (stop, stopc);
if (name != NULL)
free (name);
return symbolP;
}
@ -2179,12 +2260,12 @@ s_globl (int ignore ATTRIBUTE_UNUSED)
do
{
name = input_line_pointer;
c = get_symbol_end ();
if ((name = read_symbol_name ()) == NULL)
return;
symbolP = symbol_find_or_make (name);
S_SET_EXTERNAL (symbolP);
*input_line_pointer = c;
SKIP_WHITESPACE ();
c = *input_line_pointer;
if (c == ',')
@ -2194,6 +2275,8 @@ s_globl (int ignore ATTRIBUTE_UNUSED)
if (is_end_of_line[(unsigned char) *input_line_pointer])
c = '\n';
}
free (name);
}
while (c == ',');
@ -2580,33 +2663,17 @@ void
s_lsym (int ignore ATTRIBUTE_UNUSED)
{
char *name;
char c;
char *p;
expressionS exp;
symbolS *symbolP;
/* We permit ANY defined expression: BSD4.2 demands constants. */
name = input_line_pointer;
c = get_symbol_end ();
p = input_line_pointer;
*p = c;
if (name == p)
{
as_bad (_("expected symbol name"));
ignore_rest_of_line ();
return;
}
SKIP_WHITESPACE ();
if ((name = read_symbol_name ()) == NULL)
return;
if (*input_line_pointer != ',')
{
*p = 0;
as_bad (_("expected comma after \"%s\""), name);
*p = c;
ignore_rest_of_line ();
return;
goto err_out;
}
input_line_pointer++;
@ -2616,11 +2683,9 @@ s_lsym (int ignore ATTRIBUTE_UNUSED)
&& exp.X_op != O_register)
{
as_bad (_("bad expression"));
ignore_rest_of_line ();
return;
goto err_out;
}
*p = 0;
symbolP = symbol_find_or_make (name);
if (S_GET_SEGMENT (symbolP) == undefined_section)
@ -2638,8 +2703,14 @@ s_lsym (int ignore ATTRIBUTE_UNUSED)
as_bad (_("symbol `%s' is already defined"), name);
}
*p = c;
demand_empty_rest_of_line ();
free (name);
return;
err_out:
ignore_rest_of_line ();
free (name);
return;
}
/* Read a line into an sb. Returns the character that ended the line
@ -3283,42 +3354,25 @@ void
s_set (int equiv)
{
char *name;
char delim;
char *end_name;
/* Especial apologies for the random logic:
this just grew, and could be parsed much more simply!
Dean in haste. */
name = input_line_pointer;
delim = get_symbol_end ();
end_name = input_line_pointer;
*end_name = delim;
if (name == end_name)
{
as_bad (_("expected symbol name"));
ignore_rest_of_line ();
return;
}
SKIP_WHITESPACE ();
if ((name = read_symbol_name ()) == NULL)
return;
if (*input_line_pointer != ',')
{
*end_name = 0;
as_bad (_("expected comma after \"%s\""), name);
*end_name = delim;
ignore_rest_of_line ();
free (name);
return;
}
input_line_pointer++;
*end_name = 0;
assign_symbol (name, equiv);
*end_name = delim;
demand_empty_rest_of_line ();
free (name);
}
void
@ -3622,23 +3676,12 @@ void
s_weakref (int ignore ATTRIBUTE_UNUSED)
{
char *name;
char delim;
char *end_name;
symbolS *symbolP;
symbolS *symbolP2;
expressionS exp;
name = input_line_pointer;
delim = get_symbol_end ();
end_name = input_line_pointer;
if (name == end_name)
{
as_bad (_("expected symbol name"));
*end_name = delim;
ignore_rest_of_line ();
return;
}
if ((name = read_symbol_name ()) == NULL)
return;
symbolP = symbol_find_or_make (name);
@ -3647,41 +3690,27 @@ s_weakref (int ignore ATTRIBUTE_UNUSED)
if (!S_IS_VOLATILE (symbolP))
{
as_bad (_("symbol `%s' is already defined"), name);
*end_name = delim;
ignore_rest_of_line ();
return;
goto err_out;
}
symbolP = symbol_clone (symbolP, 1);
S_CLEAR_VOLATILE (symbolP);
}
*end_name = delim;
SKIP_WHITESPACE ();
if (*input_line_pointer != ',')
{
*end_name = 0;
as_bad (_("expected comma after \"%s\""), name);
*end_name = delim;
ignore_rest_of_line ();
return;
goto err_out;
}
input_line_pointer++;
SKIP_WHITESPACE ();
free (name);
name = input_line_pointer;
delim = get_symbol_end ();
end_name = input_line_pointer;
if (name == end_name)
{
as_bad (_("expected symbol name"));
ignore_rest_of_line ();
return;
}
if ((name = read_symbol_name ()) == NULL)
return;
if ((symbolP2 = symbol_find_noref (name, 1)) == NULL
&& (symbolP2 = md_undefined_symbol (name)) == NULL)
@ -3712,6 +3741,7 @@ s_weakref (int ignore ATTRIBUTE_UNUSED)
while (symp != symbolP)
{
char *old_loop = loop;
symp = symbol_get_value_expression (symp)->X_add_symbol;
loop = concat (loop, " => ", S_GET_NAME (symp),
(const char *) NULL);
@ -3722,8 +3752,7 @@ s_weakref (int ignore ATTRIBUTE_UNUSED)
S_GET_NAME (symbolP), loop);
free (loop);
*end_name = delim;
free (name);
ignore_rest_of_line ();
return;
}
@ -3734,8 +3763,6 @@ s_weakref (int ignore ATTRIBUTE_UNUSED)
/* symbolP2 = symp; */
}
*end_name = delim;
memset (&exp, 0, sizeof (exp));
exp.X_op = O_symbol;
exp.X_add_symbol = symbolP2;
@ -3746,6 +3773,13 @@ s_weakref (int ignore ATTRIBUTE_UNUSED)
S_SET_WEAKREFR (symbolP);
demand_empty_rest_of_line ();
free (name);
return;
err_out:
ignore_rest_of_line ();
free (name);
return;
}

View File

@ -1,3 +1,10 @@
2012-05-28 Nick Clifton <nickc@redhat.com>
* gas/elf/syms.s: New test - checks the generation of multibyte
symbol names.
* gas/elf/syms.d: New file - expected readelf output.
* gas/elf/elf.exp: Add syms.
2012-05-25 Alan Modra <amodra@gmail.com>
* gas/lns/lns-big-delta.s: Add nops.

View File

@ -184,6 +184,8 @@ if { [is_elf_format] } then {
run_dump_test "bad-size"
run_dump_test "bad-group"
run_dump_test "syms"
load_lib gas-dg.exp
dg-init
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/err-*.s $srcdir/$subdir/warn-*.s]] "" ""

View File

@ -0,0 +1,18 @@
#readelf: -S -s -p .strtab
#name: Multibyte symbol names
# The following targets use an unusual .set syntax...
#not-target: alpha*-*-* h8300-*-*
#...
Section Headers:
#...
\[ .\] sec.*tion.*
#...
Symbol table.*
#...
..: .*sy.*mbol
#...
String dump.*
#...
\[......\] sy.*mbol
#pass

View File

@ -0,0 +1,5 @@
.section "sec\xa5\xc2tion"
.set "sy\xa5\xc2mbol", .
.string8 "str\xa5\xc2ing"

View File

@ -1,3 +1,8 @@
2012-05-28 Nick Clifton <nickc@redhat.com>
* ld-ifunc/ifunc-13a-i386.s: Fix use of .global directive.
* ld-ifunc/ifunc-15a-i386.s: Likewise.
2012-05-28 Alan Modra <amodra@gmail.com>
PR ld/14170

View File

@ -1,6 +1,6 @@
.text
.type foo, @function
.global
.global foo
foo:
movl xxx@GOT(%ebx), %eax
ret

View File

@ -1,6 +1,6 @@
.text
.type foo, @function
.global
.global foo
foo:
movl ifunc@GOT(%ebx), %eax
ret