summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--binutils/ChangeLog6
-rw-r--r--binutils/readelf.c123
-rw-r--r--gas/ChangeLog9
-rw-r--r--gas/doc/as.texinfo13
-rw-r--r--gas/read.c238
-rw-r--r--gas/testsuite/ChangeLog7
-rw-r--r--gas/testsuite/gas/elf/elf.exp2
-rw-r--r--gas/testsuite/gas/elf/syms.d18
-rw-r--r--gas/testsuite/gas/elf/syms.s5
-rw-r--r--ld/testsuite/ChangeLog5
-rw-r--r--ld/testsuite/ld-ifunc/ifunc-13a-i386.s2
-rw-r--r--ld/testsuite/ld-ifunc/ifunc-15-i386.s2
12 files changed, 259 insertions, 171 deletions
diff --git a/binutils/ChangeLog b/binutils/ChangeLog
index c19e45c227..2d2bc08085 100644
--- a/binutils/ChangeLog
+++ b/binutils/ChangeLog
@@ -1,3 +1,9 @@
+2012-05-28 Nick Clifton <nickc@redhat.com>
+
+ * readelf.c (print_symbol): Display multibyte characters in symbol
+ names.
+ (process_section_headers): Use print_symbol.
+
2012-05-18 Andreas Schwab <schwab@linux-m68k.org>
* aclocal.m4: Regenerate.
diff --git a/binutils/readelf.c b/binutils/readelf.c
index 212f70e6c2..762a1a8700 100644
--- a/binutils/readelf.c
+++ b/binutils/readelf.c
@@ -48,6 +48,7 @@
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif
+#include <wchar.h>
#if __GNUC__ >= 2
/* Define BFD64 here, even if our default architecture is 32 bit ELF
@@ -383,93 +384,89 @@ print_vma (bfd_vma vma, print_mode mode)
return 0;
}
-/* Display a symbol on stdout. Handles the display of non-printing characters.
+/* Display a symbol on stdout. Handles the display of control characters and
+ multibyte characters.
- If DO_WIDE is not true then format the symbol to be at most WIDTH characters,
- truncating as necessary. If WIDTH is negative then format the string to be
- exactly - WIDTH characters, truncating or padding as necessary.
+ Display at most abs(WIDTH) characters, truncating as necessary, unless do_wide is true.
+
+ If WIDTH is negative then ensure that the output is at least (- WIDTH) characters,
+ padding as necessary.
Returns the number of emitted characters. */
static unsigned int
print_symbol (int width, const char *symbol)
{
- const char *c;
bfd_boolean extra_padding = FALSE;
- unsigned int num_printed = 0;
+ int num_printed = 0;
+ mbstate_t state;
+ int width_remaining;
- if (do_wide)
- {
- /* Set the width to a very large value. This simplifies the
- code below. */
- width = INT_MAX;
- }
- else if (width < 0)
+ if (width < 0)
{
/* Keep the width positive. This also helps. */
width = - width;
extra_padding = TRUE;
- }
-
- while (width)
- {
- int len;
-
- c = symbol;
-
- /* Look for non-printing symbols inside the symbol's name.
- This test is triggered in particular by the names generated
- by the assembler for local labels. */
- while (ISPRINT (*c))
- c++;
+ }
- len = c - symbol;
-
- if (len)
- {
- if (len > width)
- len = width;
+ if (do_wide)
+ /* Set the remaining width to a very large value.
+ This simplifies the code below. */
+ width_remaining = INT_MAX;
+ else
+ width_remaining = width;
- printf ("%.*s", len, symbol);
+ /* Initialise the multibyte conversion state. */
+ memset (& state, 0, sizeof (state));
- width -= len;
- num_printed += len;
- }
+ while (width_remaining)
+ {
+ size_t n;
+ wchar_t w;
+ const char c = *symbol++;
- if (*c == 0 || width == 0)
+ if (c == 0)
break;
- /* Now display the non-printing character, if
- there is room left in which to dipslay it. */
- if ((unsigned char) *c < 32)
+ /* Do not print control characters directly as they can affect terminal
+ settings. Such characters usually appear in the names generated
+ by the assembler for local labels. */
+ if (ISCNTRL (c))
{
- if (width < 2)
+ if (width_remaining < 2)
break;
- printf ("^%c", *c + 0x40);
-
- width -= 2;
+ printf ("^%c", c + 0x40);
+ width_remaining -= 2;
num_printed += 2;
}
+ else if (ISPRINT (c))
+ {
+ putchar (c);
+ width_remaining --;
+ num_printed ++;
+ }
else
{
- if (width < 6)
- break;
-
- printf ("<0x%.2x>", (unsigned char) *c);
+ /* Let printf do the hard work of displaying multibyte characters. */
+ printf ("%.1s", symbol - 1);
+ width_remaining --;
+ num_printed ++;
- width -= 6;
- num_printed += 6;
+ /* Try to find out how many bytes made up the character that was
+ just printed. Advance the symbol pointer past the bytes that
+ were displayed. */
+ n = mbrtowc (& w, symbol - 1, MB_CUR_MAX, & state);
+ if (n != (size_t) -1 && n != (size_t) -2 && n > 0)
+ symbol += (n - 1);
}
-
- symbol = c + 1;
}
- if (extra_padding && width > 0)
+ if (extra_padding && num_printed < width)
{
/* Fill in the remaining spaces. */
- printf ("%-*s", width, " ");
- num_printed += 2;
+ printf ("%-*s", width - num_printed, " ");
+ num_printed = width;
}
return num_printed;
@@ -4737,21 +4734,21 @@ process_section_headers (FILE * file)
i < elf_header.e_shnum;
i++, section++)
{
+ printf (" [%2u] ", i);
if (do_section_details)
{
- printf (" [%2u] %s\n",
- i,
- SECTION_NAME (section));
+ print_symbol (INT_MAX, SECTION_NAME (section));
+ putchar ('\n');
if (is_32bit_elf || do_wide)
printf (" %-15.15s ",
get_section_type_name (section->sh_type));
}
else
- printf ((do_wide ? " [%2u] %-17s %-15s "
- : " [%2u] %-17.17s %-15.15s "),
- i,
- SECTION_NAME (section),
- get_section_type_name (section->sh_type));
+ {
+ print_symbol (-17, SECTION_NAME (section));
+ printf (" %-15.15s ",
+ get_section_type_name (section->sh_type));
+ }
if (is_32bit_elf)
{
diff --git a/gas/ChangeLog b/gas/ChangeLog
index caef2baa55..a32f87c2ff 100644
--- a/gas/ChangeLog
+++ b/gas/ChangeLog
@@ -1,3 +1,12 @@
+2012-05-28 Nick Clifton <nickc@redhat.com>
+
+ * read.c (read_symbol_name): New function. Reads a symbol name.
+ Allows escape codes in names.
+ (s_comm_internal): Use read_symbol_name.
+ (s_globl, s_lsym, s_set, s_weakref): Likewise.
+ * doc/as.texinfo: Document support for multibyte characters in
+ symbol names.
+
2012-05-21 Mike Frysinger <vapier@gentoo.org>
* config/tc-mips.c (mips_after_parse_args): Assert that arch_info
diff --git a/gas/doc/as.texinfo b/gas/doc/as.texinfo
index 694c8067d9..72b5d0522f 100644
--- a/gas/doc/as.texinfo
+++ b/gas/doc/as.texinfo
@@ -2485,10 +2485,10 @@ On most machines, you can also use @code{$} in symbol names; exceptions
are noted in @ref{Machine Dependencies}.
@end ifset
No symbol may begin with a digit. Case is significant.
-There is no length limit: all characters are significant. Symbols are
-delimited by characters not in that set, or by the beginning of a file
-(since the source program must end with a newline, the end of a file is
-not a possible symbol delimiter). @xref{Symbols}.
+There is no length limit: all characters are significant. Multibyte characters
+are supported. Symbols are delimited by characters not in that set, or by the
+beginning of a file (since the source program must end with a newline, the end
+of a file is not a possible symbol delimiter). @xref{Symbols}.
@cindex length of symbols
@node Statements
@@ -3414,6 +3414,11 @@ on the H8/300), and underscores.
Case of letters is significant: @code{foo} is a different symbol name
than @code{Foo}.
+Multibyte characters are supported. To generate a symbol name containing
+multibyte characters, enclose it within double quotes and use escape codes.
+@xref{Strings}. Generating a multibyte symbol name from a label is not
+currently supported.
+
Each symbol has exactly one name. Each name in an assembly language program
refers to exactly one symbol. You may use that symbol name any number of times
in a program.
diff --git a/gas/read.c b/gas/read.c
index 4ff33132f8..cf7f7529f6 100644
--- a/gas/read.c
+++ b/gas/read.c
@@ -40,6 +40,7 @@
#include "obstack.h"
#include "ecoff.h"
#include "dw2gencfi.h"
+#include "wchar.h"
#ifndef TC_START_LABEL
#define TC_START_LABEL(x,y,z) (x == ':')
@@ -1583,13 +1584,106 @@ s_altmacro (int on)
macro_set_alternate (on);
}
+/* Read a symbol name from input_line_pointer.
+
+ Stores the symbol name in a buffer and returns a pointer to this buffer.
+ The buffer is xmalloc'ed. It is the caller's responsibility to free
+ this buffer.
+
+ The name is not left in the i_l_p buffer as it may need processing
+ to handle escape characters.
+
+ Advances i_l_p to the next non-whitespace character.
+
+ If a symbol name could not be read, the routine issues an error
+ message, skips to the end of the line and returns NULL. */
+
+static char *
+read_symbol_name (void)
+{
+ char * name;
+ char * start;
+ char c;
+
+ c = *input_line_pointer++;
+
+ if (c == '"')
+ {
+#define SYM_NAME_CHUNK_LEN 128
+ ptrdiff_t len = SYM_NAME_CHUNK_LEN;
+ char * name_end;
+ unsigned int C;
+
+ start = name = xmalloc (len + 1);
+
+ name_end = name + SYM_NAME_CHUNK_LEN;
+
+ while (is_a_char (C = next_char_of_string ()))
+ {
+ if (name >= name_end)
+ {
+ ptrdiff_t sofar;
+
+ sofar = name - start;
+ len += SYM_NAME_CHUNK_LEN;
+ start = xrealloc (start, len + 1);
+ name_end = start + len;
+ name = start + sofar;
+ }
+
+ *name++ = (char) C;
+ }
+ *name = 0;
+
+ /* Since quoted symbol names can contain non-ASCII characters,
+ check the string and warn if it cannot be recognised by the
+ current character set. */
+ if (mbstowcs (NULL, name, len) == (size_t) -1)
+ as_warn (_("symbol name not recognised in the current locale"));
+ }
+ else if (is_name_beginner (c) || c == '\001')
+ {
+ ptrdiff_t len;
+
+ name = input_line_pointer - 1;
+
+ /* We accept \001 in a name in case this is
+ being called with a constructed string. */
+ while (is_part_of_name (c = *input_line_pointer++)
+ || c == '\001')
+ ;
+
+ len = (input_line_pointer - name) - 1;
+ start = xmalloc (len + 1);
+
+ memcpy (start, name, len);
+ start[len] = 0;
+
+ /* Skip a name ender char if one is present. */
+ if (! is_name_ender (c))
+ --input_line_pointer;
+ }
+ else
+ name = start = NULL;
+
+ if (name == start)
+ {
+ as_bad (_("expected symbol name"));
+ ignore_rest_of_line ();
+ return NULL;
+ }
+
+ SKIP_WHITESPACE ();
+
+ return start;
+}
+
+
symbolS *
s_comm_internal (int param,
symbolS *(*comm_parse_extra) (int, symbolS *, addressT))
{
char *name;
- char c;
- char *p;
offsetT temp, size;
symbolS *symbolP = NULL;
char *stop = NULL;
@@ -1599,20 +1693,8 @@ s_comm_internal (int param,
if (flag_mri)
stop = mri_comment_field (&stopc);
- name = input_line_pointer;
- c = get_symbol_end ();
- /* Just after name is now '\0'. */
- p = input_line_pointer;
- *p = c;
-
- if (name == p)
- {
- as_bad (_("expected symbol name"));
- ignore_rest_of_line ();
- goto out;
- }
-
- SKIP_WHITESPACE ();
+ if ((name = read_symbol_name ()) == NULL)
+ goto out;
/* Accept an optional comma after the name. The comma used to be
required, but Irix 5 cc does not generate it for .lcomm. */
@@ -1635,7 +1717,6 @@ s_comm_internal (int param,
goto out;
}
- *p = 0;
symbolP = symbol_find_or_make (name);
if ((S_IS_DEFINED (symbolP) || symbol_equated_p (symbolP))
&& !S_IS_COMMON (symbolP))
@@ -1644,7 +1725,6 @@ s_comm_internal (int param,
{
symbolP = NULL;
as_bad (_("symbol `%s' is already defined"), name);
- *p = c;
ignore_rest_of_line ();
goto out;
}
@@ -1662,7 +1742,6 @@ s_comm_internal (int param,
as_warn (_("size of \"%s\" is already %ld; not changing to %ld"),
name, (long) size, (long) temp);
- *p = c;
if (comm_parse_extra != NULL)
symbolP = (*comm_parse_extra) (param, symbolP, size);
else
@@ -1676,6 +1755,8 @@ s_comm_internal (int param,
out:
if (flag_mri)
mri_comment_end (stop, stopc);
+ if (name != NULL)
+ free (name);
return symbolP;
}
@@ -2179,12 +2260,12 @@ s_globl (int ignore ATTRIBUTE_UNUSED)
do
{
- name = input_line_pointer;
- c = get_symbol_end ();
+ if ((name = read_symbol_name ()) == NULL)
+ return;
+
symbolP = symbol_find_or_make (name);
S_SET_EXTERNAL (symbolP);
- *input_line_pointer = c;
SKIP_WHITESPACE ();
c = *input_line_pointer;
if (c == ',')
@@ -2194,6 +2275,8 @@ s_globl (int ignore ATTRIBUTE_UNUSED)
if (is_end_of_line[(unsigned char) *input_line_pointer])
c = '\n';
}
+
+ free (name);
}
while (c == ',');
@@ -2580,33 +2663,17 @@ void
s_lsym (int ignore ATTRIBUTE_UNUSED)
{
char *name;
- char c;
- char *p;
expressionS exp;
symbolS *symbolP;
/* We permit ANY defined expression: BSD4.2 demands constants. */
- name = input_line_pointer;
- c = get_symbol_end ();
- p = input_line_pointer;
- *p = c;
-
- if (name == p)
- {
- as_bad (_("expected symbol name"));
- ignore_rest_of_line ();
- return;
- }
-
- SKIP_WHITESPACE ();
+ if ((name = read_symbol_name ()) == NULL)
+ return;
if (*input_line_pointer != ',')
{
- *p = 0;
as_bad (_("expected comma after \"%s\""), name);
- *p = c;
- ignore_rest_of_line ();
- return;
+ goto err_out;
}
input_line_pointer++;
@@ -2616,11 +2683,9 @@ s_lsym (int ignore ATTRIBUTE_UNUSED)
&& exp.X_op != O_register)
{
as_bad (_("bad expression"));
- ignore_rest_of_line ();
- return;
+ goto err_out;
}
- *p = 0;
symbolP = symbol_find_or_make (name);
if (S_GET_SEGMENT (symbolP) == undefined_section)
@@ -2638,8 +2703,14 @@ s_lsym (int ignore ATTRIBUTE_UNUSED)
as_bad (_("symbol `%s' is already defined"), name);
}
- *p = c;
demand_empty_rest_of_line ();
+ free (name);
+ return;
+
+ err_out:
+ ignore_rest_of_line ();
+ free (name);
+ return;
}
/* Read a line into an sb. Returns the character that ended the line
@@ -3283,42 +3354,25 @@ void
s_set (int equiv)
{
char *name;
- char delim;
- char *end_name;
/* Especial apologies for the random logic:
this just grew, and could be parsed much more simply!
Dean in haste. */
- name = input_line_pointer;
- delim = get_symbol_end ();
- end_name = input_line_pointer;
- *end_name = delim;
-
- if (name == end_name)
- {
- as_bad (_("expected symbol name"));
- ignore_rest_of_line ();
- return;
- }
-
- SKIP_WHITESPACE ();
+ if ((name = read_symbol_name ()) == NULL)
+ return;
if (*input_line_pointer != ',')
{
- *end_name = 0;
as_bad (_("expected comma after \"%s\""), name);
- *end_name = delim;
ignore_rest_of_line ();
+ free (name);
return;
}
input_line_pointer++;
- *end_name = 0;
-
assign_symbol (name, equiv);
- *end_name = delim;
-
demand_empty_rest_of_line ();
+ free (name);
}
void
@@ -3622,23 +3676,12 @@ void
s_weakref (int ignore ATTRIBUTE_UNUSED)
{
char *name;
- char delim;
- char *end_name;
symbolS *symbolP;
symbolS *symbolP2;
expressionS exp;
- name = input_line_pointer;
- delim = get_symbol_end ();
- end_name = input_line_pointer;
-
- if (name == end_name)
- {
- as_bad (_("expected symbol name"));
- *end_name = delim;
- ignore_rest_of_line ();
- return;
- }
+ if ((name = read_symbol_name ()) == NULL)
+ return;
symbolP = symbol_find_or_make (name);
@@ -3647,41 +3690,27 @@ s_weakref (int ignore ATTRIBUTE_UNUSED)
if (!S_IS_VOLATILE (symbolP))
{
as_bad (_("symbol `%s' is already defined"), name);
- *end_name = delim;
- ignore_rest_of_line ();
- return;
+ goto err_out;
}
symbolP = symbol_clone (symbolP, 1);
S_CLEAR_VOLATILE (symbolP);
}
- *end_name = delim;
-
SKIP_WHITESPACE ();
if (*input_line_pointer != ',')
{
- *end_name = 0;
as_bad (_("expected comma after \"%s\""), name);
- *end_name = delim;
- ignore_rest_of_line ();
- return;
+ goto err_out;
}
input_line_pointer++;
SKIP_WHITESPACE ();
+ free (name);
- name = input_line_pointer;
- delim = get_symbol_end ();
- end_name = input_line_pointer;
-
- if (name == end_name)
- {
- as_bad (_("expected symbol name"));
- ignore_rest_of_line ();
- return;
- }
+ if ((name = read_symbol_name ()) == NULL)
+ return;
if ((symbolP2 = symbol_find_noref (name, 1)) == NULL
&& (symbolP2 = md_undefined_symbol (name)) == NULL)
@@ -3712,6 +3741,7 @@ s_weakref (int ignore ATTRIBUTE_UNUSED)
while (symp != symbolP)
{
char *old_loop = loop;
+
symp = symbol_get_value_expression (symp)->X_add_symbol;
loop = concat (loop, " => ", S_GET_NAME (symp),
(const char *) NULL);
@@ -3722,8 +3752,7 @@ s_weakref (int ignore ATTRIBUTE_UNUSED)
S_GET_NAME (symbolP), loop);
free (loop);
-
- *end_name = delim;
+ free (name);
ignore_rest_of_line ();
return;
}
@@ -3734,8 +3763,6 @@ s_weakref (int ignore ATTRIBUTE_UNUSED)
/* symbolP2 = symp; */
}
- *end_name = delim;
-
memset (&exp, 0, sizeof (exp));
exp.X_op = O_symbol;
exp.X_add_symbol = symbolP2;
@@ -3746,6 +3773,13 @@ s_weakref (int ignore ATTRIBUTE_UNUSED)
S_SET_WEAKREFR (symbolP);
demand_empty_rest_of_line ();
+ free (name);
+ return;
+
+ err_out:
+ ignore_rest_of_line ();
+ free (name);
+ return;
}
diff --git a/gas/testsuite/ChangeLog b/gas/testsuite/ChangeLog
index 13a6344552..e19086abc5 100644
--- a/gas/testsuite/ChangeLog
+++ b/gas/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2012-05-28 Nick Clifton <nickc@redhat.com>
+
+ * gas/elf/syms.s: New test - checks the generation of multibyte
+ symbol names.
+ * gas/elf/syms.d: New file - expected readelf output.
+ * gas/elf/elf.exp: Add syms.
+
2012-05-25 Alan Modra <amodra@gmail.com>
* gas/lns/lns-big-delta.s: Add nops.
diff --git a/gas/testsuite/gas/elf/elf.exp b/gas/testsuite/gas/elf/elf.exp
index 736eec9836..b437730ca9 100644
--- a/gas/testsuite/gas/elf/elf.exp
+++ b/gas/testsuite/gas/elf/elf.exp
@@ -184,6 +184,8 @@ if { [is_elf_format] } then {
run_dump_test "bad-size"
run_dump_test "bad-group"
+ run_dump_test "syms"
+
load_lib gas-dg.exp
dg-init
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/err-*.s $srcdir/$subdir/warn-*.s]] "" ""
diff --git a/gas/testsuite/gas/elf/syms.d b/gas/testsuite/gas/elf/syms.d
new file mode 100644
index 0000000000..40f7706d56
--- /dev/null
+++ b/gas/testsuite/gas/elf/syms.d
@@ -0,0 +1,18 @@
+#readelf: -S -s -p .strtab
+#name: Multibyte symbol names
+# The following targets use an unusual .set syntax...
+#not-target: alpha*-*-* h8300-*-*
+
+#...
+Section Headers:
+#...
+ \[ .\] sec.*tion.*
+#...
+Symbol table.*
+#...
+ ..: .*sy.*mbol
+#...
+String dump.*
+#...
+ \[......\] sy.*mbol
+#pass
diff --git a/gas/testsuite/gas/elf/syms.s b/gas/testsuite/gas/elf/syms.s
new file mode 100644
index 0000000000..977c6bb7c9
--- /dev/null
+++ b/gas/testsuite/gas/elf/syms.s
@@ -0,0 +1,5 @@
+ .section "sec\xa5\xc2tion"
+
+ .set "sy\xa5\xc2mbol", .
+
+ .string8 "str\xa5\xc2ing"
diff --git a/ld/testsuite/ChangeLog b/ld/testsuite/ChangeLog
index 086123bb73..751a3873b1 100644
--- a/ld/testsuite/ChangeLog
+++ b/ld/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2012-05-28 Nick Clifton <nickc@redhat.com>
+
+ * ld-ifunc/ifunc-13a-i386.s: Fix use of .global directive.
+ * ld-ifunc/ifunc-15-i386.s: Likewise.
+
2012-05-28 Alan Modra <amodra@gmail.com>
PR ld/14170
diff --git a/ld/testsuite/ld-ifunc/ifunc-13a-i386.s b/ld/testsuite/ld-ifunc/ifunc-13a-i386.s
index eb893af3d3..5bda920b7f 100644
--- a/ld/testsuite/ld-ifunc/ifunc-13a-i386.s
+++ b/ld/testsuite/ld-ifunc/ifunc-13a-i386.s
@@ -1,6 +1,6 @@
.text
.type foo, @function
- .global
+ .global foo
foo:
movl xxx@GOT(%ebx), %eax
ret
diff --git a/ld/testsuite/ld-ifunc/ifunc-15-i386.s b/ld/testsuite/ld-ifunc/ifunc-15-i386.s
index 5ee4fab859..ea541e2978 100644
--- a/ld/testsuite/ld-ifunc/ifunc-15-i386.s
+++ b/ld/testsuite/ld-ifunc/ifunc-15-i386.s
@@ -1,6 +1,6 @@
.text
.type foo, @function
- .global
+ .global foo
foo:
movl ifunc@GOT(%ebx), %eax
ret