/* 84096498a7 (1523 lines, 32 KiB, C)

   This is the libiberty (mainly for binutils/gdb) counterpart of
   https://github.com/alexcrichton/rustc-demangle/pull/23.

   Relevant links for the new Rust mangling scheme (aka "v0"):
   * Rust RFC: https://github.com/rust-lang/rfcs/pull/2603
   * tracking issue: https://github.com/rust-lang/rust/issues/60705
   * implementation: https://github.com/rust-lang/rust/pull/57967

   This implementation includes full support for UTF-8 identifiers via
   punycode, so I've included a testcase for that as well.

   libiberty/ChangeLog:
   * rust-demangle.c (struct rust_demangler): Add skipping_printing
   and bound_lifetime_depth fields.
   (eat): Add (v0-only).
   (parse_integer_62): Add (v0-only).
   (parse_opt_integer_62): Add (v0-only).
   (parse_disambiguator): Add (v0-only).
   (struct rust_mangled_ident): Add punycode{,_len} fields.
   (parse_ident): Support v0 identifiers.
   (print_str): Respect skipping_printing.
   (print_uint64): Add (v0-only).
   (print_uint64_hex): Add (v0-only).
   (print_ident): Respect skipping_printing, Support v0 identifiers.
   (print_lifetime_from_index): Add (v0-only).
   (demangle_binder): Add (v0-only).
   (demangle_path): Add (v0-only).
   (demangle_generic_arg): Add (v0-only).
   (demangle_type): Add (v0-only).
   (demangle_path_maybe_open_generics): Add (v0-only).
   (demangle_dyn_trait): Add (v0-only).
   (demangle_const): Add (v0-only).
   (demangle_const_uint): Add (v0-only).
   (basic_type): Add (v0-only).
   (rust_demangle_callback): Support v0 symbols.
   * testsuite/rust-demangle-expected: Add v0 testcases.  */
/* Demangler for the Rust programming language
   Copyright (C) 2016-2020 Free Software Foundation, Inc.
   Written by David Tolnay (dtolnay@gmail.com).
   Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support.

This file is part of the libiberty library.
Libiberty is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.

In addition to the permissions in the GNU Library General Public
License, the Free Software Foundation gives you unlimited permission
to link the compiled version of this file into combinations with other
programs, and to distribute those combinations without any restriction
coming from the use of this file.  (The Library Public License
restrictions do apply in other respects; for example, they cover
modification of the file, and distribution when not linked into a
combined executable.)

Libiberty is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Library General Public License for more details.

You should have received a copy of the GNU Library General Public
License along with libiberty; see the file COPYING.LIB.
If not, see <http://www.gnu.org/licenses/>.  */
|
|
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
#include "safe-ctype.h"
|
|
|
|
#include <inttypes.h>
|
|
#include <sys/types.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
#ifdef HAVE_STRING_H
|
|
#include <string.h>
|
|
#else
|
|
extern size_t strlen(const char *s);
|
|
extern int strncmp(const char *s1, const char *s2, size_t n);
|
|
extern void *memset(void *s, int c, size_t n);
|
|
#endif
|
|
|
|
#include <demangle.h>
|
|
#include "libiberty.h"
|
|
|
|
struct rust_demangler
|
|
{
|
|
const char *sym;
|
|
size_t sym_len;
|
|
|
|
void *callback_opaque;
|
|
demangle_callbackref callback;
|
|
|
|
/* Position of the next character to read from the symbol. */
|
|
size_t next;
|
|
|
|
/* Non-zero if any error occurred. */
|
|
int errored;
|
|
|
|
/* Non-zero if nothing should be printed. */
|
|
int skipping_printing;
|
|
|
|
/* Non-zero if printing should be verbose (e.g. include hashes). */
|
|
int verbose;
|
|
|
|
/* Rust mangling version, with legacy mangling being -1. */
|
|
int version;
|
|
|
|
uint64_t bound_lifetime_depth;
|
|
};
|
|
|
|
/* Parsing functions. */
|
|
|
|
static char
|
|
peek (const struct rust_demangler *rdm)
|
|
{
|
|
if (rdm->next < rdm->sym_len)
|
|
return rdm->sym[rdm->next];
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
eat (struct rust_demangler *rdm, char c)
|
|
{
|
|
if (peek (rdm) == c)
|
|
{
|
|
rdm->next++;
|
|
return 1;
|
|
}
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
static char
|
|
next (struct rust_demangler *rdm)
|
|
{
|
|
char c = peek (rdm);
|
|
if (!c)
|
|
rdm->errored = 1;
|
|
else
|
|
rdm->next++;
|
|
return c;
|
|
}
|
|
|
|
static uint64_t
|
|
parse_integer_62 (struct rust_demangler *rdm)
|
|
{
|
|
char c;
|
|
uint64_t x;
|
|
|
|
if (eat (rdm, '_'))
|
|
return 0;
|
|
|
|
x = 0;
|
|
while (!eat (rdm, '_'))
|
|
{
|
|
c = next (rdm);
|
|
x *= 62;
|
|
if (ISDIGIT (c))
|
|
x += c - '0';
|
|
else if (ISLOWER (c))
|
|
x += 10 + (c - 'a');
|
|
else if (ISUPPER (c))
|
|
x += 10 + 26 + (c - 'A');
|
|
else
|
|
{
|
|
rdm->errored = 1;
|
|
return 0;
|
|
}
|
|
}
|
|
return x + 1;
|
|
}
|
|
|
|
static uint64_t
|
|
parse_opt_integer_62 (struct rust_demangler *rdm, char tag)
|
|
{
|
|
if (!eat (rdm, tag))
|
|
return 0;
|
|
return 1 + parse_integer_62 (rdm);
|
|
}
|
|
|
|
/* Parse an optional disambiguator, i.e. an optional base-62 number
   tagged with `s`.  Return 0 when none is present.  */
static uint64_t
parse_disambiguator (struct rust_demangler *rdm)
{
  uint64_t dis = parse_opt_integer_62 (rdm, 's');

  return dis;
}
|
|
|
|
static size_t
|
|
parse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value)
|
|
{
|
|
char c;
|
|
size_t hex_len;
|
|
|
|
hex_len = 0;
|
|
*value = 0;
|
|
|
|
while (!eat (rdm, '_'))
|
|
{
|
|
*value <<= 4;
|
|
|
|
c = next (rdm);
|
|
if (ISDIGIT (c))
|
|
*value |= c - '0';
|
|
else if (c >= 'a' && c <= 'f')
|
|
*value |= 10 + (c - 'a');
|
|
else
|
|
{
|
|
rdm->errored = 1;
|
|
return 0;
|
|
}
|
|
hex_len++;
|
|
}
|
|
|
|
return hex_len;
|
|
}
|
|
|
|
/* An identifier as it appears in a mangled symbol.  Both parts point
   into the symbol text and are not NUL-terminated.  */
struct rust_mangled_ident
{
  /* ASCII part of the identifier.  */
  const char *ascii;
  size_t ascii_len;

  /* Punycode insertion codes for Unicode codepoints, if any.  */
  const char *punycode;
  size_t punycode_len;
};
|
|
|
|
static struct rust_mangled_ident
|
|
parse_ident (struct rust_demangler *rdm)
|
|
{
|
|
char c;
|
|
size_t start, len;
|
|
int is_punycode = 0;
|
|
struct rust_mangled_ident ident;
|
|
|
|
ident.ascii = NULL;
|
|
ident.ascii_len = 0;
|
|
ident.punycode = NULL;
|
|
ident.punycode_len = 0;
|
|
|
|
if (rdm->version != -1)
|
|
is_punycode = eat (rdm, 'u');
|
|
|
|
c = next (rdm);
|
|
if (!ISDIGIT (c))
|
|
{
|
|
rdm->errored = 1;
|
|
return ident;
|
|
}
|
|
len = c - '0';
|
|
|
|
if (c != '0')
|
|
while (ISDIGIT (peek (rdm)))
|
|
len = len * 10 + (next (rdm) - '0');
|
|
|
|
/* Skip past the optional `_` separator (v0). */
|
|
if (rdm->version != -1)
|
|
eat (rdm, '_');
|
|
|
|
start = rdm->next;
|
|
rdm->next += len;
|
|
/* Check for overflows. */
|
|
if ((start > rdm->next) || (rdm->next > rdm->sym_len))
|
|
{
|
|
rdm->errored = 1;
|
|
return ident;
|
|
}
|
|
|
|
ident.ascii = rdm->sym + start;
|
|
ident.ascii_len = len;
|
|
|
|
if (is_punycode)
|
|
{
|
|
ident.punycode_len = 0;
|
|
while (ident.ascii_len > 0)
|
|
{
|
|
ident.ascii_len--;
|
|
|
|
/* The last '_' is a separator between ascii & punycode. */
|
|
if (ident.ascii[ident.ascii_len] == '_')
|
|
break;
|
|
|
|
ident.punycode_len++;
|
|
}
|
|
if (!ident.punycode_len)
|
|
{
|
|
rdm->errored = 1;
|
|
return ident;
|
|
}
|
|
ident.punycode = ident.ascii + (len - ident.punycode_len);
|
|
}
|
|
|
|
if (ident.ascii_len == 0)
|
|
ident.ascii = NULL;
|
|
|
|
return ident;
|
|
}
|
|
|
|
/* Printing functions. */
|
|
|
|
static void
|
|
print_str (struct rust_demangler *rdm, const char *data, size_t len)
|
|
{
|
|
if (!rdm->errored && !rdm->skipping_printing)
|
|
rdm->callback (data, len, rdm->callback_opaque);
|
|
}
|
|
|
|
#define PRINT(s) print_str (rdm, s, strlen (s))
|
|
|
|
/* Print X in decimal.  */
static void
print_uint64 (struct rust_demangler *rdm, uint64_t x)
{
  /* Up to 20 decimal digits, plus the terminating NUL.  */
  char digits[21];

  snprintf (digits, sizeof digits, "%" PRIu64, x);
  print_str (rdm, digits, strlen (digits));
}
|
|
|
|
/* Print X in lowercase hexadecimal, without any prefix.  */
static void
print_uint64_hex (struct rust_demangler *rdm, uint64_t x)
{
  /* Up to 16 hexadecimal digits, plus the terminating NUL.  */
  char digits[17];

  snprintf (digits, sizeof digits, "%" PRIx64, x);
  print_str (rdm, digits, strlen (digits));
}
|
|
|
|
/* Decode one lowercase hexadecimal digit.  Return its value (0x0-0xf)
   for 0-9 and a-f, and -1 for any other character (uppercase A-F
   included).  */
static int
decode_lower_hex_nibble (char nibble)
{
  if (nibble >= 'a' && nibble <= 'f')
    return (nibble - 'a') + 0xa;

  if (nibble >= '0' && nibble <= '9')
    return nibble - '0';

  return -1;
}
|
|
|
|
/* Decode the legacy "$...$" escape sequence at the start of E, which
   holds LEN characters.

   Recognized escapes are `$C$`, the two-letter forms `$SP$`, `$BP$`,
   `$RF$`, `$LT$`, `$GT$`, `$LP$`, `$RP$`, and `$uXX$` where XX are two
   lowercase hex digits encoding a character in the range 0x20-0x7f.

   Return the unescaped character and store the length of the whole
   escape (both `$` included) in *OUT_LEN; return 0, leaving *OUT_LEN
   untouched, if the text is not a valid escape.  */
static char
decode_legacy_escape (const char *e, size_t len, size_t *out_len)
{
  static const struct
  {
    char first, second, unescaped;
  } two_letter[] = {
    { 'S', 'P', '@' },
    { 'B', 'P', '*' },
    { 'R', 'F', '&' },
    { 'L', 'T', '<' },
    { 'G', 'T', '>' },
    { 'L', 'P', '(' },
    { 'R', 'P', ')' },
  };
  char c = 0;
  size_t escape_len = 0, n;
  int hi_nibble, lo_nibble;

  if (len < 3 || e[0] != '$')
    return 0;

  /* Step over the opening `$`.  */
  e++;
  len--;

  if (e[0] == 'C')
    {
      escape_len = 1;
      c = ',';
    }
  else if (len > 2)
    {
      escape_len = 2;

      for (n = 0; n < sizeof two_letter / sizeof two_letter[0]; n++)
        if (e[0] == two_letter[n].first && e[1] == two_letter[n].second)
          {
            c = two_letter[n].unescaped;
            break;
          }

      if (!c && e[0] == 'u' && len > 3)
        {
          escape_len = 3;

          hi_nibble = decode_lower_hex_nibble (e[1]);
          if (hi_nibble < 0)
            return 0;
          lo_nibble = decode_lower_hex_nibble (e[2]);
          if (lo_nibble < 0)
            return 0;

          /* Only allow non-control ASCII characters.  */
          if (hi_nibble > 7)
            return 0;
          c = (hi_nibble << 4) | lo_nibble;
          if (c < 0x20)
            return 0;
        }
    }

  /* The escape must be known and closed by a `$`.  */
  if (!c || len <= escape_len || e[escape_len] != '$')
    return 0;

  *out_len = 2 + escape_len;
  return c;
}
|
|
|
|
static void
|
|
print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
|
|
{
|
|
char unescaped;
|
|
uint8_t *out, *p, d;
|
|
size_t len, cap, punycode_pos, j;
|
|
/* Punycode parameters and state. */
|
|
uint32_t c;
|
|
size_t base, t_min, t_max, skew, damp, bias, i;
|
|
size_t delta, w, k, t;
|
|
|
|
if (rdm->errored || rdm->skipping_printing)
|
|
return;
|
|
|
|
if (rdm->version == -1)
|
|
{
|
|
/* Ignore leading underscores preceding escape sequences.
|
|
The mangler inserts an underscore to make sure the
|
|
identifier begins with a XID_Start character. */
|
|
if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
|
|
&& ident.ascii[1] == '$')
|
|
{
|
|
ident.ascii++;
|
|
ident.ascii_len--;
|
|
}
|
|
|
|
while (ident.ascii_len > 0)
|
|
{
|
|
/* Handle legacy escape sequences ("$...$", ".." or "."). */
|
|
if (ident.ascii[0] == '$')
|
|
{
|
|
unescaped
|
|
= decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
|
|
if (unescaped)
|
|
print_str (rdm, &unescaped, 1);
|
|
else
|
|
{
|
|
/* Unexpected escape sequence, print the rest verbatim. */
|
|
print_str (rdm, ident.ascii, ident.ascii_len);
|
|
return;
|
|
}
|
|
}
|
|
else if (ident.ascii[0] == '.')
|
|
{
|
|
if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
|
|
{
|
|
/* ".." becomes "::" */
|
|
PRINT ("::");
|
|
len = 2;
|
|
}
|
|
else
|
|
{
|
|
PRINT (".");
|
|
len = 1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* Print everything before the next escape sequence, at once. */
|
|
for (len = 0; len < ident.ascii_len; len++)
|
|
if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
|
|
break;
|
|
|
|
print_str (rdm, ident.ascii, len);
|
|
}
|
|
|
|
ident.ascii += len;
|
|
ident.ascii_len -= len;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
if (!ident.punycode)
|
|
{
|
|
print_str (rdm, ident.ascii, ident.ascii_len);
|
|
return;
|
|
}
|
|
|
|
len = 0;
|
|
cap = 4;
|
|
while (cap < ident.ascii_len)
|
|
{
|
|
cap *= 2;
|
|
/* Check for overflows. */
|
|
if ((cap * 4) / 4 != cap)
|
|
{
|
|
rdm->errored = 1;
|
|
return;
|
|
}
|
|
}
|
|
|
|
/* Store the output codepoints as groups of 4 UTF-8 bytes. */
|
|
out = (uint8_t *)malloc (cap * 4);
|
|
if (!out)
|
|
{
|
|
rdm->errored = 1;
|
|
return;
|
|
}
|
|
|
|
/* Populate initial output from ASCII fragment. */
|
|
for (len = 0; len < ident.ascii_len; len++)
|
|
{
|
|
p = out + 4 * len;
|
|
p[0] = 0;
|
|
p[1] = 0;
|
|
p[2] = 0;
|
|
p[3] = ident.ascii[len];
|
|
}
|
|
|
|
/* Punycode parameters and initial state. */
|
|
base = 36;
|
|
t_min = 1;
|
|
t_max = 26;
|
|
skew = 38;
|
|
damp = 700;
|
|
bias = 72;
|
|
i = 0;
|
|
c = 0x80;
|
|
|
|
punycode_pos = 0;
|
|
while (punycode_pos < ident.punycode_len)
|
|
{
|
|
/* Read one delta value. */
|
|
delta = 0;
|
|
w = 1;
|
|
k = 0;
|
|
do
|
|
{
|
|
k += base;
|
|
t = k < bias ? 0 : (k - bias);
|
|
if (t < t_min)
|
|
t = t_min;
|
|
if (t > t_max)
|
|
t = t_max;
|
|
|
|
if (punycode_pos >= ident.punycode_len)
|
|
goto cleanup;
|
|
d = ident.punycode[punycode_pos++];
|
|
|
|
if (ISLOWER (d))
|
|
d = d - 'a';
|
|
else if (ISDIGIT (d))
|
|
d = 26 + (d - '0');
|
|
else
|
|
{
|
|
rdm->errored = 1;
|
|
goto cleanup;
|
|
}
|
|
|
|
delta += d * w;
|
|
w *= base - t;
|
|
}
|
|
while (d >= t);
|
|
|
|
/* Compute the new insert position and character. */
|
|
len++;
|
|
i += delta;
|
|
c += i / len;
|
|
i %= len;
|
|
|
|
/* Ensure enough space is available. */
|
|
if (cap < len)
|
|
{
|
|
cap *= 2;
|
|
/* Check for overflows. */
|
|
if ((cap * 4) / 4 != cap || cap < len)
|
|
{
|
|
rdm->errored = 1;
|
|
goto cleanup;
|
|
}
|
|
}
|
|
p = (uint8_t *)realloc (out, cap * 4);
|
|
if (!p)
|
|
{
|
|
rdm->errored = 1;
|
|
goto cleanup;
|
|
}
|
|
out = p;
|
|
|
|
/* Move the characters after the insert position. */
|
|
p = out + i * 4;
|
|
memmove (p + 4, p, (len - i - 1) * 4);
|
|
|
|
/* Insert the new character, as UTF-8 bytes. */
|
|
p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0;
|
|
p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0;
|
|
p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f);
|
|
p[3] = 0x80 | (c & 0x3f);
|
|
|
|
/* If there are no more deltas, decoding is complete. */
|
|
if (punycode_pos == ident.punycode_len)
|
|
break;
|
|
|
|
i++;
|
|
|
|
/* Perform bias adaptation. */
|
|
delta /= damp;
|
|
damp = 2;
|
|
|
|
delta += delta / len;
|
|
k = 0;
|
|
while (delta > ((base - t_min) * t_max) / 2)
|
|
{
|
|
delta /= base - t_min;
|
|
k += base;
|
|
}
|
|
bias = k + ((base - t_min + 1) * delta) / (delta + skew);
|
|
}
|
|
|
|
/* Remove all the 0 bytes to leave behind an UTF-8 string. */
|
|
for (i = 0, j = 0; i < len * 4; i++)
|
|
if (out[i] != 0)
|
|
out[j++] = out[i];
|
|
|
|
print_str (rdm, (const char *)out, j);
|
|
|
|
cleanup:
|
|
free (out);
|
|
}
|
|
|
|
/* Print the lifetime according to the previously decoded index.
|
|
An index of `0` always refers to `'_`, but starting with `1`,
|
|
indices refer to late-bound lifetimes introduced by a binder. */
|
|
static void
|
|
print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt)
|
|
{
|
|
char c;
|
|
uint64_t depth;
|
|
|
|
PRINT ("'");
|
|
if (lt == 0)
|
|
{
|
|
PRINT ("_");
|
|
return;
|
|
}
|
|
|
|
depth = rdm->bound_lifetime_depth - lt;
|
|
/* Try to print lifetimes alphabetically first. */
|
|
if (depth < 26)
|
|
{
|
|
c = 'a' + depth;
|
|
print_str (rdm, &c, 1);
|
|
}
|
|
else
|
|
{
|
|
/* Use `'_123` after running out of letters. */
|
|
PRINT ("_");
|
|
print_uint64 (rdm, depth);
|
|
}
|
|
}
|
|
|
|
/* Demangling functions. */
|
|
|
|
/* Forward declarations: the demangling routines below call each
   other recursively.  */
static void demangle_binder (struct rust_demangler *rdm);
static void demangle_path (struct rust_demangler *rdm, int in_value);
static void demangle_generic_arg (struct rust_demangler *rdm);
static void demangle_type (struct rust_demangler *rdm);
static int demangle_path_maybe_open_generics (struct rust_demangler *rdm);
static void demangle_dyn_trait (struct rust_demangler *rdm);
static void demangle_const (struct rust_demangler *rdm);
static void demangle_const_uint (struct rust_demangler *rdm);
static void demangle_const_int (struct rust_demangler *rdm);
static void demangle_const_bool (struct rust_demangler *rdm);
static void demangle_const_char (struct rust_demangler *rdm);
|
|
|
|
/* Optionally enter a binder ('G') for late-bound lifetimes,
|
|
printing e.g. `for<'a, 'b> `, and make those lifetimes visible
|
|
to the caller (via depth level, which the caller should reset). */
|
|
static void
|
|
demangle_binder (struct rust_demangler *rdm)
|
|
{
|
|
uint64_t i, bound_lifetimes;
|
|
|
|
if (rdm->errored)
|
|
return;
|
|
|
|
bound_lifetimes = parse_opt_integer_62 (rdm, 'G');
|
|
if (bound_lifetimes > 0)
|
|
{
|
|
PRINT ("for<");
|
|
for (i = 0; i < bound_lifetimes; i++)
|
|
{
|
|
if (i > 0)
|
|
PRINT (", ");
|
|
rdm->bound_lifetime_depth++;
|
|
print_lifetime_from_index (rdm, 1);
|
|
}
|
|
PRINT ("> ");
|
|
}
|
|
}
|
|
|
|
static void
|
|
demangle_path (struct rust_demangler *rdm, int in_value)
|
|
{
|
|
char tag, ns;
|
|
int was_skipping_printing;
|
|
size_t i, backref, old_next;
|
|
uint64_t dis;
|
|
struct rust_mangled_ident name;
|
|
|
|
if (rdm->errored)
|
|
return;
|
|
|
|
switch (tag = next (rdm))
|
|
{
|
|
case 'C':
|
|
dis = parse_disambiguator (rdm);
|
|
name = parse_ident (rdm);
|
|
|
|
print_ident (rdm, name);
|
|
if (rdm->verbose)
|
|
{
|
|
PRINT ("[");
|
|
print_uint64_hex (rdm, dis);
|
|
PRINT ("]");
|
|
}
|
|
break;
|
|
case 'N':
|
|
ns = next (rdm);
|
|
if (!ISLOWER (ns) && !ISUPPER (ns))
|
|
{
|
|
rdm->errored = 1;
|
|
return;
|
|
}
|
|
|
|
demangle_path (rdm, in_value);
|
|
|
|
dis = parse_disambiguator (rdm);
|
|
name = parse_ident (rdm);
|
|
|
|
if (ISUPPER (ns))
|
|
{
|
|
/* Special namespaces, like closures and shims. */
|
|
PRINT ("::{");
|
|
switch (ns)
|
|
{
|
|
case 'C':
|
|
PRINT ("closure");
|
|
break;
|
|
case 'S':
|
|
PRINT ("shim");
|
|
break;
|
|
default:
|
|
print_str (rdm, &ns, 1);
|
|
}
|
|
if (name.ascii || name.punycode)
|
|
{
|
|
PRINT (":");
|
|
print_ident (rdm, name);
|
|
}
|
|
PRINT ("#");
|
|
print_uint64 (rdm, dis);
|
|
PRINT ("}");
|
|
}
|
|
else
|
|
{
|
|
/* Implementation-specific/unspecified namespaces. */
|
|
|
|
if (name.ascii || name.punycode)
|
|
{
|
|
PRINT ("::");
|
|
print_ident (rdm, name);
|
|
}
|
|
}
|
|
break;
|
|
case 'M':
|
|
case 'X':
|
|
/* Ignore the `impl`'s own path.*/
|
|
parse_disambiguator (rdm);
|
|
was_skipping_printing = rdm->skipping_printing;
|
|
rdm->skipping_printing = 1;
|
|
demangle_path (rdm, in_value);
|
|
rdm->skipping_printing = was_skipping_printing;
|
|
/* fallthrough */
|
|
case 'Y':
|
|
PRINT ("<");
|
|
demangle_type (rdm);
|
|
if (tag != 'M')
|
|
{
|
|
PRINT (" as ");
|
|
demangle_path (rdm, 0);
|
|
}
|
|
PRINT (">");
|
|
break;
|
|
case 'I':
|
|
demangle_path (rdm, in_value);
|
|
if (in_value)
|
|
PRINT ("::");
|
|
PRINT ("<");
|
|
for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
|
|
{
|
|
if (i > 0)
|
|
PRINT (", ");
|
|
demangle_generic_arg (rdm);
|
|
}
|
|
PRINT (">");
|
|
break;
|
|
case 'B':
|
|
backref = parse_integer_62 (rdm);
|
|
if (!rdm->skipping_printing)
|
|
{
|
|
old_next = rdm->next;
|
|
rdm->next = backref;
|
|
demangle_path (rdm, in_value);
|
|
rdm->next = old_next;
|
|
}
|
|
break;
|
|
default:
|
|
rdm->errored = 1;
|
|
return;
|
|
}
|
|
}
|
|
|
|
/* Parse and print one generic argument: a lifetime (tag 'L'), a
   constant (tag 'K'), or otherwise a type.  */
static void
demangle_generic_arg (struct rust_demangler *rdm)
{
  uint64_t lifetime;

  if (eat (rdm, 'L'))
    {
      lifetime = parse_integer_62 (rdm);
      print_lifetime_from_index (rdm, lifetime);
      return;
    }

  if (eat (rdm, 'K'))
    {
      demangle_const (rdm);
      return;
    }

  demangle_type (rdm);
}
|
|
|
|
/* Return the printed name of the basic type encoded by TAG, or NULL
   if TAG does not denote a basic type.  */
static const char *
basic_type (char tag)
{
  static const struct
  {
    char tag;
    const char *name;
  } basic_types[] = {
    { 'b', "bool" },
    { 'c', "char" },
    { 'e', "str" },
    { 'u', "()" },
    { 'a', "i8" },
    { 's', "i16" },
    { 'l', "i32" },
    { 'x', "i64" },
    { 'n', "i128" },
    { 'i', "isize" },
    { 'h', "u8" },
    { 't', "u16" },
    { 'm', "u32" },
    { 'y', "u64" },
    { 'o', "u128" },
    { 'j', "usize" },
    { 'f', "f32" },
    { 'd', "f64" },
    { 'z', "!" },
    { 'p', "_" },
    { 'v', "..." },
  };
  size_t n;

  for (n = 0; n < sizeof basic_types / sizeof basic_types[0]; n++)
    if (basic_types[n].tag == tag)
      return basic_types[n].name;

  return NULL;
}
|
|
|
|
static void
|
|
demangle_type (struct rust_demangler *rdm)
|
|
{
|
|
char tag;
|
|
size_t i, old_next, backref;
|
|
uint64_t lt, old_bound_lifetime_depth;
|
|
const char *basic;
|
|
struct rust_mangled_ident abi;
|
|
|
|
if (rdm->errored)
|
|
return;
|
|
|
|
tag = next (rdm);
|
|
|
|
basic = basic_type (tag);
|
|
if (basic)
|
|
{
|
|
PRINT (basic);
|
|
return;
|
|
}
|
|
|
|
switch (tag)
|
|
{
|
|
case 'R':
|
|
case 'Q':
|
|
PRINT ("&");
|
|
if (eat (rdm, 'L'))
|
|
{
|
|
lt = parse_integer_62 (rdm);
|
|
if (lt)
|
|
{
|
|
print_lifetime_from_index (rdm, lt);
|
|
PRINT (" ");
|
|
}
|
|
}
|
|
if (tag != 'R')
|
|
PRINT ("mut ");
|
|
demangle_type (rdm);
|
|
break;
|
|
case 'P':
|
|
case 'O':
|
|
PRINT ("*");
|
|
if (tag != 'P')
|
|
PRINT ("mut ");
|
|
else
|
|
PRINT ("const ");
|
|
demangle_type (rdm);
|
|
break;
|
|
case 'A':
|
|
case 'S':
|
|
PRINT ("[");
|
|
demangle_type (rdm);
|
|
if (tag == 'A')
|
|
{
|
|
PRINT ("; ");
|
|
demangle_const (rdm);
|
|
}
|
|
PRINT ("]");
|
|
break;
|
|
case 'T':
|
|
PRINT ("(");
|
|
for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
|
|
{
|
|
if (i > 0)
|
|
PRINT (", ");
|
|
demangle_type (rdm);
|
|
}
|
|
if (i == 1)
|
|
PRINT (",");
|
|
PRINT (")");
|
|
break;
|
|
case 'F':
|
|
old_bound_lifetime_depth = rdm->bound_lifetime_depth;
|
|
demangle_binder (rdm);
|
|
|
|
if (eat (rdm, 'U'))
|
|
PRINT ("unsafe ");
|
|
|
|
if (eat (rdm, 'K'))
|
|
{
|
|
if (eat (rdm, 'C'))
|
|
{
|
|
abi.ascii = "C";
|
|
abi.ascii_len = 1;
|
|
}
|
|
else
|
|
{
|
|
abi = parse_ident (rdm);
|
|
if (!abi.ascii || abi.punycode)
|
|
{
|
|
rdm->errored = 1;
|
|
goto restore;
|
|
}
|
|
}
|
|
|
|
PRINT ("extern \"");
|
|
|
|
/* If the ABI had any `-`, they were replaced with `_`,
|
|
so the parts between `_` have to be re-joined with `-`. */
|
|
for (i = 0; i < abi.ascii_len; i++)
|
|
{
|
|
if (abi.ascii[i] == '_')
|
|
{
|
|
print_str (rdm, abi.ascii, i);
|
|
PRINT ("-");
|
|
abi.ascii += i + 1;
|
|
abi.ascii_len -= i + 1;
|
|
i = 0;
|
|
}
|
|
}
|
|
print_str (rdm, abi.ascii, abi.ascii_len);
|
|
|
|
PRINT ("\" ");
|
|
}
|
|
|
|
PRINT ("fn(");
|
|
for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
|
|
{
|
|
if (i > 0)
|
|
PRINT (", ");
|
|
demangle_type (rdm);
|
|
}
|
|
PRINT (")");
|
|
|
|
if (eat (rdm, 'u'))
|
|
{
|
|
/* Skip printing the return type if it's 'u', i.e. `()`. */
|
|
}
|
|
else
|
|
{
|
|
PRINT (" -> ");
|
|
demangle_type (rdm);
|
|
}
|
|
|
|
/* Restore `bound_lifetime_depth` to outside the binder. */
|
|
restore:
|
|
rdm->bound_lifetime_depth = old_bound_lifetime_depth;
|
|
break;
|
|
case 'D':
|
|
PRINT ("dyn ");
|
|
|
|
old_bound_lifetime_depth = rdm->bound_lifetime_depth;
|
|
demangle_binder (rdm);
|
|
|
|
for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
|
|
{
|
|
if (i > 0)
|
|
PRINT (" + ");
|
|
demangle_dyn_trait (rdm);
|
|
}
|
|
|
|
/* Restore `bound_lifetime_depth` to outside the binder. */
|
|
rdm->bound_lifetime_depth = old_bound_lifetime_depth;
|
|
|
|
if (!eat (rdm, 'L'))
|
|
{
|
|
rdm->errored = 1;
|
|
return;
|
|
}
|
|
lt = parse_integer_62 (rdm);
|
|
if (lt)
|
|
{
|
|
PRINT (" + ");
|
|
print_lifetime_from_index (rdm, lt);
|
|
}
|
|
break;
|
|
case 'B':
|
|
backref = parse_integer_62 (rdm);
|
|
if (!rdm->skipping_printing)
|
|
{
|
|
old_next = rdm->next;
|
|
rdm->next = backref;
|
|
demangle_type (rdm);
|
|
rdm->next = old_next;
|
|
}
|
|
break;
|
|
default:
|
|
/* Go back to the tag, so `demangle_path` also sees it. */
|
|
rdm->next--;
|
|
demangle_path (rdm, 0);
|
|
}
|
|
}
|
|
|
|
/* A trait in a trait object may have some "existential projections"
|
|
(i.e. associated type bindings) after it, which should be printed
|
|
in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
|
|
To this end, this method will keep the `<...>` of an 'I' path
|
|
open, by omitting the `>`, and return `Ok(true)` in that case. */
|
|
static int
|
|
demangle_path_maybe_open_generics (struct rust_demangler *rdm)
|
|
{
|
|
int open;
|
|
size_t i, old_next, backref;
|
|
|
|
open = 0;
|
|
|
|
if (rdm->errored)
|
|
return open;
|
|
|
|
if (eat (rdm, 'B'))
|
|
{
|
|
backref = parse_integer_62 (rdm);
|
|
if (!rdm->skipping_printing)
|
|
{
|
|
old_next = rdm->next;
|
|
rdm->next = backref;
|
|
open = demangle_path_maybe_open_generics (rdm);
|
|
rdm->next = old_next;
|
|
}
|
|
}
|
|
else if (eat (rdm, 'I'))
|
|
{
|
|
demangle_path (rdm, 0);
|
|
PRINT ("<");
|
|
open = 1;
|
|
for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
|
|
{
|
|
if (i > 0)
|
|
PRINT (", ");
|
|
demangle_generic_arg (rdm);
|
|
}
|
|
}
|
|
else
|
|
demangle_path (rdm, 0);
|
|
return open;
|
|
}
|
|
|
|
static void
|
|
demangle_dyn_trait (struct rust_demangler *rdm)
|
|
{
|
|
int open;
|
|
struct rust_mangled_ident name;
|
|
|
|
if (rdm->errored)
|
|
return;
|
|
|
|
open = demangle_path_maybe_open_generics (rdm);
|
|
|
|
while (eat (rdm, 'p'))
|
|
{
|
|
if (!open)
|
|
PRINT ("<");
|
|
else
|
|
PRINT (", ");
|
|
open = 1;
|
|
|
|
name = parse_ident (rdm);
|
|
print_ident (rdm, name);
|
|
PRINT (" = ");
|
|
demangle_type (rdm);
|
|
}
|
|
|
|
if (open)
|
|
PRINT (">");
|
|
}
|
|
|
|
static void
|
|
demangle_const (struct rust_demangler *rdm)
|
|
{
|
|
char ty_tag;
|
|
size_t old_next, backref;
|
|
|
|
if (rdm->errored)
|
|
return;
|
|
|
|
if (eat (rdm, 'B'))
|
|
{
|
|
backref = parse_integer_62 (rdm);
|
|
if (!rdm->skipping_printing)
|
|
{
|
|
old_next = rdm->next;
|
|
rdm->next = backref;
|
|
demangle_const (rdm);
|
|
rdm->next = old_next;
|
|
}
|
|
return;
|
|
}
|
|
|
|
ty_tag = next (rdm);
|
|
switch (ty_tag)
|
|
{
|
|
/* Placeholder. */
|
|
case 'p':
|
|
PRINT ("_");
|
|
return;
|
|
|
|
/* Unsigned integer types. */
|
|
case 'h':
|
|
case 't':
|
|
case 'm':
|
|
case 'y':
|
|
case 'o':
|
|
case 'j':
|
|
demangle_const_uint (rdm);
|
|
break;
|
|
|
|
/* Signed integer types. */
|
|
case 'a':
|
|
case 's':
|
|
case 'l':
|
|
case 'x':
|
|
case 'n':
|
|
case 'i':
|
|
demangle_const_int (rdm);
|
|
break;
|
|
|
|
/* Boolean. */
|
|
case 'b':
|
|
demangle_const_bool (rdm);
|
|
break;
|
|
|
|
/* Character. */
|
|
case 'c':
|
|
demangle_const_char (rdm);
|
|
break;
|
|
|
|
default:
|
|
rdm->errored = 1;
|
|
return;
|
|
}
|
|
|
|
if (rdm->errored)
|
|
return;
|
|
|
|
if (rdm->verbose)
|
|
{
|
|
PRINT (": ");
|
|
PRINT (basic_type (ty_tag));
|
|
}
|
|
}
|
|
|
|
static void
|
|
demangle_const_uint (struct rust_demangler *rdm)
|
|
{
|
|
size_t hex_len;
|
|
uint64_t value;
|
|
|
|
if (rdm->errored)
|
|
return;
|
|
|
|
hex_len = parse_hex_nibbles (rdm, &value);
|
|
|
|
if (hex_len > 16)
|
|
{
|
|
/* Print anything that doesn't fit in `uint64_t` verbatim. */
|
|
PRINT ("0x");
|
|
print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len);
|
|
}
|
|
else if (hex_len > 0)
|
|
print_uint64 (rdm, value);
|
|
else
|
|
rdm->errored = 1;
|
|
}
|
|
|
|
/* Parse and print a signed integer constant: an optional `n`, printed
   as a minus sign, followed by the magnitude in the unsigned
   encoding.  */
static void
demangle_const_int (struct rust_demangler *rdm)
{
  int negative = eat (rdm, 'n');

  if (negative)
    PRINT ("-");

  demangle_const_uint (rdm);
}
|
|
|
|
static void
|
|
demangle_const_bool (struct rust_demangler *rdm)
|
|
{
|
|
uint64_t value;
|
|
|
|
if (parse_hex_nibbles (rdm, &value) != 1)
|
|
{
|
|
rdm->errored = 1;
|
|
return;
|
|
}
|
|
|
|
if (value == 0)
|
|
PRINT ("false");
|
|
else if (value == 1)
|
|
PRINT ("true");
|
|
else
|
|
rdm->errored = 1;
|
|
}
|
|
|
|
static void
|
|
demangle_const_char (struct rust_demangler *rdm)
|
|
{
|
|
size_t hex_len;
|
|
uint64_t value;
|
|
|
|
hex_len = parse_hex_nibbles (rdm, &value);
|
|
|
|
if (hex_len == 0 || hex_len > 8)
|
|
{
|
|
rdm->errored = 1;
|
|
return;
|
|
}
|
|
|
|
/* Match Rust's character "debug" output as best as we can. */
|
|
PRINT ("'");
|
|
if (value == '\t')
|
|
PRINT ("\\t");
|
|
else if (value == '\r')
|
|
PRINT ("\\r");
|
|
else if (value == '\n')
|
|
PRINT ("\\n");
|
|
else if (value > ' ' && value < '~')
|
|
/* Rust also considers many non-ASCII codepoints to be printable, but
|
|
that logic is not easily ported to C. */
|
|
print_str (rdm, (char *) &value, 1);
|
|
else
|
|
{
|
|
PRINT ("\\u{");
|
|
print_uint64_hex (rdm, value);
|
|
PRINT ("}");
|
|
}
|
|
PRINT ("'");
|
|
}
|
|
|
|
/* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
|
|
The hex digits must contain at least 5 distinct digits. */
|
|
static int
|
|
is_legacy_prefixed_hash (struct rust_mangled_ident ident)
|
|
{
|
|
uint16_t seen;
|
|
int nibble;
|
|
size_t i, count;
|
|
|
|
if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
|
|
return 0;
|
|
|
|
seen = 0;
|
|
for (i = 0; i < 16; i++)
|
|
{
|
|
nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
|
|
if (nibble < 0)
|
|
return 0;
|
|
seen |= (uint16_t)1 << nibble;
|
|
}
|
|
|
|
/* Count how many distinct digits were seen. */
|
|
count = 0;
|
|
while (seen)
|
|
{
|
|
if (seen & 1)
|
|
count++;
|
|
seen >>= 1;
|
|
}
|
|
|
|
return count >= 5;
|
|
}
|
|
|
|
int
|
|
rust_demangle_callback (const char *mangled, int options,
|
|
demangle_callbackref callback, void *opaque)
|
|
{
|
|
const char *p;
|
|
struct rust_demangler rdm;
|
|
struct rust_mangled_ident ident;
|
|
|
|
rdm.sym = mangled;
|
|
rdm.sym_len = 0;
|
|
|
|
rdm.callback_opaque = opaque;
|
|
rdm.callback = callback;
|
|
|
|
rdm.next = 0;
|
|
rdm.errored = 0;
|
|
rdm.skipping_printing = 0;
|
|
rdm.verbose = (options & DMGL_VERBOSE) != 0;
|
|
rdm.version = 0;
|
|
rdm.bound_lifetime_depth = 0;
|
|
|
|
/* Rust symbols always start with _R (v0) or _ZN (legacy). */
|
|
if (rdm.sym[0] == '_' && rdm.sym[1] == 'R')
|
|
rdm.sym += 2;
|
|
else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
|
|
{
|
|
rdm.sym += 3;
|
|
rdm.version = -1;
|
|
}
|
|
else
|
|
return 0;
|
|
|
|
/* Paths (v0) always start with uppercase characters. */
|
|
if (rdm.version != -1 && !ISUPPER (rdm.sym[0]))
|
|
return 0;
|
|
|
|
/* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
|
|
for (p = rdm.sym; *p; p++)
|
|
{
|
|
rdm.sym_len++;
|
|
|
|
if (*p == '_' || ISALNUM (*p))
|
|
continue;
|
|
|
|
/* Legacy Rust symbols can also contain [.:$] characters. */
|
|
if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
|
|
continue;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Legacy Rust symbols need to be handled separately. */
|
|
if (rdm.version == -1)
|
|
{
|
|
/* Legacy Rust symbols always end with E. */
|
|
if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
|
|
return 0;
|
|
rdm.sym_len--;
|
|
|
|
/* Legacy Rust symbols also always end with a path segment
|
|
that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
|
|
This early check, before any parse_ident calls, should
|
|
quickly filter out most C++ symbols unrelated to Rust. */
|
|
if (!(rdm.sym_len > 19
|
|
&& !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
|
|
return 0;
|
|
|
|
do
|
|
{
|
|
ident = parse_ident (&rdm);
|
|
if (rdm.errored || !ident.ascii)
|
|
return 0;
|
|
}
|
|
while (rdm.next < rdm.sym_len);
|
|
|
|
/* The last path segment should be the hash. */
|
|
if (!is_legacy_prefixed_hash (ident))
|
|
return 0;
|
|
|
|
/* Reset the state for a second pass, to print the symbol. */
|
|
rdm.next = 0;
|
|
if (!rdm.verbose && rdm.sym_len > 19)
|
|
{
|
|
/* Hide the last segment, containing the hash, if not verbose. */
|
|
rdm.sym_len -= 19;
|
|
}
|
|
|
|
do
|
|
{
|
|
if (rdm.next > 0)
|
|
print_str (&rdm, "::", 2);
|
|
|
|
ident = parse_ident (&rdm);
|
|
print_ident (&rdm, ident);
|
|
}
|
|
while (rdm.next < rdm.sym_len);
|
|
}
|
|
else
|
|
{
|
|
demangle_path (&rdm, 1);
|
|
|
|
/* Skip instantiating crate. */
|
|
if (!rdm.errored && rdm.next < rdm.sym_len)
|
|
{
|
|
rdm.skipping_printing = 1;
|
|
demangle_path (&rdm, 0);
|
|
}
|
|
|
|
/* It's an error to not reach the end. */
|
|
rdm.errored |= rdm.next != rdm.sym_len;
|
|
}
|
|
|
|
return !rdm.errored;
|
|
}
|
|
|
|
/* Growable string buffers.  */

/* A heap-allocated byte buffer that grows on demand.  Once ERRORED is
   set the buffer has been released (PTR is NULL, LEN and CAP are 0)
   and every further operation is a no-op.  */
struct str_buf
{
  /* Storage, or NULL if nothing has been allocated.  */
  char *ptr;
  /* Number of bytes in use.  */
  size_t len;
  /* Number of bytes allocated.  */
  size_t cap;
  /* Non-zero once growing the buffer has failed.  */
  int errored;
};

/* Put BUF in the error state, releasing its storage so that no
   partial contents can be mistaken for a complete result.  */
static void
str_buf_fail (struct str_buf *buf)
{
  free (buf->ptr);
  buf->ptr = NULL;
  buf->len = 0;
  buf->cap = 0;
  buf->errored = 1;
}

/* Make sure at least EXTRA more bytes can be stored in BUF, growing
   it by doubling its capacity (starting at 4 bytes).  If the required
   size overflows `size_t` or the allocation fails, BUF is put in the
   error state.  */
static void
str_buf_reserve (struct str_buf *buf, size_t extra)
{
  size_t available, min_new_cap, new_cap;
  char *new_ptr;

  /* Allocation failed before.  */
  if (buf->errored)
    return;

  available = buf->cap - buf->len;

  if (extra <= available)
    return;

  min_new_cap = buf->cap + (extra - available);

  /* Check for overflows.  */
  if (min_new_cap < buf->cap)
    {
      str_buf_fail (buf);
      return;
    }

  new_cap = buf->cap;

  if (new_cap == 0)
    new_cap = 4;

  /* Double capacity until sufficiently large.  */
  while (new_cap < min_new_cap)
    {
      /* Check for overflows before doubling: a wrapped capacity of 0
         could never reach MIN_NEW_CAP and would loop forever.  */
      if (new_cap > (size_t) -1 / 2)
        {
          str_buf_fail (buf);
          return;
        }

      new_cap *= 2;
    }

  new_ptr = (char *) realloc (buf->ptr, new_cap);
  if (new_ptr == NULL)
    {
      str_buf_fail (buf);
      return;
    }

  buf->ptr = new_ptr;
  buf->cap = new_cap;
}

/* Append LEN bytes starting at DATA to BUF.  Nothing is appended if
   BUF is, or ends up, in the error state.  */
static void
str_buf_append (struct str_buf *buf, const char *data, size_t len)
{
  /* Empty appends are no-ops; returning early also keeps possibly
     NULL pointers (empty buffer, empty identifier) away from
     memcpy.  */
  if (len == 0)
    return;

  str_buf_reserve (buf, len);
  if (buf->errored)
    return;

  memcpy (buf->ptr + buf->len, data, len);
  buf->len += len;
}

/* Demangler output callback that appends to the `struct str_buf`
   passed as OPAQUE.  */
static void
str_buf_demangle_callback (const char *data, size_t len, void *opaque)
{
  str_buf_append ((struct str_buf *) opaque, data, len);
}
|
|
|
|
char *
|
|
rust_demangle (const char *mangled, int options)
|
|
{
|
|
struct str_buf out;
|
|
int success;
|
|
|
|
out.ptr = NULL;
|
|
out.len = 0;
|
|
out.cap = 0;
|
|
out.errored = 0;
|
|
|
|
success = rust_demangle_callback (mangled, options,
|
|
str_buf_demangle_callback, &out);
|
|
|
|
if (!success)
|
|
{
|
|
free (out.ptr);
|
|
return NULL;
|
|
}
|
|
|
|
str_buf_append (&out, "\0", 1);
|
|
return out.ptr;
|
|
}
|