Skip to content
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ The current port status by category is:
* [[#26] v0: allow identifiers to start with a digit.](https://github.com/rust-lang/rustc-demangle/pull/26)
* [[#53] v0: replace `skip_*` methods with `print_*` methods in a "skip printing" mode.](https://github.com/rust-lang/rustc-demangle/pull/53)
* arguably backported to Rust, as the C port always took this approach
* `min_const_generics` constants (`bool`, `char`, negative signed integers)
* this arguably also includes `p` as an *untyped* placeholder constant
* **(UNPORTED)** symbol prefix flexibility (`__R` and `R`, instead of `_R`)
* **(UNPORTED)** `min_const_generics` constants (`bool`, `char`, negative signed integers)
* this arguably also includes `p` as an *untyped* placeholder constant
* **(UNPORTED)** [`str` and structural constants](https://github.com/rust-lang/rfcs/pull/3161)
(only usable in `const` generics on unstable Rust)
* **(UNPORTED)** recursion limits
Expand Down
234 changes: 209 additions & 25 deletions rust-demangle.c
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,60 @@ static void print_uint64_hex(struct rust_demangler *rdm, uint64_t x) {
PRINT(s);
}

/// Print the code point `c` the way it would be written inside a literal
/// delimited by `quote` (`'\''` for a char literal, `'"'` for a string).
///
/// * NUL, tab, CR, LF and backslash use their two-character escapes.
/// * `"` and `'` get a backslash only when they equal `quote`.
/// * Other printable ASCII (0x20..=0x7e) is printed verbatim.
/// * Everything else is printed as `\u{<lowercase hex>}`.
static void print_quoted_escaped_char(struct rust_demangler *rdm, char quote, uint32_t c) {
    switch (c) {
    case '\0':
        PRINT("\\0");
        break;

    case '\t':
        PRINT("\\t");
        break;

    case '\r':
        PRINT("\\r");
        break;

    case '\n':
        PRINT("\\n");
        break;

    case '\\':
        PRINT("\\\\");
        break;

    case '"':
    case '\'': {
        // Only the delimiter of the enclosing literal needs escaping; the
        // other quote character is printed as-is.
        if (c == '"') {
            if (quote == '"') {
                PRINT("\\\"");
            } else {
                PRINT("\"");
            }
        } else {
            if (quote == '\'') {
                PRINT("\\'");
            } else {
                PRINT("'");
            }
        }
        break;
    }

    default:
        if (c >= 0x20 && c <= 0x7e) {
            // Printable ASCII
            char v = (char)c;
            print_str(rdm, &v, 1);
        } else {
            // FIXME show printable unicode characters without hex encoding
            // A 32-bit value needs at most 8 hex digits, plus the NUL.
            char hex[9] = {0};
            snprintf(hex, sizeof hex, "%" PRIx32, c);
            PRINT("\\u{");
            PRINT(hex);
            PRINT("}");
        }
        break;
    }
}

static void
print_ident(struct rust_demangler *rdm, struct rust_mangled_ident ident) {
if (rdm->errored || rdm->skipping_printing)
Expand Down Expand Up @@ -342,8 +396,9 @@ static void demangle_generic_arg(struct rust_demangler *rdm);
static void demangle_type(struct rust_demangler *rdm);
static bool demangle_path_maybe_open_generics(struct rust_demangler *rdm);
static void demangle_dyn_trait(struct rust_demangler *rdm);
static void demangle_const(struct rust_demangler *rdm);
static void demangle_const_uint(struct rust_demangler *rdm);
static void demangle_const(struct rust_demangler *rdm, bool in_value);
static void demangle_const_uint(struct rust_demangler *rdm, char ty_tag);
static void demangle_const_str_literal(struct rust_demangler *rdm);

/// Optionally enter a binder ('G') for late-bound lifetimes,
/// printing e.g. `for<'a, 'b> `, and make those lifetimes visible
Expand Down Expand Up @@ -470,7 +525,7 @@ static void demangle_generic_arg(struct rust_demangler *rdm) {
uint64_t lt = parse_integer_62(rdm);
print_lifetime_from_index(rdm, lt);
} else if (eat(rdm, 'K'))
demangle_const(rdm);
demangle_const(rdm, false);
else
demangle_type(rdm);
}
Expand Down Expand Up @@ -566,7 +621,7 @@ static void demangle_type(struct rust_demangler *rdm) {
demangle_type(rdm);
if (tag == 'A') {
PRINT("; ");
demangle_const(rdm);
demangle_const(rdm, true);
}
PRINT("]");
break;
Expand Down Expand Up @@ -732,45 +787,138 @@ static void demangle_dyn_trait(struct rust_demangler *rdm) {
PRINT(">");
}

/// Parse lowercase hex nibbles up to (and consuming) the terminating `_`
/// and store the accumulated value in `*out`.
///
/// Returns `false` after flagging an error when a non-hex character is
/// found or when the value would not fit in 64 bits. An empty nibble
/// sequence yields 0.
static bool parse_const_hex_u64(struct rust_demangler *rdm, uint64_t *out) {
    uint64_t value = 0;

    while (!eat(rdm, '_')) {
        // The top nibble is already occupied: shifting again would
        // silently drop significant bits.
        if ((value >> 60) != 0)
            ERROR_AND(return false);
        value <<= 4;

        char c = next(rdm);
        if (IS_DIGIT(c))
            value |= (uint64_t)(c - '0');
        else if (c >= 'a' && c <= 'f')
            value |= (uint64_t)(10 + (c - 'a'));
        else
            ERROR_AND(return false);
    }

    *out = value;
    return true;
}

/// Demangle one constant and print it.
///
/// `in_value` is `false` when the constant is a generic argument and `true`
/// when it is an array length; a `str` constant (`e`) is wrapped in
/// `{`...`}` only when `in_value` is `false`.
///
/// Handled tags: `p` (placeholder, printed as `_`), the integer type tags
/// (signed ones accept a leading `n` for negative values), `b` (bool),
/// `c` (char), `e` (str) and `B` (backref). The tags `R`, `Q`, `A`, `T`
/// and `V` are not ported yet and are reported as errors.
static void demangle_const(struct rust_demangler *rdm, bool in_value) {
    CHECK_OR(!rdm->errored, return );

    bool opened_brace = false;

    char ty_tag = next(rdm);
    switch (ty_tag) {
    case 'p':
        PRINT("_");
        break;

    // Unsigned integer types.
    case 'h':
    case 't':
    case 'm':
    case 'y':
    case 'o':
    case 'j':
        demangle_const_uint(rdm, ty_tag);
        break;

    // Signed integer types: an optional `n` marks a negative value.
    case 'a':
    case 's':
    case 'l':
    case 'x':
    case 'n':
    case 'i':
        if (eat(rdm, 'n')) {
            PRINT("-");
        }
        demangle_const_uint(rdm, ty_tag);
        break;

    case 'b': {
        // Any hex spelling of 0 or 1 is accepted, not only `0_` / `1_`.
        uint64_t value = 0;
        if (!parse_const_hex_u64(rdm, &value))
            return;

        if (value == 0) {
            PRINT("false");
        } else if (value == 1) {
            PRINT("true");
        } else {
            ERROR_AND(return );
        }
        break;
    }

    case 'c': {
        uint64_t value = 0;
        if (!parse_const_hex_u64(rdm, &value))
            return;

        // U+10FFFF is the last valid code point, so only values above it
        // are rejected.
        if (value > 0x10FFFF)
            ERROR_AND(return );

        // Surrogates are not valid `char` values.
        if (value >= 0xD800 && value <= 0xDFFF)
            ERROR_AND(return );

        PRINT("'");
        print_quoted_escaped_char(rdm, '\'', (uint32_t)value);
        PRINT("'");
        break;
    }

    case 'e':
        if (!in_value) {
            opened_brace = true;
            PRINT("{");
        }
        PRINT("*");

        demangle_const_str_literal(rdm);
        break;

    case 'R':
    case 'Q':
    case 'A':
    case 'T':
    case 'V':
        ERROR_AND(return ); // FIXME not ported yet

    case 'B': {
        size_t backref = parse_integer_62(rdm);
        if (!rdm->skipping_printing) {
            // Temporarily rewind to the referenced position, then resume.
            size_t old_next = rdm->next;
            rdm->next = backref;
            demangle_const(rdm, in_value);
            rdm->next = old_next;
        }
        break;
    }

    default:
        ERROR_AND(return );
    }

    if (opened_brace) {
        PRINT("}");
    }
}

static void demangle_const_uint(struct rust_demangler *rdm) {
static void demangle_const_uint(struct rust_demangler *rdm, char ty_tag) {
CHECK_OR(!rdm->errored, return );

uint64_t value = 0;
Expand All @@ -791,11 +939,47 @@ static void demangle_const_uint(struct rust_demangler *rdm) {
// Print anything that doesn't fit in `uint64_t` verbatim.
if (hex_len > 16) {
PRINT("0x");
print_str(rdm, rdm->sym + (rdm->next - hex_len), hex_len);
return;
print_str(rdm, rdm->sym + (rdm->next - hex_len - 1), hex_len);
} else {
print_uint64(rdm, value);
}

if (rdm->verbose)
PRINT(basic_type(ty_tag));
}


/// Read one byte written as exactly two lowercase hex digits into `*out`.
/// Returns `false` after flagging an error on any other character.
static bool parse_const_str_byte(struct rust_demangler *rdm, uint32_t *out) {
    uint32_t byte = 0;

    for (int i = 0; i < 2; i++) {
        byte <<= 4;

        char c = next(rdm);
        if (IS_DIGIT(c))
            byte |= (uint32_t)(c - '0');
        else if (c >= 'a' && c <= 'f')
            byte |= (uint32_t)(10 + (c - 'a'));
        else
            ERROR_AND(return false);
    }

    *out = byte;
    return true;
}

/// Demangle the payload of a `str` constant: hex-encoded UTF-8 bytes
/// terminated by `_`, printed as a double-quoted, escaped string literal.
///
/// The bytes are decoded into code points before escaping, so a multi-byte
/// character becomes a single escape instead of one escape per byte.
/// Ill-formed UTF-8 (bad lead or continuation byte, overlong form,
/// surrogate, value above U+10FFFF, truncated sequence) is an error.
///
/// Non-ASCII characters are still printed as `\u{...}` escapes, because
/// that is what `print_quoted_escaped_char` does for them.
static void demangle_const_str_literal(struct rust_demangler *rdm) {
    CHECK_OR(!rdm->errored, return );

    PRINT("\"");

    while (!eat(rdm, '_')) {
        uint32_t lead = 0;
        if (!parse_const_str_byte(rdm, &lead))
            return;

        // Classify the lead byte: payload bits, number of continuation
        // bytes, and the smallest code point that legitimately needs this
        // sequence length (anything smaller is an overlong encoding).
        uint32_t cp;
        uint32_t min_cp;
        size_t extra;
        if (lead < 0x80) {
            cp = lead;
            min_cp = 0;
            extra = 0;
        } else if ((lead & 0xe0) == 0xc0) {
            cp = lead & 0x1f;
            min_cp = 0x80;
            extra = 1;
        } else if ((lead & 0xf0) == 0xe0) {
            cp = lead & 0x0f;
            min_cp = 0x800;
            extra = 2;
        } else if ((lead & 0xf8) == 0xf0) {
            cp = lead & 0x07;
            min_cp = 0x10000;
            extra = 3;
        } else {
            ERROR_AND(return );
        }

        for (size_t i = 0; i < extra; i++) {
            uint32_t cont = 0;
            if (!parse_const_str_byte(rdm, &cont))
                return;
            if ((cont & 0xc0) != 0x80)
                ERROR_AND(return );
            cp = (cp << 6) | (cont & 0x3f);
        }

        if (cp < min_cp || cp > 0x10FFFF)
            ERROR_AND(return );
        if (cp >= 0xD800 && cp <= 0xDFFF)
            ERROR_AND(return );

        print_quoted_escaped_char(rdm, '"', cp);
    }

    PRINT("\"");
}

bool rust_demangle_with_callback(
Expand Down
15 changes: 13 additions & 2 deletions test-harness/tests/v0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,16 +106,27 @@ fn demangle_min_const_generics() {
t_const!("c76_", "'v'");
t_const!("c22_", r#"'"'"#);
t_const!("ca_", "'\\n'");
t_const!("c2202_", "'∂'");
}

// FIXME(eddyb) port the relevant functionality to C.
// FIXME(bjorn3) port the relevant functionality to C.
#[should_panic]
#[test]
fn demangle_min_const_generics_unicode() {
t_const!("c2202_", "'∂'");
}

// ASCII-only `str` constants (`e` + hex-encoded bytes + `_`): each case
// pairs a mangled const with the expected `{*"..."}` rendering, covering
// plain text, a single quote left unescaped inside double quotes, and
// the `\t` / `\n` escapes.
// NOTE(review): `t_const!` is defined outside this view; presumably it wraps
// the const in a full symbol and compares the demangled output — confirm.
#[test]
fn demangle_const_str() {
t_const!("e616263_", "{*\"abc\"}");
t_const!("e27_", r#"{*"'"}"#);
t_const!("e090a_", "{*\"\\t\\n\"}");

}

// FIXME(bjorn3) port the relevant functionality to C.
#[should_panic]
#[test]
fn demangle_const_str_unicode() {
t_const!("ee28882c3bc_", "{*\"∂ü\"}");
t_const!(
"ee183a1e18390e183ade1839be18394e1839ae18390e183935fe18392e18394e1839b\
Expand Down