Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion MANIFEST.in

This file was deleted.

213 changes: 194 additions & 19 deletions itanium_demangler/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# encoding:utf-8
name = "itanium_demangler"

"""
Expand All @@ -15,11 +14,15 @@
the type of destructor
* `oper`: `node.value` (`str`) holds a symbolic operator name, without the keyword
"operator"
* `oper_unary`: same as `oper` but to distinguish unary operators from their binary
counterparts
* `oper_cast`: `node.value` holds a type node
* `tpl_args`: `node.value` (`tuple`) holds a sequence of type nodes
* `qual_name`: `node.value` (`tuple`) holds a sequence of `name` and `tpl_args` nodes,
possibly ending in a `ctor`, `dtor` or `operator` node
* `abi`: `node.value` holds a name node, `node.qual` (`frozenset`) holds a set of ABI tags
* `abominable`: `node.value` holds a name node, `node.qual` (`frozenset`) holds any
combinations from `"const"`, `"volatile"`, `"&"`, and `"&&"`

Type nodes:
* `name` and `qual_name` specify a type by its name
Expand All @@ -29,7 +32,8 @@
`"const"`, `"volatile"`, or `"restrict"`
* `literal`: `node.value` (`str`) holds the literal representation as-is,
`node.ty` holds a type node specifying the type of the literal
* `function`: `node.name` holds a name node specifying the function name,
* `entity`: `node.value` holds a name node that refers to a declared entity
* `func`: `node.name` holds a name node specifying the function name,
`node.ret_ty` holds a type node specifying the return type of a template function,
if any, or `None`, `node.arg_tys` (`tuple`) holds a sequence of type nodes
specifying the function arguments
Expand Down Expand Up @@ -128,8 +132,8 @@ def __str__(self):
return '{base dtor}'
else:
assert False
elif self.kind == 'oper':
if self.value.startswith('new') or self.value.startswith('delete'):
elif self.kind in ('oper', 'oper_unary'):
if self.value[0].isalpha():
return 'operator ' + self.value
else:
return 'operator' + self.value
Expand Down Expand Up @@ -164,6 +168,43 @@ def __str__(self):
else:
return repr(self)

def encoding(self):
    """Return the mangled spelling of this node.

    Handles the kinds `name`, `builtin`, `qual_name`, `tpl_args`,
    `pointer`, `lvalue`, `rvalue`, `ctor`, `dtor`, `oper`, `oper_unary`
    and `oper_cast`.

    Raises `NotImplementedError` for any other kind.
    """
    kind = self.kind

    if kind == 'name':
        # identifier prefixed by its own length
        return f'{len(self.value)}{self.value}'
    if kind == 'builtin':
        return _mangled_builtin_types[self]
    if kind == 'qual_name':
        if self == _builtin_types['Dn']:
            return 'Dn'
        if not _is_nested_name(self):
            return "".join(part.encoding() for part in self.value)
        # a leading run of components may collapse into a standard abbreviation
        prefix, rest = _infer_std_names(self.value)
        if len(rest) > 1:
            return f'N{prefix}{"".join(part.encoding() for part in rest)}E'
        return f'{prefix}{"".join(part.encoding() for part in rest)}'
    if kind == 'tpl_args':
        return f'I{"".join(arg.encoding() for arg in self.value)}E'

    # kinds that are a single letter in front of the wrapped node
    indirection = {'pointer': 'P', 'lvalue': 'R', 'rvalue': 'O'}
    if kind in indirection:
        return f'{indirection[kind]}{self.value.encoding()}'

    if kind == 'ctor':
        return _mangled_ctor_map[self.value]
    if kind == 'dtor':
        return _mangled_dtor_map[self.value]
    if kind == 'oper':
        return _mangled_operators[self.value]
    if kind == 'oper_unary':
        return _mangled_unary_operators[self.value]
    if kind == 'oper_cast':
        return f'cv{self.value.encoding()}'

    raise NotImplementedError(f'{self.kind!r} is not supported')

def left(self):
if self.kind == "pointer":
return self.value.left() + "*"
Expand Down Expand Up @@ -196,12 +237,39 @@ def __repr__(self):

def __str__(self):
    """Render this qualified node as text.

    * `abi`: the wrapped name followed by one `[abi:TAG]` per tag.
    * `cv_qual`: the wrapped type followed by its qualifiers.
    * `abominable`: the wrapped name followed by its qualifiers in the
      order produced by `_order_abominable_qualifiers`.

    Any other kind falls back to `repr(self)`.
    """
    if self.kind == 'abi':
        # `self.qual` is a frozenset; sort it so the output does not depend
        # on set iteration order.
        return str(self.value) + "".join(['[abi:' + tag + ']' for tag in sorted(self.qual)])
    elif self.kind == 'cv_qual':
        # sorted for the same reason as above
        return ' '.join([str(self.value)] + sorted(self.qual))
    elif self.kind == 'abominable':
        return ' '.join([str(self.value)] + _order_abominable_qualifiers(self))
    else:
        return repr(self)

def encoding(self):
    """Return the mangled spelling of this qualified node.

    * `abi`: the wrapped name's encoding followed by one `B<len><tag>`
      per ABI tag, tags in sorted order.
    * `cv_qual`: the CV-qualifier letters followed by the wrapped type's
      encoding.
    * `abominable`: a nested name, `N`, then CV-qualifier letters, then
      the ref-qualifier letter, then the name components, then `E`.

    CV-qualifier letters are written in the order fixed by the Itanium
    grammar (`<CV-qualifiers> ::= [r] [V] [K]`): restrict, volatile, const.

    Raises `NotImplementedError` for any other kind.
    """
    if self.kind == 'abi':
        return self.value.encoding() + "".join(f'B{len(x)}{x}' for x in sorted(self.qual))
    elif self.kind == 'cv_qual':
        text = ""
        if 'restrict' in self.qual:
            text += 'r'
        if 'volatile' in self.qual:
            text += 'V'
        if 'const' in self.qual:
            text += 'K'
        return f'{text}{self.value.encoding()}'
    elif self.kind == 'abominable':
        text = ""
        if 'volatile' in self.qual:
            text += 'V'
        if 'const' in self.qual:
            text += 'K'
        # at most one ref-qualifier is written; `&` wins if both are present
        if '&' in self.qual:
            text += 'R'
        elif '&&' in self.qual:
            text += 'O'
        # `self.value` is the wrapped name node; its components may start
        # with a run that collapses into a standard abbreviation
        prefix, rest = _infer_std_names(self.value.value)
        return f'N{text}{prefix}{"".join(p.encoding() for p in rest)}E'
    else:
        # fail loudly instead of returning None, which callers would
        # interpolate into the mangled string as the text "None"
        raise NotImplementedError(f'{self.kind!r} is not supported')

def left(self):
    """Return the left part of the rendering, which is all of `str(self)`."""
    rendered = str(self)
    return rendered

Expand All @@ -222,16 +290,25 @@ def __repr__(self):
def __str__(self):
if self.kind == 'literal':
return '(' + str(self.ty) + ')' + str(self.value)
elif self.kind == 'entity':
return str(self.value)
else:
return repr(self)

def encoding(self):
    """Return the mangled spelling of this node.

    A `literal` becomes `L<type><value>E`; an `entity` becomes `L`, the
    full mangled name of the referenced entity, then `E`. Other kinds
    yield `None`.
    """
    kind = self.kind
    if kind == 'entity':
        symbol = mangle(self.value)
        return f'L{symbol}E'
    if kind == 'literal':
        type_code = self.ty.encoding()
        return f'L{type_code}{self.value}E'
    return None

def left(self):
    """Return the left part of the rendering, which is all of `str(self)`."""
    rendered = str(self)
    return rendered

def right(self):
    """Return the right part of the rendering, which is always empty."""
    return str()

def map(self, f):
# does not affect references to entities
if self.kind == 'literal':
return self._replace(ty=f(self.ty))
else:
Expand All @@ -248,12 +325,19 @@ def __str__(self):
result = ""
if self.ret_ty is not None:
result += str(self.ret_ty) + ' '
qual = None
if self.name is not None:
result += str(self.name)
if self.name.kind == 'abominable':
qual = _order_abominable_qualifiers(self.name)
result += str(self.name.value)
else:
result += str(self.name)
if self.arg_tys == (Node('builtin', 'void'),):
result += '()'
else:
result += '(' + ', '.join(map(str, self.arg_tys)) + ')'
if qual:
result += ' ' + ' '.join(qual)
return result
else:
return repr(self)
Expand Down Expand Up @@ -281,6 +365,20 @@ def right(self):
else:
return ""

def encoding(self):
    """Return the mangled spelling of a `func` node.

    A named function is its name's encoding followed by the return type
    (when present) and the argument types. A node without a name is a
    bare function type and is wrapped as `F ... E`. Other kinds yield
    `None`.
    """
    if self.kind != 'func':
        return None

    is_bare = self.name is None
    pieces = ['F' if is_bare else self.name.encoding()]
    if self.ret_ty is not None:
        pieces.append(self.ret_ty.encoding())
    pieces.extend(arg.encoding() for arg in self.arg_tys)
    if is_bare:
        # a bare function type is closed explicitly
        pieces.append('E')
    return ''.join(pieces)

def map(self, f):
if self.kind == 'func':
return self._replace(name=f(self.name) if self.name else None,
Expand Down Expand Up @@ -317,6 +415,11 @@ def right(self):
else:
return ""

def encoding(self):
    """Return the mangled spelling of an `array` node.

    The result is `A<bound>_<element type>`. The bound is taken from
    `self.dimension.value`; when the node carries no dimension the bound
    is left empty, giving `A_<element type>`. Other kinds yield `None`.
    """
    if self.kind == 'array':
        # instantiation-dependent array bound expression is not supported
        # The dimension may be absent (this class's `map` allows for it);
        # the grammar makes the bound optional, so write nothing then.
        bound = self.dimension.value if self.dimension else ''
        return f'A{bound}_{self.ty.encoding()}'
    return None

def map(self, f):
if self.kind == 'array':
return self._replace(dimension=f(self.dimension) if self.dimension else None,
Expand Down Expand Up @@ -351,6 +454,9 @@ def right(self):
else:
return ""

def encoding(self):
    """Return `M` followed by the class type and member type encodings."""
    owner_code = self.cls_ty.encoding()
    member_code = self.member_ty.encoding()
    return 'M{}{}'.format(owner_code, member_code)

def map(self, f):
if self.kind in ('data', 'func'):
return self._replace(cls_ty=f(self.cls_ty) if self.cls_ty else None,
Expand Down Expand Up @@ -378,15 +484,19 @@ def map(self, f):
'Sd': [Node('name', 'std'), Node('name', 'iostream')],
}

# `!` is also unary, but without ambiguity
_unary_operators = {
'ps': '+',
'ng': '-',
'ad': '&',
'de': '*',
}

_operators = {
'nw': 'new',
'na': 'new[]',
'dl': 'delete',
'da': 'delete[]',
'ps': '+', # (unary)
'ng': '-', # (unary)
'ad': '&', # (unary)
'de': '*', # (unary)
'co': '~',
'pl': '+',
'mi': '-',
Expand Down Expand Up @@ -460,6 +570,12 @@ def map(self, f):
'Dn': Node('qual_name', (Node('name', 'std'), Node('builtin', 'nullptr_t')))
}

# Reverse lookup tables (decoded value -> mangled code) used by the
# `encoding()` methods. The ctor/dtor table is split by the first letter of
# the code, `C` for constructors and `D` for destructors.
_mangled_ctor_map = dict(
    (meaning, code) for code, meaning in _ctor_dtor_map.items() if code.startswith('C')
)
_mangled_dtor_map = dict(
    (meaning, code) for code, meaning in _ctor_dtor_map.items() if code.startswith('D')
)
_mangled_builtin_types = dict(zip(_builtin_types.values(), _builtin_types.keys()))
_mangled_unary_operators = dict(zip(_unary_operators.values(), _unary_operators.keys()))
_mangled_operators = dict(zip(_operators.values(), _operators.keys()))


def _handle_cv(qualifiers, node):
qualifier_set = set()
Expand All @@ -482,6 +598,20 @@ def _handle_indirect(qualifier, node):
return Node('rvalue', node)
return node

def _handle_abominable(cv_qualifiers, ref_qualifier, node):
qualifier_set = set()
if 'V' in cv_qualifiers:
qualifier_set.add('volatile')
if 'K' in cv_qualifiers:
qualifier_set.add('const')
if ref_qualifier == 'R':
qualifier_set.add('&')
elif ref_qualifier == 'O':
qualifier_set.add('&&')
if qualifier_set:
return QualNode('abominable', value=node, qual=frozenset(qualifier_set))
else:
return node

# Matches a run of one or more digits.
_NUMBER_RE = re.compile(r"\d+")

Expand Down Expand Up @@ -557,7 +687,11 @@ def _parse_name(cursor, is_nested=False):
elif match.group('std_name') is not None:
node = Node('qual_name', _std_names[match.group('std_name')])
elif match.group('operator_name') is not None:
node = Node('oper', _operators[match.group('operator_name')])
encoded = match.group('operator_name')
if encoded in _unary_operators:
node = Node('oper_unary', _unary_operators[encoded])
else:
node = Node('oper', _operators[encoded])
elif match.group('operator_cv') is not None:
ty = _parse_type(cursor)
if ty is None:
Expand Down Expand Up @@ -593,8 +727,7 @@ def _parse_name(cursor, is_nested=False):
else:
cursor.add_subst(Node('qual_name', tuple(nodes)))
node = Node('qual_name', tuple(nodes))
node = _handle_cv(match.group('cv_qual'), node)
node = _handle_indirect(match.group('ref_qual'), node)
node = _handle_abominable(match.group('cv_qual'), match.group('ref_qual'), node)
elif match.group('template_param') is not None:
seq_id = _parse_seq_id(cursor)
if seq_id is None:
Expand Down Expand Up @@ -622,19 +755,19 @@ def _parse_name(cursor, is_nested=False):
node = QualNode('abi', node, frozenset(abi_tags))

if not is_nested and cursor.accept('I') and (
node.kind in ('name', 'oper', 'oper_cast') or
node.kind in ('name', 'oper', 'oper_unary', 'oper_cast') or
match.group('std_prefix') is not None or
match.group('std_name') is not None or
match.group('substitution') is not None):
if node.kind in ('name', 'oper', 'oper_cast') or match.group('std_prefix') is not None:
if node.kind in ('name', 'oper', 'oper_unary', 'oper_cast') or match.group('std_prefix') is not None:
cursor.add_subst(node) # <unscoped-template-name> ::= <substitution>
templ_args = _parse_until_end(cursor, 'tpl_args', _parse_type)
if templ_args is None:
return None
node = Node('qual_name', (node, templ_args))
if ((match.group('std_prefix') is not None or
match.group('std_name') is not None) and
node.value[0].value[1].kind not in ('oper', 'oper_cast')):
node.value[0].value[1].kind not in ('oper', 'oper_unary', 'oper_cast')):
cursor.add_subst(node)

return node
Expand Down Expand Up @@ -732,7 +865,7 @@ def _parse_expr_primary(cursor):
return None
elif match.group('mangled_name') is not None:
mangled_name = cursor.advance_until('E')
return _parse_mangled_name(_Cursor(mangled_name))
return CastNode('entity', _parse_mangled_name(_Cursor(mangled_name)), None)
elif match.group('literal') is not None:
ty = _parse_type(cursor)
if ty is None:
Expand Down Expand Up @@ -884,7 +1017,12 @@ def parse(raw):
ast = _expand_arg_packs(ast)
return ast

def is_ctor_or_dtor(ast) -> bool:

def mangle(ast):
    """Return the mangled symbol for `ast`: `_Z` followed by `ast.encoding()`."""
    encoded = ast.encoding()
    return '_Z{}'.format(encoded)


def _is_ctor_or_dtor(ast) -> bool:
if ast.kind == 'func':
return _is_ctor_or_dtor(ast.name)
elif ast.kind == 'qual_name':
Expand All @@ -893,6 +1031,43 @@ def is_ctor_or_dtor(ast) -> bool:
else:
return False


def _order_abominable_qualifiers(ast) -> list:
result = []
if 'const' in ast.qual:
result.append('const')
if 'volatile' in ast.qual:
result.append('volatile')
if '&' in ast.qual:
result.append('&')
elif '&&' in ast.qual:
result.append('&&')
return result


def _is_nested_name(ast) -> bool:
if ast.kind == 'qual_name':
if len(ast.value) == 2:
return ast.value[-1].kind != 'tpl_args'
else:
return len(ast.value) > 1
else:
return ast.kind == 'abominable'


def _infer_std_names(components):
    """Find the longest `_std_names` entry that `components` starts with.

    Returns a pair: the abbreviation of that entry (`''` when nothing
    matches) and a list of the components left after the matched run.
    When several entries of the same length match, the first one met
    while iterating `_std_names` is kept.
    """
    chosen_code = ''
    consumed = 0

    for code, expansion in _std_names.items():
        width = len(expansion)
        # skip entries that cannot beat the current match or cannot fit
        if width <= consumed or width > len(components):
            continue
        if list(components[:width]) == expansion:
            chosen_code, consumed = code, width

    return chosen_code, list(components[consumed:])

# ================================================================================================


Expand Down
Loading