//===--- DLangDemangle.cpp ------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file defines a demangler for the D programming language as specified /// in the ABI specification, available at: /// https://dlang.org/spec/abi.html#name_mangling /// //===----------------------------------------------------------------------===// #include "llvm/Demangle/Demangle.h" #include "llvm/Demangle/StringView.h" #include "llvm/Demangle/Utility.h" #include #include #include using namespace llvm; using llvm::itanium_demangle::OutputBuffer; using llvm::itanium_demangle::StringView; namespace { /// Demangle information structure. struct Demangler { /// Initialize the information structure we use to pass around information. /// /// \param Mangled String to demangle. Demangler(const char *Mangled); /// Extract and demangle the mangled symbol and append it to the output /// string. /// /// \param Demangled Output buffer to write the demangled name. /// /// \return The remaining string on success or nullptr on failure. /// /// \see https://dlang.org/spec/abi.html#name_mangling . /// \see https://dlang.org/spec/abi.html#MangledName . const char *parseMangle(OutputBuffer *Demangled); private: /// Extract and demangle a given mangled symbol and append it to the output /// string. /// /// \param Demangled output buffer to write the demangled name. /// \param Mangled mangled symbol to be demangled. /// /// \return The remaining string on success or nullptr on failure. /// /// \see https://dlang.org/spec/abi.html#name_mangling . /// \see https://dlang.org/spec/abi.html#MangledName . const char *parseMangle(OutputBuffer *Demangled, const char *Mangled); /// Extract the number from a given string. /// /// \param Mangled string to extract the number. /// \param Ret assigned result value. /// /// \return The remaining string on success or nullptr on failure. /// /// \note A result larger than UINT_MAX is considered a failure. /// /// \see https://dlang.org/spec/abi.html#Number . const char *decodeNumber(const char *Mangled, unsigned long *Ret); /// Check whether it is the beginning of a symbol name. /// /// \param Mangled string to extract the symbol name. /// /// \return true on success, false otherwise. /// /// \see https://dlang.org/spec/abi.html#SymbolName . bool isSymbolName(const char *Mangled); /// Extract and demangle an identifier from a given mangled symbol append it /// to the output string. /// /// \param Demangled Output buffer to write the demangled name. /// \param Mangled Mangled symbol to be demangled. /// /// \return The remaining string on success or nullptr on failure. /// /// \see https://dlang.org/spec/abi.html#SymbolName . const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled); /// Extract and demangle the plain identifier from a given mangled symbol and /// prepend/append it to the output string, with a special treatment for some /// magic compiler generated symbols. /// /// \param Demangled Output buffer to write the demangled name. /// \param Mangled Mangled symbol to be demangled. /// \param Len Length of the mangled symbol name. /// /// \return The remaining string on success or nullptr on failure. /// /// \see https://dlang.org/spec/abi.html#LName . const char *parseLName(OutputBuffer *Demangled, const char *Mangled, unsigned long Len); /// Extract and demangle the qualified symbol from a given mangled symbol /// append it to the output string. /// /// \param Demangled Output buffer to write the demangled name. /// \param Mangled Mangled symbol to be demangled. /// /// \return The remaining string on success or nullptr on failure. /// /// \see https://dlang.org/spec/abi.html#QualifiedName . const char *parseQualified(OutputBuffer *Demangled, const char *Mangled); /// The string we are demangling. const char *Str; }; } // namespace const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) { // Return nullptr if trying to extract something that isn't a digit. if (Mangled == nullptr || !std::isdigit(*Mangled)) return nullptr; unsigned long Val = 0; do { unsigned long Digit = Mangled[0] - '0'; // Check for overflow. if (Val > (std::numeric_limits::max() - Digit) / 10) return nullptr; Val = Val * 10 + Digit; ++Mangled; } while (std::isdigit(*Mangled)); if (*Mangled == '\0') return nullptr; *Ret = Val; return Mangled; } bool Demangler::isSymbolName(const char *Mangled) { if (std::isdigit(*Mangled)) return true; // TODO: Handle symbol back references and template instances. return false; } const char *Demangler::parseMangle(OutputBuffer *Demangled, const char *Mangled) { // A D mangled symbol is comprised of both scope and type information. // MangleName: // _D QualifiedName Type // _D QualifiedName Z // ^ // The caller should have guaranteed that the start pointer is at the // above location. // Note that type is never a function type, but only the return type of // a function or the type of a variable. Mangled += 2; Mangled = parseQualified(Demangled, Mangled); if (Mangled != nullptr) { // Artificial symbols end with 'Z' and have no type. if (*Mangled == 'Z') ++Mangled; else { // TODO: Implement symbols with types. return nullptr; } } return Mangled; } const char *Demangler::parseQualified(OutputBuffer *Demangled, const char *Mangled) { // Qualified names are identifiers separated by their encoded length. // Nested functions also encode their argument types without specifying // what they return. // QualifiedName: // SymbolFunctionName // SymbolFunctionName QualifiedName // ^ // SymbolFunctionName: // SymbolName // SymbolName TypeFunctionNoReturn // SymbolName M TypeFunctionNoReturn // SymbolName M TypeModifiers TypeFunctionNoReturn // The start pointer should be at the above location. // Whether it has more than one symbol size_t NotFirst = false; do { // Skip over anonymous symbols. if (*Mangled == '0') { do ++Mangled; while (*Mangled == '0'); continue; } if (NotFirst) *Demangled << '.'; NotFirst = true; Mangled = parseIdentifier(Demangled, Mangled); } while (Mangled && isSymbolName(Mangled)); return Mangled; } const char *Demangler::parseIdentifier(OutputBuffer *Demangled, const char *Mangled) { unsigned long Len; if (Mangled == nullptr || *Mangled == '\0') return nullptr; // TODO: Parse back references and lengthless template instances. const char *Endptr = decodeNumber(Mangled, &Len); if (Endptr == nullptr || Len == 0) return nullptr; if (strlen(Endptr) < Len) return nullptr; Mangled = Endptr; // TODO: Parse template instances with a length prefix. // There can be multiple different declarations in the same function that // have the same mangled name. To make the mangled names unique, a fake // parent in the form `__Sddd' is added to the symbol. if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') { const char *NumPtr = Mangled + 3; while (NumPtr < (Mangled + Len) && std::isdigit(*NumPtr)) ++NumPtr; if (Mangled + Len == NumPtr) { // Skip over the fake parent. Mangled += Len; return parseIdentifier(Demangled, Mangled); } // Else demangle it as a plain identifier. } return parseLName(Demangled, Mangled, Len); } const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled, unsigned long Len) { switch (Len) { case 6: if (strncmp(Mangled, "__initZ", Len + 1) == 0) { // The static initializer for a given symbol. Demangled->prepend("initializer for "); Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); Mangled += Len; return Mangled; } if (strncmp(Mangled, "__vtblZ", Len + 1) == 0) { // The vtable symbol for a given class. Demangled->prepend("vtable for "); Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); Mangled += Len; return Mangled; } break; case 7: if (strncmp(Mangled, "__ClassZ", Len + 1) == 0) { // The classinfo symbol for a given class. Demangled->prepend("ClassInfo for "); Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); Mangled += Len; return Mangled; } break; case 11: if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) { // The interface symbol for a given class. Demangled->prepend("Interface for "); Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); Mangled += Len; return Mangled; } break; case 12: if (strncmp(Mangled, "__ModuleInfoZ", Len + 1) == 0) { // The ModuleInfo symbol for a given module. Demangled->prepend("ModuleInfo for "); Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); Mangled += Len; return Mangled; } break; } *Demangled << StringView(Mangled, Len); Mangled += Len; return Mangled; } Demangler::Demangler(const char *Mangled) : Str(Mangled) {} const char *Demangler::parseMangle(OutputBuffer *Demangled) { return parseMangle(Demangled, this->Str); } char *llvm::dlangDemangle(const char *MangledName) { if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0) return nullptr; OutputBuffer Demangled; if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024)) return nullptr; if (strcmp(MangledName, "_Dmain") == 0) { Demangled << "D main"; } else { Demangler D = Demangler(MangledName); MangledName = D.parseMangle(&Demangled); // Check that the entire symbol was successfully demangled. if (MangledName == nullptr || *MangledName != '\0') { std::free(Demangled.getBuffer()); return nullptr; } } // OutputBuffer's internal buffer is not null terminated and therefore we need // to add it to comply with C null terminated strings. if (Demangled.getCurrentPosition() > 0) { Demangled << '\0'; Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1); return Demangled.getBuffer(); } std::free(Demangled.getBuffer()); return nullptr; }