From 2ab0b4068b6d51949fd2e0ab5ebda4eb708cf526 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Tue, 17 Sep 2024 17:27:33 +0000 Subject: [PATCH 1/2] [demangle] Represent a char array initializer as a string literal. This improves the demangling for non-type template arguments that contain string literals. Previously we'd produce char [4]{(char)65, (char)66, (char)67} (which isn't valid C or C++), and now we produce "ABC" The new demangling is always shorter, even when using an escape sequence for every character, and much more readable when the char array contains text. --- libcxxabi/src/demangle/ItaniumDemangle.h | 116 ++++++++++++++++++- libcxxabi/test/test_demangle.pass.cpp | 27 ++++- llvm/include/llvm/Demangle/ItaniumDemangle.h | 116 ++++++++++++++++++- 3 files changed, 256 insertions(+), 3 deletions(-) diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h index 3b041efe3aac0..8032c893fbae6 100644 --- a/libcxxabi/src/demangle/ItaniumDemangle.h +++ b/libcxxabi/src/demangle/ItaniumDemangle.h @@ -156,6 +156,8 @@ template class PODSmallVector { } }; +class NodeArray; + // Base class of all AST nodes. The AST is built by the parser, then is // traversed by the printLeft/Right functions to produce a demangled string. class Node { @@ -293,6 +295,13 @@ class Node { // implementation. virtual void printRight(OutputBuffer &) const {} + // Print an initializer list of this type. Returns true if we printed a custom + // representation, false if nothing has been printed and the default + // representation should be used. + virtual bool printInitListAsType(OutputBuffer &, const NodeArray &) const { + return false; + } + virtual std::string_view getBaseName() const { return {}; } // Silence compiler warnings, this dtor will never be called. @@ -339,6 +348,10 @@ class NodeArray { FirstElement = false; } } + + // Print an array of integer literals as a string literal. Returns whether we + // could do so. + bool printAsString(OutputBuffer &OB) const; }; struct NodeArrayNode : Node { @@ -796,6 +809,15 @@ class ArrayType final : public Node { OB += "]"; Base->printRight(OB); } + + bool printInitListAsType(OutputBuffer &OB, + const NodeArray &Elements) const override { + if (Base->getKind() == KNameType && + static_cast(Base)->getName() == "char") { + return Elements.printAsString(OB); + } + return false; + } }; class FunctionType final : public Node { @@ -2225,8 +2247,11 @@ class InitListExpr : public Node { template void match(Fn F) const { F(Ty, Inits); } void printLeft(OutputBuffer &OB) const override { - if (Ty) + if (Ty) { + if (Ty->printInitListAsType(OB, Inits)) + return; Ty->print(OB); + } OB += '{'; Inits.printWithComma(OB); OB += '}'; @@ -2433,6 +2458,8 @@ class IntegerLiteral : public Node { if (Type.size() <= 3) OB += Type; } + + std::string_view value() const { return Value; } }; class RequiresExpr : public Node { @@ -2604,6 +2631,93 @@ template struct NodeKind; }; #include "ItaniumNodes.def" +bool NodeArray::printAsString(OutputBuffer &OB) const { + auto Fail = [&OB, StartPos = OB.getCurrentPosition()] { + OB.setCurrentPosition(StartPos); + return false; + }; + + OB += '"'; + bool LastWasNumericEscape = false; + for (const Node *Element : *this) { + if (Element->getKind() != Node::KIntegerLiteral) + return Fail(); + int integer_value = 0; + for (char c : static_cast(Element)->value()) { + if (c < '0' || c > '9' || integer_value > 25) + return Fail(); + integer_value *= 10; + integer_value += c - '0'; + } + if (integer_value > 255) + return Fail(); + + // Insert a `""` to avoid accidentally extending a numeric escape. + if (LastWasNumericEscape) { + if ((integer_value >= '0' && integer_value <= '9') || + (integer_value >= 'a' && integer_value <= 'f') || + (integer_value >= 'A' && integer_value <= 'F')) { + OB += "\"\""; + } + } + + LastWasNumericEscape = false; + + // Determine how to print this character. + switch (integer_value) { + case '\a': + OB += "\\a"; + break; + case '\b': + OB += "\\b"; + break; + case '\f': + OB += "\\f"; + break; + case '\n': + OB += "\\n"; + break; + case '\r': + OB += "\\r"; + break; + case '\t': + OB += "\\t"; + break; + case '\v': + OB += "\\v"; + break; + + case '"': + OB += "\\\""; + break; + case '\\': + OB += "\\\\"; + break; + + default: + // We assume that the character is ASCII, and use a numeric escape for all + // remaining non-printable ASCII characters. + if (integer_value < 32 || integer_value == 127) { + constexpr char Hex[] = "0123456789ABCDEF"; + OB += '\\'; + if (integer_value > 7) + OB += 'x'; + if (integer_value >= 16) + OB += Hex[integer_value >> 4]; + OB += Hex[integer_value & 0xF]; + LastWasNumericEscape = true; + break; + } + + // Assume all remaining characters are directly printable. + OB += (char)integer_value; + break; + } + } + OB += '"'; + return true; +} + template struct AbstractManglingParser { const char *First; const char *Last; diff --git a/libcxxabi/test/test_demangle.pass.cpp b/libcxxabi/test/test_demangle.pass.cpp index 77f79e0d40e84..c8d4ca8637e8d 100644 --- a/libcxxabi/test/test_demangle.pass.cpp +++ b/libcxxabi/test/test_demangle.pass.cpp @@ -30037,7 +30037,32 @@ const char* cases[][2] = // FIXME: This is not valid pointer-to-member syntax. {"_Z1fIXtl1DmcM7DerivedKiadL_ZN11MoreDerived1zEEn8EEEEvv", "void f()"}, {"_Z1fIXtl1Edi1nLi42EEEEvv", "void f()"}, - {"_ZTAXtl1StlA32_cLc104ELc101ELc108ELc108ELc111ELc32ELc119ELc111ELc114ELc108ELc100EEEE", "template parameter object for S{char [32]{(char)104, (char)101, (char)108, (char)108, (char)111, (char)32, (char)119, (char)111, (char)114, (char)108, (char)100}}"}, + // Arrays of char are formatted as string literals. Escape sequences are + // used for non-printable ASCII characters. + // FIXME: We should do the same for arrays of charN_t and wchar_t. + {"_ZTAXtl1StlA32_cLc104ELc101ELc108ELc108ELc111ELc32ELc119ELc111ELc114ELc108ELc100EEEE", "template parameter object for S{\"hello world\"}"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc108ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc0ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc1ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc6ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc7ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc8ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc9ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc10ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc11ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc12ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc13ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc14ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc15ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc16ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc34ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc72ELc101ELc92ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc15ELc101ELc108ELc108ELc111EEEEEvv", "void f()"}, + {"_Z1fIXtl5HellotlA6_cLc240ELc159ELc152ELc138ELc33EEEEEvv", "void f()"}, + // Even non-null-terminated strings get this treatment, even though this + // isn't valid C++ syntax to initialize an array of char. + {"_Z1fIXtl5HellotlA5_cLc72ELc101ELc108ELc108ELc111EEEEEvv", "void f()"}, // FIXME: This is wrong; the S2_ backref should expand to OT_ and then to // "double&&". But we can't cope with a substitution that represents a diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 0af0224bc83fa..401dc4f5a4878 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -156,6 +156,8 @@ template class PODSmallVector { } }; +class NodeArray; + // Base class of all AST nodes. The AST is built by the parser, then is // traversed by the printLeft/Right functions to produce a demangled string. class Node { @@ -293,6 +295,13 @@ class Node { // implementation. virtual void printRight(OutputBuffer &) const {} + // Print an initializer list of this type. Returns true if we printed a custom + // representation, false if nothing has been printed and the default + // representation should be used. + virtual bool printInitListAsType(OutputBuffer &, const NodeArray &) const { + return false; + } + virtual std::string_view getBaseName() const { return {}; } // Silence compiler warnings, this dtor will never be called. @@ -339,6 +348,10 @@ class NodeArray { FirstElement = false; } } + + // Print an array of integer literals as a string literal. Returns whether we + // could do so. + bool printAsString(OutputBuffer &OB) const; }; struct NodeArrayNode : Node { @@ -796,6 +809,15 @@ class ArrayType final : public Node { OB += "]"; Base->printRight(OB); } + + bool printInitListAsType(OutputBuffer &OB, + const NodeArray &Elements) const override { + if (Base->getKind() == KNameType && + static_cast(Base)->getName() == "char") { + return Elements.printAsString(OB); + } + return false; + } }; class FunctionType final : public Node { @@ -2225,8 +2247,11 @@ class InitListExpr : public Node { template void match(Fn F) const { F(Ty, Inits); } void printLeft(OutputBuffer &OB) const override { - if (Ty) + if (Ty) { + if (Ty->printInitListAsType(OB, Inits)) + return; Ty->print(OB); + } OB += '{'; Inits.printWithComma(OB); OB += '}'; @@ -2433,6 +2458,8 @@ class IntegerLiteral : public Node { if (Type.size() <= 3) OB += Type; } + + std::string_view value() const { return Value; } }; class RequiresExpr : public Node { @@ -2604,6 +2631,93 @@ template struct NodeKind; }; #include "ItaniumNodes.def" +bool NodeArray::printAsString(OutputBuffer &OB) const { + auto Fail = [&OB, StartPos = OB.getCurrentPosition()] { + OB.setCurrentPosition(StartPos); + return false; + }; + + OB += '"'; + bool LastWasNumericEscape = false; + for (const Node *Element : *this) { + if (Element->getKind() != Node::KIntegerLiteral) + return Fail(); + int integer_value = 0; + for (char c : static_cast(Element)->value()) { + if (c < '0' || c > '9' || integer_value > 25) + return Fail(); + integer_value *= 10; + integer_value += c - '0'; + } + if (integer_value > 255) + return Fail(); + + // Insert a `""` to avoid accidentally extending a numeric escape. + if (LastWasNumericEscape) { + if ((integer_value >= '0' && integer_value <= '9') || + (integer_value >= 'a' && integer_value <= 'f') || + (integer_value >= 'A' && integer_value <= 'F')) { + OB += "\"\""; + } + } + + LastWasNumericEscape = false; + + // Determine how to print this character. + switch (integer_value) { + case '\a': + OB += "\\a"; + break; + case '\b': + OB += "\\b"; + break; + case '\f': + OB += "\\f"; + break; + case '\n': + OB += "\\n"; + break; + case '\r': + OB += "\\r"; + break; + case '\t': + OB += "\\t"; + break; + case '\v': + OB += "\\v"; + break; + + case '"': + OB += "\\\""; + break; + case '\\': + OB += "\\\\"; + break; + + default: + // We assume that the character is ASCII, and use a numeric escape for all + // remaining non-printable ASCII characters. + if (integer_value < 32 || integer_value == 127) { + constexpr char Hex[] = "0123456789ABCDEF"; + OB += '\\'; + if (integer_value > 7) + OB += 'x'; + if (integer_value >= 16) + OB += Hex[integer_value >> 4]; + OB += Hex[integer_value & 0xF]; + LastWasNumericEscape = true; + break; + } + + // Assume all remaining characters are directly printable. + OB += (char)integer_value; + break; + } + } + OB += '"'; + return true; +} + template struct AbstractManglingParser { const char *First; const char *Last; From 83c0344cb737c234b429c0a3ae74b1e6815d7da1 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 18 Sep 2024 22:12:39 +0000 Subject: [PATCH 2/2] Fix build failure --- libcxxabi/src/demangle/ItaniumDemangle.h | 2 +- llvm/include/llvm/Demangle/ItaniumDemangle.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h index 8032c893fbae6..6b3f228fba926 100644 --- a/libcxxabi/src/demangle/ItaniumDemangle.h +++ b/libcxxabi/src/demangle/ItaniumDemangle.h @@ -2631,7 +2631,7 @@ template struct NodeKind; }; #include "ItaniumNodes.def" -bool NodeArray::printAsString(OutputBuffer &OB) const { +inline bool NodeArray::printAsString(OutputBuffer &OB) const { auto Fail = [&OB, StartPos = OB.getCurrentPosition()] { OB.setCurrentPosition(StartPos); return false; diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 401dc4f5a4878..31af04276ca30 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -2631,7 +2631,7 @@ template struct NodeKind; }; #include "ItaniumNodes.def" -bool NodeArray::printAsString(OutputBuffer &OB) const { +inline bool NodeArray::printAsString(OutputBuffer &OB) const { auto Fail = [&OB, StartPos = OB.getCurrentPosition()] { OB.setCurrentPosition(StartPos); return false;