-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[Clang][Comments] Add argument parsing for @throw @throws @exception #84726
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Clang][Comments] Add argument parsing for @throw @throws @exception #84726
Conversation
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write If you have received no comments on your PR for a week, you can request a review If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
@llvm/pr-subscribers-clang Author: hdoc (hdoc) ChangesDoxygen allows for the @throw, @throws, and @exception commands to have an attached argument indicating the type being thrown. Currently, Clang's AST parsing doesn't support parsing out this argument from doc comments. The result is missing compatibility with Doxygen. We would find it helpful if the AST exposed these thrown types as BlockCommandComment arguments so that we could generate better documentation. This PR implements parsing of arguments for the @throw, @throws, and @exception commands. Each command can only have one argument, matching the semantics of Doxygen. We have also added unit tests to validate the functionality. Full diff: https://github.com/llvm/llvm-project/pull/84726.diff 4 Files Affected:
diff --git a/clang/include/clang/AST/CommentCommands.td b/clang/include/clang/AST/CommentCommands.td
index e839031752cdd8..06b2fa9b5531c6 100644
--- a/clang/include/clang/AST/CommentCommands.td
+++ b/clang/include/clang/AST/CommentCommands.td
@@ -132,9 +132,9 @@ def Tparam : BlockCommand<"tparam"> { let IsTParamCommand = 1; }
// HeaderDoc command for template parameter documentation.
def Templatefield : BlockCommand<"templatefield"> { let IsTParamCommand = 1; }
-def Throws : BlockCommand<"throws"> { let IsThrowsCommand = 1; }
-def Throw : BlockCommand<"throw"> { let IsThrowsCommand = 1; }
-def Exception : BlockCommand<"exception"> { let IsThrowsCommand = 1; }
+def Throws : BlockCommand<"throws"> { let IsThrowsCommand = 1; let NumArgs = 1; }
+def Throw : BlockCommand<"throw"> { let IsThrowsCommand = 1; let NumArgs = 1; }
+def Exception : BlockCommand<"exception"> { let IsThrowsCommand = 1; let NumArgs = 1;}
def Deprecated : BlockCommand<"deprecated"> {
let IsEmptyParagraphAllowed = 1;
diff --git a/clang/include/clang/AST/CommentParser.h b/clang/include/clang/AST/CommentParser.h
index e11e818b1af0a1..5884a25d007851 100644
--- a/clang/include/clang/AST/CommentParser.h
+++ b/clang/include/clang/AST/CommentParser.h
@@ -100,6 +100,9 @@ class Parser {
ArrayRef<Comment::Argument>
parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs);
+ ArrayRef<Comment::Argument>
+ parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs);
+
BlockCommandComment *parseBlockCommand();
InlineCommandComment *parseInlineCommand();
diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp
index 8adfd85d0160c3..c70fa1b05cb241 100644
--- a/clang/lib/AST/CommentParser.cpp
+++ b/clang/lib/AST/CommentParser.cpp
@@ -75,6 +75,25 @@ class TextTokenRetokenizer {
return *Pos.BufferPtr;
}
+ char peekNext(unsigned offset) const {
+ assert(!isEnd());
+ assert(Pos.BufferPtr != Pos.BufferEnd);
+ if (Pos.BufferPtr + offset <= Pos.BufferEnd) {
+ return *(Pos.BufferPtr + offset);
+ } else {
+ return '\0';
+ }
+ }
+
+ void peekNextToken(SmallString<32> &WordText) const {
+ unsigned offset = 1;
+ char C = peekNext(offset++);
+ while (!isWhitespace(C) && C != '\0') {
+ WordText.push_back(C);
+ C = peekNext(offset++);
+ }
+ }
+
void consumeChar() {
assert(!isEnd());
assert(Pos.BufferPtr != Pos.BufferEnd);
@@ -89,6 +108,29 @@ class TextTokenRetokenizer {
}
}
+ /// Extract a template type
+ bool lexTemplateType(SmallString<32> &WordText) {
+ unsigned IncrementCounter = 0;
+ while (!isEnd()) {
+ const char C = peek();
+ WordText.push_back(C);
+ consumeChar();
+ switch (C) {
+ default:
+ break;
+ case '<': {
+ IncrementCounter++;
+ } break;
+ case '>': {
+ IncrementCounter--;
+ if (!IncrementCounter)
+ return true;
+ } break;
+ }
+ }
+ return false;
+ }
+
/// Add a token.
/// Returns true on success, false if there are no interesting tokens to
/// fetch from lexer.
@@ -149,6 +191,76 @@ class TextTokenRetokenizer {
addToken();
}
+ /// Extract a type argument
+ bool lexDataType(Token &Tok) {
+ if (isEnd())
+ return false;
+ Position SavedPos = Pos;
+ consumeWhitespace();
+ SmallString<32> NextToken;
+ SmallString<32> WordText;
+ const char *WordBegin = Pos.BufferPtr;
+ SourceLocation Loc = getSourceLocation();
+ StringRef ConstVal = StringRef("const");
+ bool ConstPointer = false;
+
+ while (!isEnd()) {
+ const char C = peek();
+ if (!isWhitespace(C)) {
+ if (C == '<') {
+ if (!lexTemplateType(WordText))
+ return false;
+ } else {
+ WordText.push_back(C);
+ consumeChar();
+ }
+ } else {
+ if (WordText.equals(ConstVal)) {
+ WordText.push_back(C);
+ consumeChar();
+ } else if (WordText.ends_with(StringRef("*")) ||
+ WordText.ends_with(StringRef("&"))) {
+ NextToken.clear();
+ peekNextToken(NextToken);
+ if (NextToken.equals(ConstVal)) {
+ ConstPointer = true;
+ WordText.push_back(C);
+ consumeChar();
+ } else {
+ consumeChar();
+ break;
+ }
+ } else {
+ NextToken.clear();
+ peekNextToken(NextToken);
+ if ((NextToken.ends_with(StringRef("*")) ||
+ NextToken.ends_with(StringRef("&"))) &&
+ !ConstPointer) {
+ WordText.push_back(C);
+ consumeChar();
+ } else {
+ consumeChar();
+ break;
+ }
+ }
+ }
+ }
+
+ const unsigned Length = WordText.size();
+ if (Length == 0) {
+ Pos = SavedPos;
+ return false;
+ }
+
+ char *TextPtr = Allocator.Allocate<char>(Length + 1);
+
+ memcpy(TextPtr, WordText.c_str(), Length + 1);
+ StringRef Text = StringRef(TextPtr, Length);
+
+ formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
+ return true;
+ }
+
/// Extract a word -- sequence of non-whitespace characters.
bool lexWord(Token &Tok) {
if (isEnd())
@@ -295,6 +407,7 @@ Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
Comment::Argument[NumArgs];
unsigned ParsedArgs = 0;
Token Arg;
+
while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
Args[ParsedArgs] = Comment::Argument{
SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
@@ -304,6 +417,23 @@ Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
return llvm::ArrayRef(Args, ParsedArgs);
}
+ArrayRef<Comment::Argument>
+Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
+ unsigned NumArgs) {
+ auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
+ Comment::Argument[NumArgs];
+ unsigned ParsedArgs = 0;
+ Token Arg;
+
+ while (ParsedArgs < NumArgs && Retokenizer.lexDataType(Arg)) {
+ Args[ParsedArgs] = Comment::Argument{
+ SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
+ ParsedArgs++;
+ }
+
+ return llvm::ArrayRef(Args, ParsedArgs);
+}
+
BlockCommandComment *Parser::parseBlockCommand() {
assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
@@ -356,6 +486,9 @@ BlockCommandComment *Parser::parseBlockCommand() {
parseParamCommandArgs(PC, Retokenizer);
else if (TPC)
parseTParamCommandArgs(TPC, Retokenizer);
+ else if (Info->IsThrowsCommand)
+ S.actOnBlockCommandArgs(
+ BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs));
else
S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs));
diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp
index c3479672ae2a3c..e01d654aa1cea2 100644
--- a/clang/unittests/AST/CommentParser.cpp
+++ b/clang/unittests/AST/CommentParser.cpp
@@ -1427,8 +1427,241 @@ TEST_F(CommentParserTest, Deprecated) {
}
}
+TEST_F(CommentParserTest, ThrowsCommandHasArg1) {
+ const char *Sources[] = {
+ "/// @throws int This function throws an integer",
+ ("/// @throws\n"
+ "/// int This function throws an integer"),
+ ("/// @throws \n"
+ "/// int This function throws an integer"),
+ ("/// @throws\n"
+ "/// int\n"
+ "/// This function throws an integer"),
+ ("/// @throws \n"
+ "/// int \n"
+ "/// This function throws an integer"),
+ };
+
+ for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+ FullComment *FC = parseString(Sources[i]);
+ ASSERT_TRUE(HasChildCount(FC, 2));
+
+ ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+ {
+ BlockCommandComment *BCC;
+ ParagraphComment *PC;
+ ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+ ASSERT_TRUE(HasChildCount(PC, 1));
+ ASSERT_TRUE(BCC->getNumArgs() == 1);
+ ASSERT_TRUE(BCC->getArgText(0) == "int");
+ }
+ }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg2) {
+ const char *Sources[] = {
+ "/// @throws const int This function throws a const integer",
+ ("/// @throws\n"
+ "/// const int This function throws a const integer"),
+ ("/// @throws \n"
+ "/// const int This function throws a const integer"),
+ ("/// @throws\n"
+ "/// const int\n"
+ "/// This function throws a const integer"),
+ ("/// @throws \n"
+ "/// const int \n"
+ "/// This function throws a const integer"),
+ };
+
+ for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+ FullComment *FC = parseString(Sources[i]);
+ ASSERT_TRUE(HasChildCount(FC, 2));
+
+ ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+ {
+ BlockCommandComment *BCC;
+ ParagraphComment *PC;
+ ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+ ASSERT_TRUE(HasChildCount(PC, 1));
+ ASSERT_TRUE(BCC->getNumArgs() == 1);
+ ASSERT_TRUE(BCC->getArgText(0) == "const int");
+ }
+ }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg3) {
+ const char *Sources[] = {
+ "/// @throws const int * This function throws a pointer to a const "
+ "integer\n",
+ ("/// @throws\n"
+ "/// const int * This function throws a pointer to a const integer"),
+ ("/// @throws \n"
+ "/// const int * This function throws a pointer to a const integer"),
+ ("/// @throws\n"
+ "/// const int *\n"
+ "/// This function throws a pointer to a const integer"),
+ ("/// @throws \n"
+ "/// const int *\n"
+ "/// This function throws a pointer to a const integer"),
+ };
+
+ for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+ FullComment *FC = parseString(Sources[i]);
+ ASSERT_TRUE(HasChildCount(FC, 2));
+
+ ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+ {
+ BlockCommandComment *BCC;
+ ParagraphComment *PC;
+ ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+ ASSERT_TRUE(HasChildCount(PC, 1));
+ ASSERT_TRUE(BCC->getNumArgs() == 1);
+ ASSERT_TRUE(BCC->getArgText(0) == "const int *");
+ }
+ }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg4) {
+ const char *Sources[] = {
+ "/// @throws const int * const This function throws a const pointer to a "
+ "const integer",
+ ("/// @throws\n"
+ "/// const int * const This function throws a const pointer to a const "
+ "integer"),
+ ("/// @throws \n"
+ "/// const int * const This function throws a const pointer to a const "
+ "integer"),
+ ("/// @throws\n"
+ "/// const int * const\n"
+ "/// This function throws a const pointer to a const integer"),
+ ("/// @throws \n"
+ "/// const int * const\n"
+ "/// This function throws a const pointer to a const integer"),
+ };
+
+ for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+ FullComment *FC = parseString(Sources[i]);
+ ASSERT_TRUE(HasChildCount(FC, 2));
+
+ ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+ {
+ BlockCommandComment *BCC;
+ ParagraphComment *PC;
+ ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+ ASSERT_TRUE(HasChildCount(PC, 1));
+ ASSERT_TRUE(BCC->getNumArgs() == 1);
+ ASSERT_TRUE(BCC->getArgText(0) == "const int * const");
+ }
+ }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg5) {
+ const char *Sources[] = {
+ "/// @throws int** This function throws a double pointer to an integer",
+ };
+
+ for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+ FullComment *FC = parseString(Sources[i]);
+ ASSERT_TRUE(HasChildCount(FC, 2));
+
+ ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+ {
+ BlockCommandComment *BCC;
+ ParagraphComment *PC;
+ ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+ ASSERT_TRUE(HasChildCount(PC, 1));
+ ASSERT_TRUE(BCC->getNumArgs() == 1);
+ ASSERT_TRUE(BCC->getArgText(0) == "int**");
+ }
+ }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg6) {
+ const char *Sources[] = {
+ "/// @throws const char ** double pointer to a constant char pointer",
+ };
+
+ for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+ FullComment *FC = parseString(Sources[i]);
+ ASSERT_TRUE(HasChildCount(FC, 2));
+
+ ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+ {
+ BlockCommandComment *BCC;
+ ParagraphComment *PC;
+ ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+ ASSERT_TRUE(HasChildCount(PC, 1));
+ ASSERT_TRUE(BCC->getNumArgs() == 1);
+ ASSERT_TRUE(BCC->getArgText(0) == "const char **");
+ }
+ }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg7) {
+ const char *Sources[] = {
+ "/// @throws Error<T> error of type Error<T>",
+ };
+
+ for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+ FullComment *FC = parseString(Sources[i]);
+ ASSERT_TRUE(HasChildCount(FC, 2));
+
+ ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+ {
+ BlockCommandComment *BCC;
+ ParagraphComment *PC;
+ ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+ ASSERT_TRUE(HasChildCount(PC, 3)); // Extra children because <T> is parsed
+ // as a series of TextComments
+ ASSERT_TRUE(BCC->getNumArgs() == 1);
+ ASSERT_TRUE(BCC->getArgText(0) == "Error<T>");
+ }
+ }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg8) {
+ const char *Sources[] = {
+ "/// @throws Error<Container<T>> nested templates",
+ };
+
+ for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+ FullComment *FC = parseString(Sources[i]);
+ ASSERT_TRUE(HasChildCount(FC, 2));
+
+ ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+ {
+ BlockCommandComment *BCC;
+ ParagraphComment *PC;
+ ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+ ASSERT_TRUE(HasChildCount(PC, 1));
+ ASSERT_TRUE(BCC->getNumArgs() == 1);
+ ASSERT_TRUE(BCC->getArgText(0) == "Error<Container<T>>");
+ }
+ }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg9) {
+ const char *Sources[] = {
+ "/// @throws Error<Ts...> variadic templates",
+ };
+
+ for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+ FullComment *FC = parseString(Sources[i]);
+ ASSERT_TRUE(HasChildCount(FC, 2));
+
+ ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+ {
+ BlockCommandComment *BCC;
+ ParagraphComment *PC;
+ ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+ ASSERT_TRUE(HasChildCount(PC, 1));
+ ASSERT_TRUE(BCC->getNumArgs() == 1);
+ ASSERT_TRUE(BCC->getArgText(0) == "Error<Ts...>");
+ }
+ }
+}
+
} // unnamed namespace
} // end namespace comments
} // end namespace clang
-
|
clang/lib/AST/CommentParser.cpp
Outdated
SmallString<32> WordText; | ||
const char *WordBegin = Pos.BufferPtr; | ||
SourceLocation Loc = getSourceLocation(); | ||
StringRef ConstVal = StringRef("const"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't we support volatile
as well?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it should. We'll update the PR to support that. Thank you for noticing and mentioning this.
Likewise in the future we'll probably want to add support for restrict
. It's not necessary for this PR since throw/throws/exception
is only useful in C++ but when parsing Doxygen comments in C codebases we'll want to have the option of supporting that.
Ping |
clang/lib/AST/CommentParser.cpp
Outdated
char peekNext(unsigned offset) const { | ||
assert(!isEnd()); | ||
assert(Pos.BufferPtr != Pos.BufferEnd); | ||
if (Pos.BufferPtr + offset <= Pos.BufferEnd) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't this be if (Pos.BufferPtr + offset < Pos.BufferEnd)
(unless this depends of the buffer being null terminated)? Otherwise this allows for an offset
which results in Pos.BufferEnd
being read.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch, fixed.
clang/lib/AST/CommentParser.cpp
Outdated
@@ -89,6 +108,114 @@ class TextTokenRetokenizer { | |||
} | |||
} | |||
|
|||
bool continueInt(SmallString<32> &NextToken) { | |||
return NextToken.ends_with(StringRef("char")) || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't really understand what this is for...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is used during integral lexing to decide whether we should continue lexing the presumed integral type or not. The name was confusing, so I've renamed it from continueInt
to shouldContinueLexingIntegralType
clang/lib/AST/CommentParser.cpp
Outdated
const char *WordBegin = Pos.BufferPtr; | ||
SourceLocation Loc = getSourceLocation(); | ||
StringRef ConstVal = StringRef("const"); | ||
StringRef PointerVal = StringRef("*"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about pointers to members? Arrays? Function? More complex declarators like void(*)(int, int)
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For now, this doesn't support those values, and due to the complexity of implementing them and the rarity of their use I think it'd make more sense to add them in a follow-up PR. Let me know if this sounds reasonable
StringRef ReferenceVal = StringRef("&"); | ||
bool ConstPointer = false; | ||
|
||
while (!isEnd()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some comments wouldn't hurt...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added as part of the latest commit, along with some light refactoring to make it more readable. No doubt it is a complex piece of code
if (!lexTemplate(WordText)) | ||
return false; | ||
} else { | ||
WordText.push_back(C); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this treat any non-whitespace character (other than <
) as being part of a name?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As the code stands now, yes
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Which I think is fine, we are just trying to find a boundary
59e3fc2
to
64f93cc
Compare
Ping |
Before I do an in-depth review i think supporting arguments to Given use cases, I think it's sufficient to consider anything before a space as part of a type. If people want to throw "unsigned int" exceptions, well... it won't work. I think it's a reasonable tradeoff. In the long term, if we really want to support arbitrary constructs there, I think it would take a more robust approach such as the one that was proposed here (and subsequently abandoned) |
@cor3ntin we appreciate the feedback. Likewise, we agree that the home-grown parser just for parsing I've refactored the PR and reduced the functionality to the subset you requested. I think that makes more sense from a maintainability perspective, as you highlighted. Please review the PR when you get a chance. Thank you. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The general direction looks good now. I left a few nitpicky comments.
Thanks!
@@ -100,6 +100,11 @@ class Parser { | |||
ArrayRef<Comment::Argument> | |||
parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs); | |||
|
|||
/// Parse arguments for \\throws command supported args are in form of class |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
/// Parse arguments for \\throws command supported args are in form of class | |
/// Parse arguments for \throws command supported args are in form of class |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed
const char C = peek(); | ||
WordText.push_back(C); | ||
consumeChar(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be nice to modify consumeChar
to return the consumed char
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would prefer to do that as a separate refactor PR, I think it's outside the scope of this change.
clang/lib/AST/CommentParser.cpp
Outdated
@@ -89,6 +89,31 @@ class TextTokenRetokenizer { | |||
} | |||
} | |||
|
|||
/// Extract a template type | |||
bool lexTemplate(SmallString<32> &WordText) { | |||
unsigned IncrementCounter = 0; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BracketCount
would be a better name
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's a much better name, fixed and thank you.
char *TextPtr = Allocator.Allocate<char>(Length + 1); | ||
|
||
memcpy(TextPtr, WordText.c_str(), Length + 1); | ||
StringRef Text = StringRef(TextPtr, Length); | ||
|
||
formTokenWithChars(Tok, Loc, WordBegin, Length, Text); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd like to see that extracted in a separate function (and replace the few places where we do the exact same thing)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As above, I would prefer to do that as a separate refactor PR, I think it's outside the scope of this change.
clang/lib/AST/CommentParser.cpp
Outdated
@@ -295,6 +367,7 @@ Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) { | |||
Comment::Argument[NumArgs]; | |||
unsigned ParsedArgs = 0; | |||
Token Arg; | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you revert whitespace only changes?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed, I try to keep the clang-format changes restricted to only the regions I touch but it looks like some sneaked through
if (!lexTemplate(WordText)) | ||
return false; | ||
} else { | ||
WordText.push_back(C); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Which I think is fine, we are just trying to find a boundary
@@ -89,6 +89,31 @@ class TextTokenRetokenizer { | |||
} | |||
} | |||
|
|||
/// Extract a template type | |||
bool lexTemplate(SmallString<32> &WordText) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should work okay for simple (most) cases, but we should have tests showing the boundaries, e.g., Foo<(1 > 0)>
or a typo like Foo<Bar<T>
etc.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added some more test cases that check this edge case behavior, like unequal numbers of opening and closing brackets as well as some other unusual edge cases.
Doxygen allows for the @throw, @throws, and @exception commands to have an attached argument indicating the type being thrown. Currently, Clang's AST parsing doesn't support parsing out this argument from doc comments. The result is missing compatibility with Doxygen. We would find it helpful if the AST exposed these thrown types as BlockCommandComment arguments so that we could generate better documentation. This PR implements parsing of arguments for the @throw, @throws, and @exception commands. Each command can only have one argument, matching the semantics of Doxygen. We have also added unit tests to validate the functionality.
XML validation was failing due to the Exception XML being empty, since the actual exception type was being parsed as an argument instead of as a ParagraphComment. This was a result of the change we made to argument parsing. As a result, I updated the XML output to still output the argument text in the Para XML, as it was emitted before.
- Refactored some functionality with the help of clang-tidy - Added comments to the complicated lexing functions - Renamed variables to follow LLVM/Clang conventions
This is in response to a request by upstream to simplify the parser previously developed.
a9dfc23
to
6b165de
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, Thanks
Will you need us to merge that for you?
@cor3ntin yes, please merge this as we do not have commit access. |
@hdoc Congratulations on having your first Pull Request (PR) merged into the LLVM Project! Your changes will be combined with recent changes from other authors, then tested Please check whether problems have been caused by your change specifically, as How to do this, and the rest of the post-merge process, is covered in detail here. If your change does cause a problem, it may be reverted, or you can revert it yourself. If you don't get any reports, no action is required from you. Your changes are working as expected, well done! |
…lvm#84726) Doxygen allows for the `@throw`, `@throws`, and `@exception` commands to have an attached argument indicating the type being thrown. Currently, Clang's AST parsing doesn't support parsing out this argument from doc comments. The result is missing compatibility with Doxygen. This PR implements parsing of arguments for the `@throw`, `@throws`, and `@exception` commands. Each command can only have one argument, matching the semantics of Doxygen.
Doxygen allows for the
@throw
,@throws
, and@exception
commands to have an attached argument indicating the type being thrown. Currently, Clang's AST parsing doesn't support parsing out this argument from doc comments. The result is missing compatibility with Doxygen.We would find it helpful if the AST exposed these thrown types as BlockCommandComment arguments so that we could generate better documentation.
This PR implements parsing of arguments for the
@throw
,@throws
, and@exception
commands. Each command can only have one argument, matching the semantics of Doxygen. We have also added unit tests to validate the functionality.