Skip to content

Commit 70914aa

Browse files
committed
Use pseudo parser for folding ranges
This first version only uses bracket matching. We plan to extend this to use DirectiveTree as well. Also includes changes to Token to allow retrieving the corresponding token in the token stream of the original source file. Differential Revision: https://reviews.llvm.org/D129648
1 parent d693fd2 commit 70914aa

File tree

8 files changed

+173
-21
lines changed

8 files changed

+173
-21
lines changed

clang-tools-extra/clangd/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,8 @@ target_link_libraries(clangDaemon
170170
clangTidy
171171

172172
clangdSupport
173+
174+
clangPseudo
173175
)
174176
if(CLANGD_TIDY_CHECKS)
175177
target_link_libraries(clangDaemon PRIVATE ${ALL_CLANG_TIDY_CHECKS})

clang-tools-extra/clangd/SemanticSelection.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
#include "Protocol.h"
1212
#include "Selection.h"
1313
#include "SourceCode.h"
14+
#include "clang-pseudo/Bracket.h"
15+
#include "clang-pseudo/DirectiveTree.h"
16+
#include "clang-pseudo/Token.h"
1417
#include "clang/AST/DeclBase.h"
1518
#include "clang/Basic/SourceLocation.h"
1619
#include "clang/Basic/SourceManager.h"
@@ -170,5 +173,46 @@ llvm::Expected<std::vector<FoldingRange>> getFoldingRanges(ParsedAST &AST) {
170173
return collectFoldingRanges(SyntaxTree, TM);
171174
}
172175

176+
// FIXME(kirillbobyrev): Collect comments, PP conditional regions, includes and
177+
// other code regions (e.g. public/private/protected sections of classes,
178+
// control flow statement bodies).
179+
// Related issue: https://github.com/clangd/clangd/issues/310
180+
llvm::Expected<std::vector<FoldingRange>>
181+
getFoldingRanges(const std::string &Code) {
182+
auto OrigStream = clang::pseudo::lex(Code, clang::pseudo::genericLangOpts());
183+
184+
auto DirectiveStructure = clang::pseudo::DirectiveTree::parse(OrigStream);
185+
clang::pseudo::chooseConditionalBranches(DirectiveStructure, OrigStream);
186+
187+
// FIXME: Provide ranges in the disabled-PP regions as well.
188+
auto Preprocessed = DirectiveStructure.stripDirectives(OrigStream);
189+
190+
auto ParseableStream = cook(Preprocessed, clang::pseudo::genericLangOpts());
191+
pseudo::pairBrackets(ParseableStream);
192+
193+
std::vector<FoldingRange> Result;
194+
for (const auto &Tok : ParseableStream.tokens()) {
195+
if (auto *Paired = Tok.pair()) {
196+
// Process only token at the start of the range. Avoid ranges on a single
197+
// line.
198+
if (Tok.Line < Paired->Line) {
199+
Position Start = offsetToPosition(
200+
Code,
201+
OrigStream.tokens()[Tok.OriginalIndex].text().data() - Code.data());
202+
Position End = offsetToPosition(
203+
Code, OrigStream.tokens()[Paired->OriginalIndex].text().data() -
204+
Code.data());
205+
FoldingRange FR;
206+
FR.startLine = Start.line;
207+
FR.startCharacter = Start.character + 1;
208+
FR.endLine = End.line;
209+
FR.endCharacter = End.character;
210+
Result.push_back(FR);
211+
}
212+
}
213+
}
214+
return Result;
215+
}
216+
173217
} // namespace clangd
174218
} // namespace clang

clang-tools-extra/clangd/SemanticSelection.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "ParsedAST.h"
1616
#include "Protocol.h"
1717
#include "llvm/Support/Error.h"
18+
#include <string>
1819
#include <vector>
1920
namespace clang {
2021
namespace clangd {
@@ -29,6 +30,11 @@ llvm::Expected<SelectionRange> getSemanticRanges(ParsedAST &AST, Position Pos);
2930
/// This should include large scopes, preprocessor blocks etc.
3031
llvm::Expected<std::vector<FoldingRange>> getFoldingRanges(ParsedAST &AST);
3132

33+
/// Returns a list of ranges whose contents might be collapsible in an editor.
34+
/// This version uses the pseudoparser which does not require the AST. It
/// currently reports only bracket pairs whose opening and closing brackets
/// are on different lines.
35+
llvm::Expected<std::vector<FoldingRange>>
36+
getFoldingRanges(const std::string &Code);
37+
3238
} // namespace clangd
3339
} // namespace clang
3440

clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,86 @@ TEST(FoldingRanges, All) {
265265
}
266266
}
267267

268+
// Checks the text-only getFoldingRanges(const std::string &) overload against
// a set of annotated snippets. The [[...]] markers in each snippet presumably
// delimit the expected folding ranges (Annotations syntax) — they are what
// T.ranges() is compared against below.
TEST(FoldingRangesPseudoParser, All) {
269+
const char *Tests[] = {
270+
R"cpp(
271+
#define FOO int foo() {\
272+
int Variable = 42; \
273+
}
274+
275+
// Do not generate folding range for braces within macro expansion.
276+
FOO
277+
278+
// Do not generate folding range within macro arguments.
279+
#define FUNCTOR(functor) functor
280+
void func() {[[
281+
FUNCTOR([](){});
282+
]]}
283+
284+
// Do not generate folding range with a brace coming from macro.
285+
#define LBRACE {
286+
void bar() LBRACE
287+
int X = 42;
288+
}
289+
)cpp",
290+
R"cpp(
291+
void func() {[[
292+
int Variable = 100;
293+
294+
if (Variable > 5) {[[
295+
Variable += 42;
296+
]]} else if (Variable++)
297+
++Variable;
298+
else {[[
299+
Variable--;
300+
]]}
301+
302+
// Do not generate FoldingRange for empty CompoundStmts.
303+
for (;;) {}
304+
305+
// If there are newlines between {}, we should generate one.
306+
for (;;) {[[
307+
308+
]]}
309+
]]}
310+
)cpp",
311+
R"cpp(
312+
class Foo {[[
313+
public:
314+
Foo() {[[
315+
int X = 1;
316+
]]}
317+
318+
private:
319+
int getBar() {[[
320+
return 42;
321+
]]}
322+
323+
// Braces are located at the same line: no folding range here.
324+
void getFooBar() { }
325+
]]};
326+
)cpp",
327+
R"cpp(
328+
// Range boundaries on escaped newlines.
329+
class Foo \
330+
\
331+
{[[ \
332+
public:
333+
Foo() {[[\
334+
int X = 1;
335+
]]} \
336+
]]};
337+
)cpp",
338+
};
339+
// For every snippet: run getFoldingRanges on the annotation-free code and
// require the gathered result to equal the annotated ranges, in any order.
// The snippet text is streamed into the failure message.
for (const char *Test : Tests) {
340+
auto T = Annotations(Test);
341+
EXPECT_THAT(
342+
gatherFoldingRanges(llvm::cantFail(getFoldingRanges(T.code().str()))),
343+
UnorderedElementsAreArray(T.ranges()))
344+
<< Test;
345+
}
346+
}
347+
268348
} // namespace
269349
} // namespace clangd
270350
} // namespace clang

clang-tools-extra/pseudo/include/clang-pseudo/Token.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ struct Token {
6767
uint8_t Indent = 0;
6868
/// Flags have some meaning defined by the function that produced this stream.
6969
uint8_t Flags = 0;
70+
/// Index into the original token stream (as raw-lexed from the source code).
71+
Index OriginalIndex = Invalid;
7072
// Helpers to get/set Flags based on `enum class`.
7173
template <class T> bool flag(T Mask) const {
7274
return Flags & uint8_t{static_cast<std::underlying_type_t<T>>(Mask)};
@@ -96,7 +98,7 @@ struct Token {
9698
/// If this token is a paired bracket, the offset of the pair in the stream.
9799
int32_t Pair = 0;
98100
};
99-
static_assert(sizeof(Token) <= sizeof(char *) + 20, "Careful with layout!");
101+
static_assert(sizeof(Token) <= sizeof(char *) + 24, "Careful with layout!");
100102
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Token &);
101103

102104
/// A half-open range of tokens within a stream.

clang-tools-extra/pseudo/lib/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,7 @@ add_clang_library(clangPseudo
1717
clangLex
1818
clangPseudoGrammar
1919
)
20+
21+
target_include_directories(clangPseudo INTERFACE
22+
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
23+
)

clang-tools-extra/pseudo/lib/Lex.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ TokenStream lex(const std::string &Code, const clang::LangOptions &LangOpts) {
2626

2727
TokenStream Result;
2828
clang::Token CT;
29+
// Index into the token stream of original source code.
30+
Token::Index TokenIndex = 0;
2931
unsigned LastOffset = 0;
3032
unsigned Line = 0;
3133
unsigned Indent = 0;
@@ -66,6 +68,7 @@ TokenStream lex(const std::string &Code, const clang::LangOptions &LangOpts) {
6668
if (CT.needsCleaning() || CT.hasUCN())
6769
Tok.setFlag(LexFlags::NeedsCleaning);
6870

71+
Tok.OriginalIndex = TokenIndex++;
6972
Result.push(Tok);
7073
LastOffset = Offset;
7174
}

clang-tools-extra/pseudo/unittests/TokenTest.cpp

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ MATCHER_P2(lineIndent, Line, Indent, "") {
3131
return arg.Line == (unsigned)Line && arg.Indent == (unsigned)Indent;
3232
}
3333

34+
// Matches a Token whose OriginalIndex equals `index`.
MATCHER_P(originalIndex, index, "") {
  const auto Expected = static_cast<Token::Index>(index);
  return arg.OriginalIndex == Expected;
}
37+
3438
TEST(TokenTest, Lex) {
3539
LangOptions Opts;
3640
std::string Code = R"cpp(
@@ -105,20 +109,23 @@ tokens
105109
Raw.tokens(),
106110
ElementsAre(AllOf(token("one_\\\ntoken", tok::raw_identifier),
107111
hasFlag(LexFlags::StartsPPLine),
108-
hasFlag(LexFlags::NeedsCleaning), lineIndent(1, 0)),
112+
hasFlag(LexFlags::NeedsCleaning), lineIndent(1, 0),
113+
originalIndex(0)),
109114
AllOf(token("two", tok::raw_identifier),
110115
hasFlag(LexFlags::StartsPPLine),
111-
Not(hasFlag(LexFlags::NeedsCleaning))),
116+
Not(hasFlag(LexFlags::NeedsCleaning)),
117+
originalIndex(1)),
112118
AllOf(token("\\\ntokens", tok::raw_identifier),
113119
Not(hasFlag(LexFlags::StartsPPLine)),
114-
hasFlag(LexFlags::NeedsCleaning))));
120+
hasFlag(LexFlags::NeedsCleaning), originalIndex(2))));
115121

116122
TokenStream Cooked = cook(Raw, Opts);
117123
EXPECT_THAT(
118124
Cooked.tokens(),
119-
ElementsAre(AllOf(token("one_token", tok::identifier), lineIndent(1, 0)),
120-
token("two", tok::identifier),
121-
token("tokens", tok::identifier)));
125+
ElementsAre(AllOf(token("one_token", tok::identifier), lineIndent(1, 0),
126+
originalIndex(0)),
127+
AllOf(token("two", tok::identifier), originalIndex(1)),
128+
AllOf(token("tokens", tok::identifier), originalIndex(2))));
122129
}
123130

124131
TEST(TokenTest, EncodedCharacters) {
@@ -182,13 +189,14 @@ TEST(TokenTest, SplitGreaterGreater) {
182189
)cpp";
183190
TokenStream Cook = cook(lex(Code, Opts), Opts);
184191
TokenStream Split = stripComments(Cook);
185-
EXPECT_THAT(Split.tokens(), ElementsAreArray({
186-
token(">", tok::greater),
187-
token(">", tok::greater),
188-
token(">", tok::greater),
189-
token(">", tok::greater),
190-
token(">>=", tok::greatergreaterequal),
191-
}));
192+
EXPECT_THAT(Split.tokens(),
193+
ElementsAre(AllOf(token(">", tok::greater), originalIndex(0)),
194+
AllOf(token(">", tok::greater), originalIndex(0)),
195+
// Token 1 and 2 are comments.
196+
AllOf(token(">", tok::greater), originalIndex(3)),
197+
AllOf(token(">", tok::greater), originalIndex(3)),
198+
AllOf(token(">>=", tok::greatergreaterequal),
199+
originalIndex(4))));
192200
}
193201

194202
TEST(TokenTest, DropComments) {
@@ -199,13 +207,16 @@ TEST(TokenTest, DropComments) {
199207
)cpp";
200208
TokenStream Raw = cook(lex(Code, Opts), Opts);
201209
TokenStream Stripped = stripComments(Raw);
202-
EXPECT_THAT(Raw.tokens(),
203-
ElementsAreArray(
204-
{token("// comment", tok::comment), token("int", tok::kw_int),
205-
token("/*abc*/", tok::comment), token(";", tok::semi)}));
206-
207-
EXPECT_THAT(Stripped.tokens(), ElementsAreArray({token("int", tok::kw_int),
208-
token(";", tok::semi)}));
210+
EXPECT_THAT(
211+
Raw.tokens(),
212+
ElementsAre(AllOf(token("// comment", tok::comment), originalIndex(0)),
213+
AllOf(token("int", tok::kw_int), originalIndex(1)),
214+
AllOf(token("/*abc*/", tok::comment), originalIndex(2)),
215+
AllOf(token(";", tok::semi), originalIndex(3))));
216+
217+
EXPECT_THAT(Stripped.tokens(),
218+
ElementsAre(AllOf(token("int", tok::kw_int), originalIndex(1)),
219+
AllOf(token(";", tok::semi), originalIndex(3))));
209220
}
210221

211222
} // namespace

0 commit comments

Comments
 (0)