Skip to content

Commit 4edf9d8

Browse files
committed
[clang][modules] Move SLocEntry search into ASTReader
In `getFileID()` the `SourceManager` ends up doing a binary search over its buffer of `SLocEntries`. For modules, this binary search fully deserializes the entire `SLocEntry` block for visited each entry. This shows up in profiles of the dependency scanner, since that operation includes decompressing buffers associated with some entries. This patch moves the binary search over loaded entries into `ASTReader`, which now only performs partial deserialization during the binary search, speeding up the scanner by ~3.3%.
1 parent 69074bf commit 4edf9d8

File tree

4 files changed

+75
-67
lines changed

4 files changed

+75
-67
lines changed

clang/include/clang/Basic/SourceManager.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,9 @@ class ExternalSLocEntrySource {
533533
/// entry from being loaded.
534534
virtual bool ReadSLocEntry(int ID) = 0;
535535

536+
/// Get the index ID for the loaded SourceLocation offset.
537+
virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0;
538+
536539
/// Retrieve the module import location and name for the given ID, if
537540
/// in fact it was loaded from a module (rather than, say, a precompiled
538541
/// header).

clang/include/clang/Serialization/ASTReader.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2153,6 +2153,12 @@ class ASTReader
21532153

21542154
/// Read the source location entry with index ID.
21552155
bool ReadSLocEntry(int ID) override;
2156+
/// Get the index ID for the loaded SourceLocation offset.
2157+
int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override;
2158+
/// Read the offset of the SLocEntry at the given index in the given module
2159+
/// file.
2160+
std::optional<SourceLocation::UIntTy> readSLocOffset(ModuleFile *F,
2161+
unsigned Index);
21562162

21572163
/// Retrieve the module import location and module name for the
21582164
/// given source manager entry ID.

clang/lib/Basic/SourceManager.cpp

Lines changed: 3 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -864,74 +864,10 @@ FileID SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const {
864864
/// This function knows that the SourceLocation is in a loaded buffer, not a
865865
/// local one.
866866
FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const {
867-
if (SLocOffset < CurrentLoadedOffset) {
868-
assert(0 && "Invalid SLocOffset or bad function choice");
869-
return FileID();
870-
}
871-
872-
// Essentially the same as the local case, but the loaded array is sorted
873-
// in the other direction (decreasing order).
874-
// GreaterIndex is the one where the offset is greater, which is actually a
875-
// lower index!
876-
unsigned GreaterIndex = 0;
877-
unsigned LessIndex = LoadedSLocEntryTable.size();
878-
if (LastFileIDLookup.ID < 0) {
879-
// Prune the search space.
880-
int LastID = LastFileIDLookup.ID;
881-
if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset)
882-
GreaterIndex =
883-
(-LastID - 2) + 1; // Exclude LastID, else we would have hit the cache
884-
else
885-
LessIndex = -LastID - 2;
886-
}
887-
888-
// First do a linear scan from the last lookup position, if possible.
889-
unsigned NumProbes;
867+
int ID = ExternalSLocEntries->getSLocEntryID(SLocOffset);
890868
bool Invalid = false;
891-
for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) {
892-
// Make sure the entry is loaded!
893-
const SrcMgr::SLocEntry &E = getLoadedSLocEntry(GreaterIndex, &Invalid);
894-
if (Invalid)
895-
return FileID(); // invalid entry.
896-
if (E.getOffset() <= SLocOffset) {
897-
FileID Res = FileID::get(-int(GreaterIndex) - 2);
898-
LastFileIDLookup = Res;
899-
NumLinearScans += NumProbes + 1;
900-
return Res;
901-
}
902-
}
903-
904-
// Linear scan failed. Do the binary search.
905-
NumProbes = 0;
906-
while (true) {
907-
++NumProbes;
908-
unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex;
909-
const SrcMgr::SLocEntry &E = getLoadedSLocEntry(MiddleIndex, &Invalid);
910-
if (Invalid)
911-
return FileID(); // invalid entry.
912-
913-
if (E.getOffset() > SLocOffset) {
914-
if (GreaterIndex == MiddleIndex) {
915-
assert(0 && "binary search missed the entry");
916-
return FileID();
917-
}
918-
GreaterIndex = MiddleIndex;
919-
continue;
920-
}
921-
922-
if (isOffsetInFileID(FileID::get(-int(MiddleIndex) - 2), SLocOffset)) {
923-
FileID Res = FileID::get(-int(MiddleIndex) - 2);
924-
LastFileIDLookup = Res;
925-
NumBinaryProbes += NumProbes;
926-
return Res;
927-
}
928-
929-
if (LessIndex == MiddleIndex) {
930-
assert(0 && "binary search missed the entry");
931-
return FileID();
932-
}
933-
LessIndex = MiddleIndex;
934-
}
869+
(void)getLoadedSLocEntryByID(ID, &Invalid);
870+
return Invalid ? FileID() : FileID::get(ID);
935871
}
936872

937873
SourceLocation SourceManager::

clang/lib/Serialization/ASTReader.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1444,6 +1444,69 @@ llvm::Error ASTReader::ReadSourceManagerBlock(ModuleFile &F) {
14441444
}
14451445
}
14461446

1447+
std::optional<SourceLocation::UIntTy>
1448+
ASTReader::readSLocOffset(ModuleFile *F, unsigned Index) {
1449+
BitstreamCursor &Cursor = F->SLocEntryCursor;
1450+
SavedStreamPosition SavedPosition(Cursor);
1451+
if (llvm::Error Err = Cursor.JumpToBit(F->SLocEntryOffsetsBase +
1452+
F->SLocEntryOffsets[Index])) {
1453+
Error(std::move(Err));
1454+
return std::nullopt;
1455+
}
1456+
1457+
Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
1458+
if (!MaybeEntry) {
1459+
Error(MaybeEntry.takeError());
1460+
return std::nullopt;
1461+
}
1462+
llvm::BitstreamEntry Entry = MaybeEntry.get();
1463+
1464+
if (Entry.Kind != llvm::BitstreamEntry::Record) {
1465+
Error("incorrectly-formatted source location entry in AST file");
1466+
return std::nullopt;
1467+
}
1468+
1469+
RecordData Record;
1470+
StringRef Blob;
1471+
Expected<unsigned> MaybeSLOC = Cursor.readRecord(Entry.ID, Record, &Blob);
1472+
if (!MaybeSLOC) {
1473+
Error(MaybeSLOC.takeError());
1474+
return std::nullopt;
1475+
}
1476+
switch (MaybeSLOC.get()) {
1477+
default:
1478+
Error("incorrectly-formatted source location entry in AST file");
1479+
return std::nullopt;
1480+
case SM_SLOC_FILE_ENTRY:
1481+
case SM_SLOC_BUFFER_ENTRY:
1482+
case SM_SLOC_EXPANSION_ENTRY:
1483+
return F->SLocEntryBaseOffset + Record[0];
1484+
}
1485+
}
1486+
1487+
int ASTReader::getSLocEntryID(SourceLocation::UIntTy SLocOffset) {
1488+
auto SLocMapI =
1489+
GlobalSLocOffsetMap.find(SourceManager::MaxLoadedOffset - SLocOffset - 1);
1490+
assert(SLocMapI != GlobalSLocOffsetMap.end() &&
1491+
"Corrupted global sloc offset map");
1492+
ModuleFile *F = SLocMapI->second;
1493+
1494+
std::vector<unsigned> Indices(F->LocalNumSLocEntries);
1495+
for (unsigned I = 0; I != F->LocalNumSLocEntries; ++I)
1496+
Indices[I] = I;
1497+
1498+
auto It = llvm::upper_bound(Indices, SLocOffset,
1499+
[&](SourceLocation::UIntTy Offset, unsigned Index) {
1500+
auto EntryOffset = readSLocOffset(F, Index);
1501+
assert(EntryOffset && "Corrupted AST file");
1502+
return Offset < *EntryOffset;
1503+
});
1504+
// The iterator points to the first entry with start offset greater than the
1505+
// offset of interest. The previous entry must contain the offset of interest.
1506+
It = std::prev(It);
1507+
return F->SLocEntryBaseID + *It;
1508+
}
1509+
14471510
bool ASTReader::ReadSLocEntry(int ID) {
14481511
if (ID == 0)
14491512
return false;

0 commit comments

Comments
 (0)