Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
7b963f7
feat(libstore): make cycle errors more verbose
lovesegfault Oct 11, 2025
95305ef
refactor(libstore/find-cycles): use RefScanSink
lovesegfault Oct 11, 2025
21f7fc9
perf(libstore/find-cycles): avoid copying StringSet on every chunk
lovesegfault Oct 11, 2025
df97582
refactor(libstore/find-cycles): remove unused hashPathMap parameter
lovesegfault Oct 11, 2025
c0ea0f8
refactor(libstore/find-cycles): make edges member private
lovesegfault Oct 11, 2025
f0b8a6c
fix(libstore/find-cycles): use readFull for robust file reading
lovesegfault Oct 11, 2025
35146e3
refactor(libstore/find-cycles): remove unused refLength constant
lovesegfault Oct 11, 2025
893c072
style(libstore/find-cycles): use initializer list for edge construction
lovesegfault Oct 11, 2025
3c4541f
fix(libstore/find-cycles): add second pass to merge multiedges
lovesegfault Oct 11, 2025
4d66e0b
refactor(libstore/find-cycles): use portable std::filesystem and read…
lovesegfault Oct 11, 2025
fe1e770
style(libstore/find-cycles): modernize type aliases (typedef → using)
lovesegfault Oct 11, 2025
72f1520
refactor(libstore/find-cycles): add explicit include for caseHackSuffix
lovesegfault Oct 11, 2025
9421c42
refactor(libstore/find-cycles): use std::filesystem::path operator/
lovesegfault Oct 11, 2025
c042201
refactor(libstore/find-cycles): rename scanForCycleEdges2 → walkAndSc…
lovesegfault Oct 11, 2025
5d2feca
refactor(libstore/find-cycles): use std::filesystem::path parameter
lovesegfault Oct 11, 2025
a49176a
refactor(libstore/find-cycles): conditionally include archive.hh on m…
lovesegfault Oct 11, 2025
01be765
fix(libstore/find-cycles): install header in public include directory
lovesegfault Oct 12, 2025
bbb33b4
test(libstore): add parametrized tests for transformEdgesToMultiedges
lovesegfault Oct 12, 2025
baad165
perf(libstore/find-cycles): optimize transformEdgesToMultiedges to O(n)
lovesegfault Oct 12, 2025
a880a6c
test(functional): make multiple-outputs cycle test backward-compatible
lovesegfault Oct 12, 2025
6c39e5e
refactor(libstore/find-cycles): use SourceAccessor for filesystem access
lovesegfault Oct 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 146 additions & 0 deletions src/libstore-tests/find-cycles.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#include "nix/store/build/find-cycles.hh"

#include <gtest/gtest.h>
#include <algorithm>

namespace nix {

/**
* Parameters for transformEdgesToMultiedges tests
*/
struct TransformEdgesParams
{
std::string description;
std::vector<std::vector<std::string>> inputEdges;
std::vector<std::vector<std::string>> expectedOutput;

friend std::ostream & operator<<(std::ostream & os, const TransformEdgesParams & params)
{
os << "Test: " << params.description << "\n";
os << "Input edges (" << params.inputEdges.size() << "):\n";
for (const auto & edge : params.inputEdges) {
os << " ";
for (size_t i = 0; i < edge.size(); ++i) {
if (i > 0)
os << " -> ";
os << edge[i];
}
os << "\n";
}
os << "Expected output (" << params.expectedOutput.size() << "):\n";
for (const auto & multiedge : params.expectedOutput) {
os << " ";
for (size_t i = 0; i < multiedge.size(); ++i) {
if (i > 0)
os << " -> ";
os << multiedge[i];
}
os << "\n";
}
return os;
}
};

class TransformEdgesToMultiedgesTest : public ::testing::TestWithParam<TransformEdgesParams>
{};

namespace {
// Helper to convert vector<vector<string>> to StoreCycleEdgeVec
StoreCycleEdgeVec toStoreCycleEdgeVec(const std::vector<std::vector<std::string>> & edges)
{
StoreCycleEdgeVec result;
result.reserve(edges.size());
for (const auto & edge : edges) {
result.emplace_back(edge.begin(), edge.end());
}
return result;
}

// Comparator for sorting multiedges deterministically
bool compareMultiedges(const StoreCycleEdge & a, const StoreCycleEdge & b)
{
if (a.size() != b.size())
return a.size() < b.size();
return std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end());
}
} // namespace

TEST_P(TransformEdgesToMultiedgesTest, TransformEdges)
{
const auto & params = GetParam();

auto inputEdges = toStoreCycleEdgeVec(params.inputEdges);
StoreCycleEdgeVec actualOutput;
transformEdgesToMultiedges(inputEdges, actualOutput);

auto expectedOutput = toStoreCycleEdgeVec(params.expectedOutput);

ASSERT_EQ(actualOutput.size(), expectedOutput.size()) << "Number of multiedges doesn't match expected";

// Sort both for comparison (order may vary, but content should match)
std::sort(actualOutput.begin(), actualOutput.end(), compareMultiedges);
std::sort(expectedOutput.begin(), expectedOutput.end(), compareMultiedges);

// Compare each multiedge
EXPECT_EQ(actualOutput, expectedOutput);
}

INSTANTIATE_TEST_CASE_P(
FindCycles,
TransformEdgesToMultiedgesTest,
::testing::Values(
// Empty input
TransformEdgesParams{"empty input", {}, {}},

// Single edge - no joining possible
TransformEdgesParams{"single edge", {{"a", "b"}}, {{"a", "b"}}},

// Two edges that connect (append case: A->B, B->C becomes A->B->C)
TransformEdgesParams{"two edges connecting via append", {{"a", "b"}, {"b", "c"}}, {{"a", "b", "c"}}},

// Two edges that connect (prepend case: B->C, A->B becomes A->B->C)
TransformEdgesParams{"two edges connecting via prepend", {{"b", "c"}, {"a", "b"}}, {{"a", "b", "c"}}},

// Complete cycle (A->B, B->C, C->A becomes A->B->C->A)
TransformEdgesParams{"complete cycle", {{"a", "b"}, {"b", "c"}, {"c", "a"}}, {{"a", "b", "c", "a"}}},

// Two disjoint edges - no joining
TransformEdgesParams{"disjoint edges", {{"a", "b"}, {"c", "d"}}, {{"a", "b"}, {"c", "d"}}},

// Chain of multiple edges (A->B, B->C, C->D, D->E)
TransformEdgesParams{
"chain of edges", {{"a", "b"}, {"b", "c"}, {"c", "d"}, {"d", "e"}}, {{"a", "b", "c", "d", "e"}}},

// Multiple disjoint cycles
TransformEdgesParams{
"multiple disjoint cycles",
{{"a", "b"}, {"b", "a"}, {"c", "d"}, {"d", "c"}},
{{"a", "b", "a"}, {"c", "d", "c"}}},

// Complex graph requiring multiple merge passes
// First pass: (A->B, B->C) -> (A->B->C)
// Then: (A->B->C, C->D) -> (A->B->C->D)
// Then: (D->A, A->B->C->D) -> (A->B->C->D->A)
TransformEdgesParams{
"complex requiring multiple passes",
{{"a", "b"}, {"b", "c"}, {"c", "d"}, {"d", "a"}},
{{"a", "b", "c", "d", "a"}}},

// Y-shaped graph (A->B, B->C, B->D)
// B->C and B->D can't connect, but A->B can prepend to B->C and A->B can prepend to B->D
// However, once A->B joins with B->C to form A->B->C, the original A->B is consumed
// So we should get A->B->C and B->D (or A->B->D and B->C depending on order)
TransformEdgesParams{"Y-shaped graph", {{"a", "b"}, {"b", "c"}, {"b", "d"}}, {{"a", "b", "c"}, {"b", "d"}}},

// Edge with longer path (multi-hop edge)
TransformEdgesParams{
"edge with multiple hops", {{"a", "x", "y", "b"}, {"b", "c"}}, {{"a", "x", "y", "b", "c"}}},

// Self-loop edge
TransformEdgesParams{"self-loop", {{"a", "a"}}, {{"a", "a"}}},

// Reverse order joining (tests prepend logic thoroughly)
TransformEdgesParams{
"reverse order joining", {{"d", "e"}, {"c", "d"}, {"b", "c"}, {"a", "b"}}, {{"a", "b", "c", "d", "e"}}}));

} // namespace nix
1 change: 1 addition & 0 deletions src/libstore-tests/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ sources = files(
'derived-path.cc',
'downstream-placeholder.cc',
'dummy-store.cc',
'find-cycles.cc',
'http-binary-cache-store.cc',
'legacy-ssh-store.cc',
'local-binary-cache-store.cc',
Expand Down
222 changes: 222 additions & 0 deletions src/libstore/build/find-cycles.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
#include "nix/store/build/find-cycles.hh"

#include "nix/store/store-api.hh"
#include "nix/util/source-accessor.hh"

#include <algorithm>
#include <filesystem>
#include <map>

namespace nix {

CycleEdgeScanSink::CycleEdgeScanSink(StringSet && hashes, std::string storeDir)
: RefScanSink(std::move(hashes))
, storeDir(std::move(storeDir))
{
}

void CycleEdgeScanSink::setCurrentPath(const std::string & path)
{
currentFilePath = path;
// Clear tracking for new file
recordedForCurrentFile.clear();
}

void CycleEdgeScanSink::operator()(std::string_view data)
{
// Call parent's operator() to do the actual hash searching
// This reuses all the proven buffer boundary handling logic
RefScanSink::operator()(data);

// Check which hashes have been found and not yet recorded for this file
// getResult() returns the set of ALL hashes found so far
for (const auto & hash : getResult()) {
if (recordedForCurrentFile.insert(hash).second) {
// This hash was just found and not yet recorded for current file
// Create an edge from current file to the target
auto targetPath = storeDir + hash;

edges.push_back({currentFilePath, targetPath});

debug("found cycle edge: %s → %s (hash: %s)", currentFilePath, targetPath, hash);
}
}
}

StoreCycleEdgeVec && CycleEdgeScanSink::getEdges()
{
return std::move(edges);
}

void scanForCycleEdges(const Path & path, const StorePathSet & refs, StoreCycleEdgeVec & edges)
{
StringSet hashes;

// Extract the store directory from the path
// Example: /run/user/1000/nix-test/store/abc-foo -> /run/user/1000/nix-test/store/
auto storePrefixPath = std::filesystem::path(path);
storePrefixPath.remove_filename();
std::string storePrefix = storePrefixPath.string();

debug("scanForCycleEdges: storePrefixPath = %s", storePrefixPath.string());
debug("scanForCycleEdges: storePrefix = %s", storePrefix);

// Collect hashes to search for
for (auto & i : refs) {
hashes.insert(std::string(i.hashPart()));
}

// Create sink that reuses RefScanSink's hash-finding logic
CycleEdgeScanSink sink(std::move(hashes), storePrefix);

// Get filesystem accessor and walk the tree
auto accessor = getFSSourceAccessor();
walkAndScanPath(*accessor, CanonPath(path), path, sink);

// Extract the found edges
edges = sink.getEdges();
}

/**
* Recursively walk filesystem and stream files into the sink.
* This reuses RefScanSink's hash-finding logic instead of reimplementing it.
*/
void walkAndScanPath(
SourceAccessor & accessor, const CanonPath & path, const std::string & displayPath, CycleEdgeScanSink & sink)
{
auto stat = accessor.lstat(path);

debug("walkAndScanPath: scanning path = %s", displayPath);

switch (stat.type) {
case SourceAccessor::tRegular: {
// Handle regular files - stream contents into sink
sink.setCurrentPath(displayPath);
accessor.readFile(path, sink);
break;
}

case SourceAccessor::tDirectory: {
// Handle directories - recursively scan contents
auto entries = accessor.readDirectory(path);
for (const auto & [name, entryType] : entries) {
auto childPath = path / name;
auto childDisplayPath = displayPath + "/" + name;
debug("walkAndScanPath: recursing into %s", childDisplayPath);
walkAndScanPath(accessor, childPath, childDisplayPath, sink);
}
break;
}

case SourceAccessor::tSymlink: {
// Handle symlinks - stream link target into sink
auto linkTarget = accessor.readLink(path);

debug("walkAndScanPath: scanning symlink %s -> %s", displayPath, linkTarget);

sink.setCurrentPath(displayPath);
sink(std::string_view(linkTarget));
break;
}

case SourceAccessor::tChar:
case SourceAccessor::tBlock:
case SourceAccessor::tSocket:
case SourceAccessor::tFifo:
case SourceAccessor::tUnknown:
default:
throw Error("file '%1%' has an unsupported type", displayPath);
}
}

void transformEdgesToMultiedges(StoreCycleEdgeVec & edges, StoreCycleEdgeVec & multiedges)
{
debug("transformEdgesToMultiedges: processing %lu edges", edges.size());

// Maps to track path endpoints for efficient joining
// Key: node name, Value: index into multiedges vector
std::map<std::string, size_t> pathStartingAt; // Maps start node -> path index
std::map<std::string, size_t> pathEndingAt; // Maps end node -> path index

for (auto & edge : edges) {
if (edge.empty())
continue;

const std::string & edgeStart = edge.front();
const std::string & edgeEnd = edge.back();

// Check if this edge can connect to existing paths
auto startIt = pathEndingAt.find(edgeStart);
auto endIt = pathStartingAt.find(edgeEnd);

bool canPrepend = (startIt != pathEndingAt.end());
bool canAppend = (endIt != pathStartingAt.end());

if (canPrepend && canAppend && startIt->second == endIt->second) {
// Edge connects a path to itself - append it to form a cycle
size_t pathIdx = startIt->second;
auto & path = multiedges[pathIdx];
// Append all but first element of edge (first element is duplicate)
path.insert(path.end(), std::next(edge.begin()), edge.end());
// Update the end point (start point stays the same for a cycle)
pathEndingAt.erase(startIt);
pathEndingAt[edgeEnd] = pathIdx;
} else if (canPrepend && canAppend) {
// Edge joins two different paths - merge them
size_t prependIdx = startIt->second;
size_t appendIdx = endIt->second;
auto & prependPath = multiedges[prependIdx];
auto & appendPath = multiedges[appendIdx];

// Save endpoint before modifying appendPath
const std::string appendPathEnd = appendPath.back();
const std::string appendPathStart = appendPath.front();

// Append edge (without first element) to prependPath
prependPath.insert(prependPath.end(), std::next(edge.begin()), edge.end());
// Append appendPath (without first element) to prependPath
prependPath.insert(prependPath.end(), std::next(appendPath.begin()), appendPath.end());

// Update maps: prependPath now ends where appendPath ended
pathEndingAt.erase(startIt);
pathEndingAt[appendPathEnd] = prependIdx;
pathStartingAt.erase(appendPathStart);

// Mark appendPath for removal by clearing it
appendPath.clear();
} else if (canPrepend) {
// Edge extends an existing path at its end
size_t pathIdx = startIt->second;
auto & path = multiedges[pathIdx];
// Append all but first element of edge (first element is duplicate)
path.insert(path.end(), std::next(edge.begin()), edge.end());
// Update the end point
pathEndingAt.erase(startIt);
pathEndingAt[edgeEnd] = pathIdx;
} else if (canAppend) {
// Edge extends an existing path at its start
size_t pathIdx = endIt->second;
auto & path = multiedges[pathIdx];
// Prepend all but last element of edge (last element is duplicate)
path.insert(path.begin(), edge.begin(), std::prev(edge.end()));
// Update the start point
pathStartingAt.erase(endIt);
pathStartingAt[edgeStart] = pathIdx;
} else {
// Edge doesn't connect to anything - start a new path
size_t newIdx = multiedges.size();
multiedges.push_back(edge);
pathStartingAt[edgeStart] = newIdx;
pathEndingAt[edgeEnd] = newIdx;
}
}

// Remove empty paths (those that were merged into others)
multiedges.erase(
std::remove_if(multiedges.begin(), multiedges.end(), [](const StoreCycleEdge & p) { return p.empty(); }),
multiedges.end());

debug("transformEdgesToMultiedges: result has %lu multiedges", multiedges.size());
}

} // namespace nix
Loading
Loading