diff --git a/.github/workflows/test-git-annex.yml b/.github/workflows/test-git-annex.yml new file mode 100644 index 0000000000000..67b8a3eccda37 --- /dev/null +++ b/.github/workflows/test-git-annex.yml @@ -0,0 +1,32 @@ +name: Test Git Annex + # Here we just test the git-annex feature, leaving + # the full test suite for upstream to watch over. + +on: + push: + workflow_call: # so release.yml can depend on this (https://stackoverflow.com/a/71489231) + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Install toolchain + run: | + sudo apt-get update && sudo DEBIAN_FRONTEND=noninteractive apt-get install -y make git git-annex + # per README.md, building needs Go 1.17 and Node LTS + - uses: actions/setup-go@v2 + with: + go-version: '^1.17' # The Go version to download (if necessary) and use. + - uses: actions/setup-node@v2 + with: + node-version: 'lts/*' + + - name: Build ./gitea + run: | + TAGS="bindata sqlite sqlite_unlock_notify" make backend + + - name: Test + run: | + make 'test-sqlite#TestGitAnnex' diff --git a/integrations/api_helper_for_declarative_test.go b/integrations/api_helper_for_declarative_test.go index 181a6469467cd..3851c1b762118 100644 --- a/integrations/api_helper_for_declarative_test.go +++ b/integrations/api_helper_for_declarative_test.go @@ -14,6 +14,8 @@ import ( "testing" "time" + "github.com/google/uuid" + "code.gitea.io/gitea/models/perm" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/json" @@ -462,3 +464,23 @@ func doAPIAddRepoToOrganizationTeam(ctx APITestContext, teamID int64, orgName, r ctx.Session.MakeRequest(t, req, http.StatusNoContent) } } + +// generate and activate an ssh key for the user attached to the APITestContext +// TODO: pick a better name; golang doesn't do method overriding. +func withCtxKeyFile(t *testing.T, ctx APITestContext, callback func()) { + keyName := "One of " + ctx.Username + "'s keys: #" + uuid.New().String() + withKeyFile(t, keyName, func(keyFile string) { + + var key api.PublicKey + + doAPICreateUserKey(ctx, keyName, keyFile, + func(t *testing.T, _key api.PublicKey) { + // save the key ID so we can delete it at the end + key = _key + })(t) + + defer doAPIDeleteUserKey(ctx, key.ID)(t) + + callback() + }) +} diff --git a/integrations/git_annex_test.go b/integrations/git_annex_test.go new file mode 100644 index 0000000000000..26becd6595cec --- /dev/null +++ b/integrations/git_annex_test.go @@ -0,0 +1,787 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package integrations + +import ( + "github.com/stretchr/testify/require" + "testing" + + "errors" + "fmt" + "math/rand" + "net/url" + "os" + "path" + "regexp" + "strings" + + "code.gitea.io/gitea/models/perm" + repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/setting" + api "code.gitea.io/gitea/modules/structs" + "code.gitea.io/gitea/modules/util" +) + +// Some guidelines: +// +// * a APITestContext is an awkward union of session credential + username + target repo +// which is assumed to be owned by that username; if you want to target a different +// repo, you need to edit its .Reponame or just ignore it and write "username/reponame.git" + + +/* + Test that permissions are enforced on git-annex-shell commands. + + Along the way, test that uploading, downloading, and deleting all work. +*/ +func TestGitAnnexPermissions(t *testing.T) { + /* + // TODO: look into how LFS did this + if !setting.Annex.Enabled { + t.Skip() + } + */ + + + // Each case below is split so that 'clone' is done as + // the repo owner, but 'copy' as the user under test. + // + // Otherwise, in cases where permissions block the + // initial 'clone', the test would simply end there + // and never verify if permissions apply properly to + // 'annex copy' -- potentially leaving a security gap. + + onGiteaRun(t, func(t *testing.T, u *url.URL) { + + t.Run("Public", func(t *testing.T) { + defer PrintCurrentTest(t)() + + // create a public repo + ownerCtx := NewAPITestContext(t, "user2", "annex-public") + doAPICreateRepository(ownerCtx, false)(t) + private := false // this API takes pointers, so we need a variable + doAPIEditRepository(ownerCtx, &api.EditRepoOption{Private: &private})(t) + + // double-check it's public + repo, err := repo_model.GetRepositoryByOwnerAndName(ownerCtx.Username, ownerCtx.Reponame) + require.NoError(t, err) + require.False(t, repo.IsPrivate) + + // Remote addresses of the repo + repoURL := createSSHUrl(ownerCtx.GitPath(), u) // remote git URL + remoteRepoPath := path.Join(setting.RepoRootPath, ownerCtx.GitPath()) // path on disk -- which can be examined directly because we're testing from localhost + + // Fill in fixture data + withAnnexCtxKeyFile(t, ownerCtx, func() { + err = doInitRemoteAnnexRepository(t, repoURL) + require.NoError(t, err, "git-annex repository should have been initialized") + }) + + // Different sessions, so we can test different permissions. + // We leave Reponame blank because we don't actually then later add it according to each case if needed + // + // NB: these usernames need to match appropriate entries in models/fixtures/user.yml + writerCtx := NewAPITestContext(t, "user5", "") + readerCtx := NewAPITestContext(t, "user4", "") + outsiderCtx := NewAPITestContext(t, "user8", "") // a user with no specific access + + // set up collaborators + doAPIAddCollaborator(ownerCtx, readerCtx.Username, perm.AccessModeRead)(t) + doAPIAddCollaborator(ownerCtx, writerCtx.Username, perm.AccessModeWrite)(t) + + // tests + t.Run("Owner", func(t *testing.T) { + defer PrintCurrentTest(t)() + + t.Run("SSH", func(t *testing.T) { + defer PrintCurrentTest(t)() + + repoPath, err := os.MkdirTemp("", ownerCtx.Reponame) + require.NoError(t, err) + defer util.RemoveAll(repoPath) + + withAnnexCtxKeyFile(t, ownerCtx, func() { + doGitClone(repoPath, repoURL)(t) + }) + + withAnnexCtxKeyFile(t, ownerCtx, func() { + t.Run("Init", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexInitTest(remoteRepoPath, repoPath)) + }) + + t.Run("Download", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexDownloadTest(remoteRepoPath, repoPath)) + }) + + t.Run("Upload", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexUploadTest(remoteRepoPath, repoPath)) + }) + }) + }) + }) + + t.Run("Writer", func(t *testing.T) { + defer PrintCurrentTest(t)() + + t.Run("SSH", func(t *testing.T) { + defer PrintCurrentTest(t)() + + repoPath, err := os.MkdirTemp("", ownerCtx.Reponame) + require.NoError(t, err) + defer util.RemoveAll(repoPath) + + withAnnexCtxKeyFile(t, ownerCtx, func() { + doGitClone(repoPath, repoURL)(t) + }) + + withAnnexCtxKeyFile(t, writerCtx, func() { + t.Run("Init", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexInitTest(remoteRepoPath, repoPath)) + }) + + t.Run("Download", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexDownloadTest(remoteRepoPath, repoPath)) + }) + + t.Run("Upload", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexUploadTest(remoteRepoPath, repoPath)) + }) + }) + }) + }) + + t.Run("Reader", func(t *testing.T) { + defer PrintCurrentTest(t)() + + t.Run("SSH", func(t *testing.T) { + defer PrintCurrentTest(t)() + + repoPath, err := os.MkdirTemp("", ownerCtx.Reponame) + require.NoError(t, err) + defer util.RemoveAll(repoPath) + + withAnnexCtxKeyFile(t, ownerCtx, func() { + doGitClone(repoPath, repoURL)(t) + }) + + withAnnexCtxKeyFile(t, readerCtx, func() { + t.Run("Init", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexInitTest(remoteRepoPath, repoPath)) + }) + + t.Run("Download", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexDownloadTest(remoteRepoPath, repoPath)) + }) + + t.Run("Upload", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.Error(t, doAnnexUploadTest(remoteRepoPath, repoPath), "Uploading should fail due to permissions") + }) + }) + }) + }) + + t.Run("Outsider", func(t *testing.T) { + defer PrintCurrentTest(t)() + + t.Run("SSH", func(t *testing.T) { + defer PrintCurrentTest(t)() + + repoPath, err := os.MkdirTemp("", ownerCtx.Reponame) + require.NoError(t, err) + defer util.RemoveAll(repoPath) + + withAnnexCtxKeyFile(t, ownerCtx, func() { + doGitClone(repoPath, repoURL)(t) + }) + + withAnnexCtxKeyFile(t, outsiderCtx, func() { + t.Run("Init", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexInitTest(remoteRepoPath, repoPath)) + }) + + t.Run("Download", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexDownloadTest(remoteRepoPath, repoPath)) + }) + + t.Run("Upload", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.Error(t, doAnnexUploadTest(remoteRepoPath, repoPath), "Uploading should fail due to permissions") + }) + }) + }) + }) + + t.Run("Delete", func(t *testing.T) { + defer PrintCurrentTest(t)() + + // Delete the repo, make sure it's fully gone + doAPIDeleteRepository(ownerCtx)(t) + _, stat_err := os.Stat(remoteRepoPath) + require.True(t, os.IsNotExist(stat_err), "Remote annex repo should be removed from disk") + }) + }) + + t.Run("Private", func(t *testing.T) { + defer PrintCurrentTest(t)() + + // create a private repo + ownerCtx := NewAPITestContext(t, "user2", "annex-private") + doAPICreateRepository(ownerCtx, false)(t) + + // double-check it's private + repo, err := repo_model.GetRepositoryByOwnerAndName(ownerCtx.Username, ownerCtx.Reponame) + require.NoError(t, err) + require.True(t, repo.IsPrivate) + + // Remote addresses of the repo + repoURL := createSSHUrl(ownerCtx.GitPath(), u) // remote git URL + remoteRepoPath := path.Join(setting.RepoRootPath, ownerCtx.GitPath()) // path on disk -- which can be examined directly because we're testing from localhost + + // Fill in fixture data + withAnnexCtxKeyFile(t, ownerCtx, func() { + err = doInitRemoteAnnexRepository(t, repoURL) + require.NoError(t, err, "git-annex repository should have been initialized") + }) + + // Different sessions, so we can test different permissions. + // We leave Reponame blank because we don't actually then later add it according to each case if needed + // + // NB: these usernames need to match appropriate entries in models/fixtures/user.yml + writerCtx := NewAPITestContext(t, "user5", "") + readerCtx := NewAPITestContext(t, "user4", "") + outsiderCtx := NewAPITestContext(t, "user8", "") // a user with no specific access + // Note: there's also full anonymous access, which is only available for public HTTP repos; + // it should behave the same as 'outsider' but we (will) test it separately below anyway + + // set up collaborators + doAPIAddCollaborator(ownerCtx, readerCtx.Username, perm.AccessModeRead)(t) + doAPIAddCollaborator(ownerCtx, writerCtx.Username, perm.AccessModeWrite)(t) + + // tests + t.Run("Owner", func(t *testing.T) { + defer PrintCurrentTest(t)() + + t.Run("SSH", func(t *testing.T) { + defer PrintCurrentTest(t)() + + repoPath, err := os.MkdirTemp("", ownerCtx.Reponame) + require.NoError(t, err) + defer util.RemoveAll(repoPath) + + withAnnexCtxKeyFile(t, ownerCtx, func() { + doGitClone(repoPath, repoURL)(t) + }) + + withAnnexCtxKeyFile(t, ownerCtx, func() { + t.Run("Init", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexInitTest(remoteRepoPath, repoPath)) + }) + + t.Run("Download", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexDownloadTest(remoteRepoPath, repoPath)) + }) + + t.Run("Upload", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexUploadTest(remoteRepoPath, repoPath)) + }) + }) + }) + }) + + t.Run("Writer", func(t *testing.T) { + defer PrintCurrentTest(t)() + + t.Run("SSH", func(t *testing.T) { + defer PrintCurrentTest(t)() + + repoPath, err := os.MkdirTemp("", ownerCtx.Reponame) + require.NoError(t, err) + defer util.RemoveAll(repoPath) + + withAnnexCtxKeyFile(t, ownerCtx, func() { + doGitClone(repoPath, repoURL)(t) + }) + + withAnnexCtxKeyFile(t, writerCtx, func() { + t.Run("Init", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexInitTest(remoteRepoPath, repoPath)) + }) + + t.Run("Download", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexDownloadTest(remoteRepoPath, repoPath)) + }) + + t.Run("Upload", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexUploadTest(remoteRepoPath, repoPath)) + }) + }) + }) + }) + + t.Run("Reader", func(t *testing.T) { + defer PrintCurrentTest(t)() + + t.Run("SSH", func(t *testing.T) { + defer PrintCurrentTest(t)() + + repoPath, err := os.MkdirTemp("", ownerCtx.Reponame) + require.NoError(t, err) + defer util.RemoveAll(repoPath) + + withAnnexCtxKeyFile(t, ownerCtx, func() { + doGitClone(repoPath, repoURL)(t) + }) + + withAnnexCtxKeyFile(t, readerCtx, func() { + t.Run("Init", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexInitTest(remoteRepoPath, repoPath)) + }) + + t.Run("Download", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.NoError(t, doAnnexDownloadTest(remoteRepoPath, repoPath)) + }) + + t.Run("Upload", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.Error(t, doAnnexUploadTest(remoteRepoPath, repoPath), "Uploading should fail due to permissions") + }) + }) + }) + }) + + t.Run("Outsider", func(t *testing.T) { + defer PrintCurrentTest(t)() + + t.Run("SSH", func(t *testing.T) { + defer PrintCurrentTest(t)() + + repoPath, err := os.MkdirTemp("", ownerCtx.Reponame) + require.NoError(t, err) + defer util.RemoveAll(repoPath) + + withAnnexCtxKeyFile(t, ownerCtx, func() { + doGitClone(repoPath, repoURL)(t) + }) + + withAnnexCtxKeyFile(t, outsiderCtx, func() { + t.Run("Init", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.Error(t, doAnnexInitTest(remoteRepoPath, repoPath), "annex init should fail due to permissions") + }) + + t.Run("Download", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.Error(t, doAnnexDownloadTest(remoteRepoPath, repoPath), "annex copy --from should fail due to permissions") + }) + + t.Run("Upload", func(t *testing.T) { + defer PrintCurrentTest(t)() + + require.Error(t, doAnnexUploadTest(remoteRepoPath, repoPath), "annex copy --to should fail due to permissions") + }) + }) + }) + }) + + t.Run("Delete", func(t *testing.T) { + defer PrintCurrentTest(t)() + + // Delete the repo, make sure it's fully gone + doAPIDeleteRepository(ownerCtx)(t) + _, stat_err := os.Stat(remoteRepoPath) + require.True(t, os.IsNotExist(stat_err), "Remote annex repo should be removed from disk") + }) + }) + }) +} + +/* test that 'git annex init' works + +precondition: repoPath contains a pre-cloned git repo with an annex: a valid git-annex branch, + and a file 'large.bin' in its origin's annex. See doInitAnnexRepository(). + +*/ +func doAnnexInitTest(remoteRepoPath string, repoPath string) (err error) { + _, _, err = git.NewCommand(git.DefaultContext, "annex", "init").RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return err + } + + // - method 0: 'git config remote.origin.annex-uuid'. + // Demonstrates that 'git annex init' successfully contacted + // the remote git-annex and was able to learn its ID number. + readAnnexUUID, _, err := git.NewCommand(git.DefaultContext, "config", "remote.origin.annex-uuid").RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return err + } + readAnnexUUID = strings.TrimSpace(readAnnexUUID) + + match := regexp.MustCompile("^[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}$").MatchString(readAnnexUUID) + if !match { + return errors.New(fmt.Sprintf("'git config remote.origin.annex-uuid' should have been able to download the remote's uuid; but instead read '%s'.", readAnnexUUID)) + } + + remoteAnnexUUID, _, err := git.NewCommand(git.DefaultContext, "config", "annex.uuid").RunStdString(&git.RunOpts{Dir: remoteRepoPath}) + if err != nil { + return err + } + + remoteAnnexUUID = strings.TrimSpace(remoteAnnexUUID) + match = regexp.MustCompile("^[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}$").MatchString(remoteAnnexUUID) + if !match { + return errors.New(fmt.Sprintf("'git annex init' should have been able to download the remote's uuid; but instead read '%s'.", remoteAnnexUUID)) + } + + if readAnnexUUID != remoteAnnexUUID { + return errors.New(fmt.Sprintf("'git annex init' should have read the expected annex UUID '%s', but instead got '%s'", remoteAnnexUUID, readAnnexUUID)) + } + + // - method 1: 'git annex whereis'. + // Demonstrates that git-annex understands the annexed file can be found in the remote annex. + annexWhereis, _, err := git.NewCommand(git.DefaultContext, "annex", "whereis", "large.bin").RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return err + } + // Note: this regex is unanchored because 'whereis' outputs multiple lines containing + // headers and 1+ remotes and we just want to find one of them. + match = regexp.MustCompile(regexp.QuoteMeta(remoteAnnexUUID) + " -- .* \\[origin\\]\n").MatchString(annexWhereis) + if !match { + return errors.New("'git annex whereis' should report large.bin is known to be in [origin]") + } + + return nil +} + +func doAnnexDownloadTest(remoteRepoPath string, repoPath string) (err error) { + // NB: this test does something slightly different if run separately from "doAnnexInitTest()": + // "git annex copy" will notice and run "git annex init", silently. + // This shouldn't change any results, but be aware in case it does. + + _, _, err = git.NewCommand(git.DefaultContext, "annex", "copy", "--from", "origin").RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return err + } + + // verify the file was downloaded + localObjectPath, err := annexObjectPath(repoPath, "large.bin") + if err != nil { + return err + } + //localObjectPath := path.Join(repoPath, "large.bin") // or, just compare against the checked-out file + + remoteObjectPath, err := annexObjectPath(remoteRepoPath, "large.bin") + if err != nil { + return err + } + + match, err := util.FileCmp(localObjectPath, remoteObjectPath, 0) + if err != nil { + return err + } + if !match { + return errors.New("Annexed files should be the same") + } + + return nil +} + +func doAnnexUploadTest(remoteRepoPath string, repoPath string) (err error) { + // NB: this test does something slightly different if run separately from "Init": + // it first runs "git annex init" silently in the background. + // This shouldn't change any results, but be aware in case it does. + + err = generateRandomFile(1024*1024/4, path.Join(repoPath, "contribution.bin")) + if err != nil { + return err + } + + err = git.AddChanges(repoPath, false, ".") + if err != nil { + return err + } + + err = git.CommitChanges(repoPath, git.CommitChangesOptions{Message: "Annex another file"}) + if err != nil { + return err + } + + _, _, err = git.NewCommand(git.DefaultContext, "annex", "copy", "--to", "origin").RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return err + } + + _, _, err = git.NewCommand(git.DefaultContext, "annex", "sync", "--no-content").RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return err + } + + // verify the file was uploaded + localObjectPath, err := annexObjectPath(repoPath, "contribution.bin") + if err != nil { + return err + } + //localObjectPath := path.Join(repoPath, "contribution.bin") // or, just compare against the checked-out file + + remoteObjectPath, err := annexObjectPath(remoteRepoPath, "contribution.bin") + if err != nil { + return err + } + + match, err := util.FileCmp(localObjectPath, remoteObjectPath, 0) + if err != nil { + return err + } + if !match { + return errors.New("Annexed files should be the same") + } + + return nil +} + +// ---- Helpers ---- + +func generateRandomFile(size int, path string) (err error) { + // Generate random file + + // XXX TODO: maybe this should not be random, but instead a predictable pattern, so that the test is deterministic + bufSize := 4 * 1024 + if bufSize > size { + bufSize = size + } + + buffer := make([]byte, bufSize) + + f, err := os.Create(path) + if err != nil { + return err + } + defer f.Close() + + written := 0 + for written < size { + n := size - written + if n > bufSize { + n = bufSize + } + _, err := rand.Read(buffer[:n]) + if err != nil { + return err + } + n, err = f.Write(buffer[:n]) + if err != nil { + return err + } + written += n + } + if err != nil { + return err + } + + return nil +} + +// ---- Annex-specific helpers ---- + +/* Initialize a repo with some baseline annexed and non-annexed files. + +TODO: this could be replaced with a fixture repo; +see integrations/gitea-repositories-meta/ and models/fixtures/repository.yml. + +However we reuse this template for -different- repos. +*/ +func doInitAnnexRepository(repoPath string) error { + // set up what files should be annexed + // in this case, all *.bin files will be annexed + // without this, git-annex's default config annexes every file larger than some number of megabytes + f, err := os.Create(path.Join(repoPath, ".gitattributes")) + if err != nil { + return err + } + + _, err = f.WriteString("* annex.largefiles=nothing") + if err != nil { + return err + } + _, err = f.WriteString("*.bin filter=annex annex.largefiles=anything") + if err != nil { + return err + } + f.Close() + + err = git.AddChanges(repoPath, false, ".") + if err != nil { + return err + } + err = git.CommitChanges(repoPath, git.CommitChangesOptions{Message: "Configure git-annex settings"}) + if err != nil { + return err + } + + // 'git annex init' + // 'gitea-annex-test' is there to avoid the nuisance comment getting stored. + err = git.NewCommand(git.DefaultContext, "annex", "init", "gitea-annex-test").Run(&git.RunOpts{Dir: repoPath}) + if err != nil { + return err + } + + // add a file to the annex + err = generateRandomFile(1024*1024/4, path.Join(repoPath, "large.bin")) + if err != nil { + return err + } + err = git.AddChanges(repoPath, false, ".") + if err != nil { + return err + } + err = git.CommitChanges(repoPath, git.CommitChangesOptions{Message: "Annex a file"}) + if err != nil { + return err + } + + return nil +} + +/* Initialize a repo with some baseline annexed and non-annexed files. + +TODO: this could be replaced with a fixture repo; +see integrations/gitea-repositories-meta/ and models/fixtures/repository.yml. + +However we reuse this template for -different- repos. + +TODO: This has to take a testing.T, but only because it reuses a routine + written in the other integration tests which expects it. + It would be cleaner if it didn't have to. +*/ +func doInitRemoteAnnexRepository(t *testing.T, repoURL *url.URL) error { + repoPath, err := os.MkdirTemp("", path.Base(repoURL.Path)) + if err != nil { + return err + } + // This clone is immediately thrown away, which + // helps force the tests to be end-to-end. + defer util.RemoveAll(repoPath) + + doGitClone(repoPath, repoURL)(t) + + err = doInitAnnexRepository(repoPath) + + _, _, err = git.NewCommand(git.DefaultContext, "annex", "sync", "--content").RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return err + } + + return nil +} + +/* given a git repo and a path to an annexed file in it (assumed to be committed to its HEAD), + find the path in .git/annex/objects/ that contains its actual contents. */ +func annexObjectPath(repoPath string, file string) (string, error) { + + // find the path inside *.git/annex/objects of a given file + // i.e. figure out its two-level hash prefix: https://git-annex.branchable.com/internals/hashing/ + // ASSUMES the target file is checked into HEAD + + var bare bool // whether the repo is bare or not; this changes what the hashing algorithm is, due to backwards compatibility + + bareStr, _, err := git.NewCommand(git.DefaultContext, "config", "core.bare").RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return "", err + } + + if bareStr == "true\n" { + bare = true + } else if bareStr == "false\n" { + bare = false + } else { + return "", errors.New(fmt.Sprintf("Could not determine if %s is a bare repo or not; git config core.bare = <%s>", repoPath, bareStr)) + } + + // given a repo and a file in it + // TODO: handle other branches, e.g. non-HEAD branches etc + annexKey, _, err := git.NewCommand(git.DefaultContext, "show", "HEAD:"+file).RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return "", err + } + + annexKey = strings.TrimSpace(annexKey) + if !strings.HasPrefix(annexKey, "/annex/objects/") { + return "", errors.New(fmt.Sprintf("%s/%s does not appear to be annexed .", repoPath, file)) + } + annexKey = strings.TrimPrefix(annexKey, "/annex/objects/") + + var keyformat string + if bare { + keyformat = "hashdirlower" + } else { + keyformat = "hashdirmixed" + } + keyHashPrefix, _, err := git.NewCommand(git.DefaultContext, "annex", "examinekey", "--format=${"+keyformat+"}", annexKey).RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return "", err + } + + if !bare { + repoPath = path.Join(repoPath, ".git") + } + return path.Join(repoPath, "annex", "objects", keyHashPrefix, annexKey, annexKey), nil +} + +/* like withKeyFile(), but automatically sets it the account given in ctx for use by git-annex */ +func withAnnexCtxKeyFile(t *testing.T, ctx APITestContext, callback func()) { + _gitAnnexUseGitSSH, gitAnnexUseGitSSHExists := os.LookupEnv("GIT_ANNEX_USE_GIT_SSH") + defer func() { + // reset + if gitAnnexUseGitSSHExists { + os.Setenv("GIT_ANNEX_USE_GIT_SSH", _gitAnnexUseGitSSH) + } + }() + + os.Setenv("GIT_ANNEX_USE_GIT_SSH", "1") // withKeyFile works by setting GIT_SSH_COMMAND, but git-annex only respects that if this is set + + withCtxKeyFile(t, ctx, callback) +} diff --git a/integrations/git_helper_for_declarative_test.go b/integrations/git_helper_for_declarative_test.go index 1ea594b739c8b..f6b5e6f0d11d4 100644 --- a/integrations/git_helper_for_declarative_test.go +++ b/integrations/git_helper_for_declarative_test.go @@ -41,6 +41,28 @@ func withKeyFile(t *testing.T, keyname string, callback func(string)) { "ssh -o \"UserKnownHostsFile=/dev/null\" -o \"StrictHostKeyChecking=no\" -o \"IdentitiesOnly=yes\" -i \""+keyFile+"\" \"$@\""), 0o700) assert.NoError(t, err) + // reset ssh wrapper afterwards + _gitSSH, gitSSHExists := os.LookupEnv("GIT_SSH") + defer func() { + if gitSSHExists { + os.Setenv("GIT_SSH", _gitSSH) + } + }() + + _gitSSHCommand, gitSSHCommandExists := os.LookupEnv("GIT_SSH_COMMAND") + defer func() { + if gitSSHCommandExists { + os.Setenv("GIT_SSH_COMMAND", _gitSSHCommand) + } + }() + + _gitSSHVariant, gitSSHVariantExists := os.LookupEnv("GIT_SSH_VARIANT") + defer func() { + if gitSSHVariantExists { + os.Setenv("GIT_SSH_VARIANT", _gitSSHVariant) + } + }() + // Setup ssh wrapper os.Setenv("GIT_SSH", path.Join(tmpDir, "ssh")) os.Setenv("GIT_SSH_COMMAND", diff --git a/modules/util/filecmp.go b/modules/util/filecmp.go new file mode 100644 index 0000000000000..49a6656d853ea --- /dev/null +++ b/modules/util/filecmp.go @@ -0,0 +1,85 @@ +package util + +import ( + "bytes" + "io" + "os" +) + +// Decide if two files have the same contents or not. +// chunkSize is the size of the blocks to scan by; pass 0 to get a sensible default. +// *Follows* symlinks. +// +// May return an error if something else goes wrong; in this case, you should ignore the value of 'same'. +// +// derived from https://stackoverflow.com/a/30038571 +// under CC-BY-SA-4.0 by several contributors +func FileCmp(file1, file2 string, chunkSize int) (same bool, err error) { + + if chunkSize == 0 { + chunkSize = 4 * 1024 + } + + // shortcuts: check file metadata + stat1, err := os.Stat(file1) + if err != nil { + return false, err + } + + stat2, err := os.Stat(file1) + if err != nil { + return false, err + } + + // are inputs are literally the same file? + if os.SameFile(stat1, stat2) { + return true, nil + } + + // do inputs at least have the same size? + if stat1.Size() != stat2.Size() { + return false, nil + } + + // long way: compare contents + f1, err := os.Open(file1) + if err != nil { + return false, err + } + defer f1.Close() + + f2, err := os.Open(file2) + if err != nil { + return false, err + } + defer f2.Close() + + b1 := make([]byte, chunkSize) + b2 := make([]byte, chunkSize) + for { + n1, err1 := io.ReadFull(f1, b1) + n2, err2 := io.ReadFull(f2, b2) + + // https://pkg.go.dev/io#Reader + // > Callers should always process the n > 0 bytes returned + // > before considering the error err. Doing so correctly + // > handles I/O errors that happen after reading some bytes + // > and also both of the allowed EOF behaviors. + + if !bytes.Equal(b1[:n1], b2[:n2]) { + return false, nil + } + + if (err1 == io.EOF && err2 == io.EOF) || (err1 == io.ErrUnexpectedEOF && err2 == io.ErrUnexpectedEOF) { + return true, nil + } + + // some other error, like a dropped network connection or a bad transfer + if err1 != nil { + return false, err1 + } + if err2 != nil { + return false, err2 + } + } +}