Skip to content

Commit bdfd379

Browse files
committed
Add go wrapper around git diff-tree --raw -r -M
* Implemented calling git diff-tree * Ensures wrapper function is called with valid arguments * Parses output into go struct, using strong typing when possible
1 parent e94f37f commit bdfd379

File tree

3 files changed

+518
-1
lines changed

3 files changed

+518
-1
lines changed

modules/git/tree_entry_mode.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,20 @@
33

44
package git
55

6-
import "strconv"
6+
import (
7+
"fmt"
8+
"strconv"
9+
)
710

811
// EntryMode is the type of the object in the git tree
912
type EntryMode int
1013

1114
// There are only a few file modes in Git. They look like unix file modes, but they can only be
1215
// one of these.
1316
const (
17+
// EntryModeNoEntry is possible if the file was added or removed in a commit. In the case of
18+
// an addition, the base commit will not have the file in its tree, so a mode of 0o000000 is used.
19+
EntryModeNoEntry EntryMode = 0o000000
1420
// EntryModeBlob
1521
EntryModeBlob EntryMode = 0o100644
1622
// EntryModeExec
@@ -33,3 +39,22 @@ func ToEntryMode(value string) EntryMode {
3339
v, _ := strconv.ParseInt(value, 8, 32)
3440
return EntryMode(v)
3541
}
42+
43+
func ParseEntryMode(mode string) (EntryMode, error) {
44+
switch mode {
45+
case "000000":
46+
return EntryModeNoEntry, nil
47+
case "100644":
48+
return EntryModeBlob, nil
49+
case "100755":
50+
return EntryModeExec, nil
51+
case "120000":
52+
return EntryModeSymlink, nil
53+
case "160000":
54+
return EntryModeCommit, nil
55+
case "040000":
56+
return EntryModeTree, nil
57+
default:
58+
return 0, fmt.Errorf("unparsable entry mode: %s", mode)
59+
}
60+
}

services/gitdiff/git_diff_tree.go

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
// Copyright 2025 The Gitea Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package gitdiff
5+
6+
import (
7+
"bufio"
8+
"context"
9+
"fmt"
10+
"strings"
11+
12+
"code.gitea.io/gitea/modules/git"
13+
"code.gitea.io/gitea/modules/log"
14+
)
15+
16+
type DiffTree struct {
17+
Files []*DiffTreeRecord
18+
}
19+
20+
type DiffTreeRecord struct {
21+
// Status is one of 'added', 'deleted', 'modified', 'renamed', 'copied'
22+
Status string
23+
24+
HeadPath string
25+
BasePath string
26+
HeadMode git.EntryMode
27+
BaseMode git.EntryMode
28+
HeadBlobID string
29+
BaseBlobID string
30+
}
31+
32+
// GetDiffTree returns the list of path of the files that have changed between the two commits
33+
func GetDiffTree(ctx context.Context, gitRepo *git.Repository, baseSha, headSha string) (*DiffTree, error) {
34+
gitDiffTreeRecords, err := runGitDiffTree(ctx, gitRepo, baseSha, headSha)
35+
if err != nil {
36+
return nil, err
37+
}
38+
39+
return &DiffTree{
40+
Files: gitDiffTreeRecords,
41+
}, nil
42+
}
43+
44+
func runGitDiffTree(ctx context.Context, gitRepo *git.Repository, baseSha, headSha string) ([]*DiffTreeRecord, error) {
45+
baseCommitID, headCommitID, err := validateGitDiffTreeArguments(gitRepo, baseSha, headSha)
46+
if err != nil {
47+
return nil, err
48+
}
49+
50+
cmd := git.NewCommand(ctx, "diff-tree", "--raw", "-r", "--find-renames").AddDynamicArguments(baseCommitID, headCommitID)
51+
stdout, _, runErr := cmd.RunStdString(&git.RunOpts{Dir: gitRepo.Path})
52+
if runErr != nil {
53+
log.Warn("git diff-tree: %v", runErr)
54+
return nil, runErr
55+
}
56+
57+
return parseGitDiffTree(stdout)
58+
}
59+
60+
func validateGitDiffTreeArguments(gitRepo *git.Repository, baseSha, headSha string) (string, string, error) {
61+
// if the head is empty its an error
62+
if headSha == "" {
63+
return "", "", fmt.Errorf("headSha is empty")
64+
}
65+
66+
// if the head commit doesn't exist its and error
67+
headCommit, err := gitRepo.GetCommit(headSha)
68+
if err != nil {
69+
return "", "", fmt.Errorf("failed to get commit headSha: %v", err)
70+
}
71+
headCommitID := headCommit.ID.String()
72+
73+
// if the base is empty we should use the parent of the head commit
74+
if baseSha == "" {
75+
// if the headCommit has no parent we should use an empty commit
76+
// this can happen when we are generating a diff against an orphaned commit
77+
if headCommit.ParentCount() == 0 {
78+
objectFormat, err := gitRepo.GetObjectFormat()
79+
if err != nil {
80+
return "", "", err
81+
}
82+
83+
return objectFormat.EmptyTree().String(), headCommitID, nil
84+
}
85+
86+
baseCommit, err := headCommit.Parent(0)
87+
if err != nil {
88+
return "", "", fmt.Errorf("baseSha is '', attempted to use parent of commit %s, got error: %v", headCommit.ID.String(), err)
89+
}
90+
return baseCommit.ID.String(), headCommitID, nil
91+
}
92+
93+
// try and get the base commit
94+
baseCommit, err := gitRepo.GetCommit(baseSha)
95+
// propagate the error if we couldn't get the base commit
96+
if err != nil {
97+
return "", "", fmt.Errorf("failed to get base commit %s: %v", baseSha, err)
98+
}
99+
100+
return baseCommit.ID.String(), headCommit.ID.String(), nil
101+
}
102+
103+
func parseGitDiffTree(output string) ([]*DiffTreeRecord, error) {
104+
/*
105+
The output of `git diff-tree --raw -r --find-renames` is of the form:
106+
107+
:<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<path>
108+
109+
or for renames:
110+
111+
:<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<old_path> <new_path>
112+
113+
See: <https://git-scm.com/docs/git-diff-tree#_raw_output_format> for more details
114+
*/
115+
if output == "" {
116+
return []*DiffTreeRecord{}, nil
117+
}
118+
119+
results := make([]*DiffTreeRecord, 0)
120+
121+
lines := bufio.NewScanner(strings.NewReader(output))
122+
for lines.Scan() {
123+
line := lines.Text()
124+
125+
if len(line) == 0 {
126+
continue
127+
}
128+
129+
record, err := parseGitDiffTreeLine(line)
130+
if err != nil {
131+
return nil, err
132+
}
133+
134+
results = append(results, record)
135+
}
136+
137+
if err := lines.Err(); err != nil {
138+
return nil, err
139+
}
140+
141+
return results, nil
142+
}
143+
144+
func parseGitDiffTreeLine(line string) (*DiffTreeRecord, error) {
145+
line = strings.TrimPrefix(line, ":")
146+
fields := strings.Fields(line)
147+
if len(fields) < 6 {
148+
return nil, fmt.Errorf("unparsable output for diff --raw: `%s`)", line)
149+
}
150+
151+
status, err := statusFromLetter(fields[4])
152+
if err != nil {
153+
return nil, err
154+
}
155+
156+
var headPath, basePath string
157+
if status == "renamed" {
158+
if len(fields) != 7 {
159+
return nil, fmt.Errorf("unparsable output for diff --raw: `%s`)", line)
160+
}
161+
basePath = fields[5]
162+
headPath = fields[6]
163+
} else {
164+
basePath = fields[5]
165+
headPath = fields[5]
166+
}
167+
168+
baseMode, err := git.ParseEntryMode(fields[0])
169+
if err != nil {
170+
return nil, err
171+
}
172+
173+
headMode, err := git.ParseEntryMode(fields[1])
174+
if err != nil {
175+
return nil, err
176+
}
177+
178+
return &DiffTreeRecord{
179+
Status: status,
180+
BaseMode: baseMode,
181+
HeadMode: headMode,
182+
BaseBlobID: fields[2],
183+
HeadBlobID: fields[3],
184+
BasePath: basePath,
185+
HeadPath: headPath,
186+
}, nil
187+
}
188+
189+
// statusFromLetter maps the status column of `git diff-tree --raw` output to
// a descriptive word. Only the first byte is inspected, so a trailing
// similarity score (as in "R100" or "C75") is ignored.
//
// An empty string or an unrecognised first letter results in an error.
func statusFromLetter(letter string) (string, error) {
	if len(letter) < 1 {
		return "", fmt.Errorf("empty status letter")
	}
	switch letter[0] {
	case 'A':
		return "added", nil
	case 'D':
		return "deleted", nil
	case 'M':
		return "modified", nil
	case 'R':
		// This is of the form "R<score>" but we are choosing to ignore the score
		return "renamed", nil
	case 'C':
		// This is of the form "C<score>" but we are choosing to ignore the score
		return "copied", nil
	case 'T':
		// The type of the entry changed (e.g. regular file <-> symlink); without
		// this case a single such entry would make the whole diff unparsable.
		return "typechanged", nil
	default:
		return "", fmt.Errorf("unknown status letter: '%s'", letter)
	}
}

0 commit comments

Comments
 (0)