Skip to content

Commit da24f58

Browse files
committed
Add go wrapper around git diff-tree --raw -r -M
* Implemented calling git diff-tree * Ensures wrapper function is called with valid arguments * Parses output into go struct, using strong typing when possible
1 parent e94f37f commit da24f58

File tree

4 files changed

+566
-14
lines changed

4 files changed

+566
-14
lines changed

modules/git/parse.go

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -46,19 +46,9 @@ func parseLsTreeLine(line []byte) (*LsTreeEntry, error) {
4646
entry.Size = optional.Some(size)
4747
}
4848

49-
switch string(entryMode) {
50-
case "100644":
51-
entry.EntryMode = EntryModeBlob
52-
case "100755":
53-
entry.EntryMode = EntryModeExec
54-
case "120000":
55-
entry.EntryMode = EntryModeSymlink
56-
case "160000":
57-
entry.EntryMode = EntryModeCommit
58-
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
59-
entry.EntryMode = EntryModeTree
60-
default:
61-
return nil, fmt.Errorf("unknown type: %v", string(entryMode))
49+
entry.EntryMode, err = ParseEntryMode(string(entryMode))
50+
if err != nil || entry.EntryMode == EntryModeNoEntry {
51+
return nil, fmt.Errorf("invalid ls-tree output (invalid mode): %q, err: %w", line, err)
6252
}
6353

6454
entry.ID, err = NewIDFromString(string(entryObjectID))

modules/git/tree_entry_mode.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,20 @@
33

44
package git
55

6-
import "strconv"
6+
import (
7+
"fmt"
8+
"strconv"
9+
)
710

811
// EntryMode is the type of an object in a git tree
912
type EntryMode int
1013

1114
// There are only a few file modes in Git. They look like unix file modes, but they can only be
1215
// one of these.
1316
const (
17+
// EntryModeNoEntry is possible if the file was added or removed in a commit. In the case of
18+
// added the base commit will not have the file in its tree so a mode of 0o000000 is used.
19+
EntryModeNoEntry EntryMode = 0o000000
1420
// EntryModeBlob
1521
EntryModeBlob EntryMode = 0o100644
1622
// EntryModeExec
@@ -33,3 +39,22 @@ func ToEntryMode(value string) EntryMode {
3339
v, _ := strconv.ParseInt(value, 8, 32)
3440
return EntryMode(v)
3541
}
42+
43+
func ParseEntryMode(mode string) (EntryMode, error) {
44+
switch mode {
45+
case "000000":
46+
return EntryModeNoEntry, nil
47+
case "100644":
48+
return EntryModeBlob, nil
49+
case "100755":
50+
return EntryModeExec, nil
51+
case "120000":
52+
return EntryModeSymlink, nil
53+
case "160000":
54+
return EntryModeCommit, nil
55+
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
56+
return EntryModeTree, nil
57+
default:
58+
return 0, fmt.Errorf("unparsable entry mode: %s", mode)
59+
}
60+
}

services/gitdiff/git_diff_tree.go

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
// Copyright 2025 The Gitea Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package gitdiff
5+
6+
import (
7+
"bufio"
8+
"context"
9+
"fmt"
10+
"strings"
11+
12+
"code.gitea.io/gitea/modules/git"
13+
"code.gitea.io/gitea/modules/log"
14+
)
15+
16+
type DiffTree struct {
17+
Files []*DiffTreeRecord
18+
}
19+
20+
type DiffTreeRecord struct {
21+
// Status is one of 'added', 'deleted', 'modified', 'renamed', 'copied'
22+
Status string
23+
24+
HeadPath string
25+
BasePath string
26+
HeadMode git.EntryMode
27+
BaseMode git.EntryMode
28+
HeadBlobID string
29+
BaseBlobID string
30+
}
31+
32+
// GetDiffTree returns the list of path of the files that have changed between the two commits
33+
func GetDiffTree(ctx context.Context, gitRepo *git.Repository, baseSha, headSha string) (*DiffTree, error) {
34+
gitDiffTreeRecords, err := runGitDiffTree(ctx, gitRepo, baseSha, headSha)
35+
if err != nil {
36+
return nil, err
37+
}
38+
39+
return &DiffTree{
40+
Files: gitDiffTreeRecords,
41+
}, nil
42+
}
43+
44+
func runGitDiffTree(ctx context.Context, gitRepo *git.Repository, baseSha, headSha string) ([]*DiffTreeRecord, error) {
45+
baseCommitID, headCommitID, err := validateGitDiffTreeArguments(gitRepo, baseSha, headSha)
46+
if err != nil {
47+
return nil, err
48+
}
49+
50+
cmd := git.NewCommand(ctx, "diff-tree", "--raw", "-r", "--find-renames").AddDynamicArguments(baseCommitID, headCommitID)
51+
stdout, _, runErr := cmd.RunStdString(&git.RunOpts{Dir: gitRepo.Path})
52+
if runErr != nil {
53+
log.Warn("git diff-tree: %v", runErr)
54+
return nil, runErr
55+
}
56+
57+
return parseGitDiffTree(stdout)
58+
}
59+
60+
func validateGitDiffTreeArguments(gitRepo *git.Repository, baseSha, headSha string) (string, string, error) {
61+
// if the head is empty its an error
62+
if headSha == "" {
63+
return "", "", fmt.Errorf("headSha is empty")
64+
}
65+
66+
// if the head commit doesn't exist its and error
67+
headCommit, err := gitRepo.GetCommit(headSha)
68+
if err != nil {
69+
return "", "", fmt.Errorf("failed to get commit headSha: %v", err)
70+
}
71+
headCommitID := headCommit.ID.String()
72+
73+
// if the base is empty we should use the parent of the head commit
74+
if baseSha == "" {
75+
// if the headCommit has no parent we should use an empty commit
76+
// this can happen when we are generating a diff against an orphaned commit
77+
if headCommit.ParentCount() == 0 {
78+
objectFormat, err := gitRepo.GetObjectFormat()
79+
if err != nil {
80+
return "", "", err
81+
}
82+
83+
return objectFormat.EmptyTree().String(), headCommitID, nil
84+
}
85+
86+
baseCommit, err := headCommit.Parent(0)
87+
if err != nil {
88+
return "", "", fmt.Errorf("baseSha is '', attempted to use parent of commit %s, got error: %v", headCommit.ID.String(), err)
89+
}
90+
return baseCommit.ID.String(), headCommitID, nil
91+
}
92+
93+
// try and get the base commit
94+
baseCommit, err := gitRepo.GetCommit(baseSha)
95+
// propagate the error if we couldn't get the base commit
96+
if err != nil {
97+
return "", "", fmt.Errorf("failed to get base commit %s: %v", baseSha, err)
98+
}
99+
100+
return baseCommit.ID.String(), headCommit.ID.String(), nil
101+
}
102+
103+
func parseGitDiffTree(output string) ([]*DiffTreeRecord, error) {
104+
/*
105+
The output of `git diff-tree --raw -r --find-renames` is of the form:
106+
107+
:<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<path>
108+
109+
or for renames:
110+
111+
:<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<old_path>\t<new_path>
112+
113+
See: <https://git-scm.com/docs/git-diff-tree#_raw_output_format> for more details
114+
*/
115+
if output == "" {
116+
return []*DiffTreeRecord{}, nil
117+
}
118+
119+
results := make([]*DiffTreeRecord, 0)
120+
121+
lines := bufio.NewScanner(strings.NewReader(output))
122+
for lines.Scan() {
123+
line := lines.Text()
124+
125+
if len(line) == 0 {
126+
continue
127+
}
128+
129+
record, err := parseGitDiffTreeLine(line)
130+
if err != nil {
131+
return nil, err
132+
}
133+
134+
results = append(results, record)
135+
}
136+
137+
if err := lines.Err(); err != nil {
138+
return nil, err
139+
}
140+
141+
return results, nil
142+
}
143+
144+
func parseGitDiffTreeLine(line string) (*DiffTreeRecord, error) {
145+
line = strings.TrimPrefix(line, ":")
146+
splitSections := strings.SplitN(line, "\t", 2)
147+
if len(splitSections) < 2 {
148+
return nil, fmt.Errorf("unparsable output for diff --raw: `%s`)", line)
149+
}
150+
151+
fields := strings.Fields(splitSections[0])
152+
if len(fields) < 5 {
153+
return nil, fmt.Errorf("unparsable output for diff --raw: `%s`, expected 5 space delimited values got %d)", line, len(fields))
154+
}
155+
156+
baseMode, err := git.ParseEntryMode(fields[0])
157+
if err != nil {
158+
return nil, err
159+
}
160+
161+
headMode, err := git.ParseEntryMode(fields[1])
162+
if err != nil {
163+
return nil, err
164+
}
165+
166+
baseBlobID := fields[2]
167+
headBlobID := fields[3]
168+
169+
status, err := statusFromLetter(fields[4])
170+
if err != nil {
171+
return nil, err
172+
}
173+
174+
filePaths := strings.Split(splitSections[1], "\t")
175+
176+
var headPath, basePath string
177+
if status == "renamed" {
178+
if len(filePaths) != 2 {
179+
return nil, fmt.Errorf("unparsable output for diff --raw: `%s`, expected 2 paths found %d", line, len(filePaths))
180+
}
181+
basePath = filePaths[0]
182+
headPath = filePaths[1]
183+
} else {
184+
basePath = filePaths[0]
185+
headPath = filePaths[0]
186+
}
187+
188+
return &DiffTreeRecord{
189+
Status: status,
190+
BaseMode: baseMode,
191+
HeadMode: headMode,
192+
BaseBlobID: baseBlobID,
193+
HeadBlobID: headBlobID,
194+
BasePath: basePath,
195+
HeadPath: headPath,
196+
}, nil
197+
}
198+
199+
// statusFromLetter translates the status column of a raw diff line into a
// descriptive word. Only the first byte is inspected, so the similarity score
// that follows 'R' and 'C' is ignored. An empty or unrecognised status is an error.
func statusFromLetter(letter string) (string, error) {
	if letter == "" {
		return "", fmt.Errorf("empty status letter")
	}

	var status string
	switch code := letter[0]; code {
	case 'A':
		status = "added"
	case 'D':
		status = "deleted"
	case 'M':
		status = "modified"
	case 'R':
		// of the form "R<score>"; the score is deliberately dropped
		status = "renamed"
	case 'C':
		// of the form "C<score>"; the score is deliberately dropped
		status = "copied"
	default:
		return "", fmt.Errorf("unknown status letter: '%s'", letter)
	}

	return status, nil
}

0 commit comments

Comments
 (0)