Skip to content

Commit a6613d9

Browse files
committed
Add Go wrapper around git diff-tree --raw -r -M
* Implements calling git diff-tree * Ensures the wrapper function is called with valid arguments * Parses the output into a Go struct, using strong typing where possible
1 parent a025fa7 commit a6613d9

File tree

4 files changed

+705
-14
lines changed

4 files changed

+705
-14
lines changed

modules/git/parse.go

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -46,19 +46,9 @@ func parseLsTreeLine(line []byte) (*LsTreeEntry, error) {
4646
entry.Size = optional.Some(size)
4747
}
4848

49-
switch string(entryMode) {
50-
case "100644":
51-
entry.EntryMode = EntryModeBlob
52-
case "100755":
53-
entry.EntryMode = EntryModeExec
54-
case "120000":
55-
entry.EntryMode = EntryModeSymlink
56-
case "160000":
57-
entry.EntryMode = EntryModeCommit
58-
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
59-
entry.EntryMode = EntryModeTree
60-
default:
61-
return nil, fmt.Errorf("unknown type: %v", string(entryMode))
49+
entry.EntryMode, err = ParseEntryMode(string(entryMode))
50+
if err != nil || entry.EntryMode == EntryModeNoEntry {
51+
return nil, fmt.Errorf("invalid ls-tree output (invalid mode): %q, err: %w", line, err)
6252
}
6353

6454
entry.ID, err = NewIDFromString(string(entryObjectID))

modules/git/tree_entry_mode.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,20 @@
33

44
package git
55

6-
import "strconv"
6+
import (
7+
"fmt"
8+
"strconv"
9+
)
710

811
// EntryMode the type of the object in the git tree
912
type EntryMode int
1013

1114
// There are only a few file modes in Git. They look like unix file modes, but they can only be
1215
// one of these.
1316
const (
17+
// EntryModeNoEntry is possible if the file was added or removed in a commit. In the case of
18+
// added the base commit will not have the file in its tree so a mode of 0o000000 is used.
19+
EntryModeNoEntry EntryMode = 0o000000
1420
// EntryModeBlob
1521
EntryModeBlob EntryMode = 0o100644
1622
// EntryModeExec
@@ -33,3 +39,22 @@ func ToEntryMode(value string) EntryMode {
3339
v, _ := strconv.ParseInt(value, 8, 32)
3440
return EntryMode(v)
3541
}
42+
43+
func ParseEntryMode(mode string) (EntryMode, error) {
44+
switch mode {
45+
case "000000":
46+
return EntryModeNoEntry, nil
47+
case "100644":
48+
return EntryModeBlob, nil
49+
case "100755":
50+
return EntryModeExec, nil
51+
case "120000":
52+
return EntryModeSymlink, nil
53+
case "160000":
54+
return EntryModeCommit, nil
55+
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
56+
return EntryModeTree, nil
57+
default:
58+
return 0, fmt.Errorf("unparsable entry mode: %s", mode)
59+
}
60+
}

services/gitdiff/git_diff_tree.go

Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
// Copyright 2025 The Gitea Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package gitdiff
5+
6+
import (
7+
"bufio"
8+
"context"
9+
"fmt"
10+
"io"
11+
"strconv"
12+
"strings"
13+
14+
"code.gitea.io/gitea/modules/git"
15+
"code.gitea.io/gitea/modules/log"
16+
)
17+
18+
// DiffTree is the result returned by GetDiffTree: the parsed records of one `git diff-tree --raw` run.
type DiffTree struct {
19+
// Files holds one record per parsed output line, in the order the lines were read.
Files []*DiffTreeRecord
20+
}
21+
22+
// DiffTreeRecord describes one changed file, built from a single line of `git diff-tree --raw` output.
type DiffTreeRecord struct {
23+
// Status is one of 'added', 'deleted', 'modified', 'renamed', 'copied', 'typechanged', 'unmerged', 'unknown'
24+
Status string
25+
26+
// For renames and copies, the percentage of similarity between the source and target of the move/rename.
// It is 0 for every other status.
27+
Score uint8
28+
29+
// HeadPath is the path on the head side and BasePath the path on the base side. They differ only
// when the record carries separate source and destination paths (e.g. a rename).
HeadPath string
30+
BasePath string
31+
// HeadMode and BaseMode are the <new_mode> and <old_mode> columns, parsed with git.ParseEntryMode.
HeadMode git.EntryMode
32+
BaseMode git.EntryMode
33+
// HeadBlobID and BaseBlobID are the <new_sha> and <old_sha> columns, stored exactly as git printed them.
HeadBlobID string
34+
BaseBlobID string
35+
}
36+
37+
// GetDiffTree returns the list of path of the files that have changed between the two commits.
38+
// If useMergeBase is true, the diff will be calculated using the merge base of the two commits.
39+
// This is the same behavior as using a three-dot diff in git diff.
40+
func GetDiffTree(ctx context.Context, gitRepo *git.Repository, useMergeBase bool, baseSha, headSha string) (*DiffTree, error) {
41+
gitDiffTreeRecords, err := runGitDiffTree(ctx, gitRepo, useMergeBase, baseSha, headSha)
42+
if err != nil {
43+
return nil, err
44+
}
45+
46+
return &DiffTree{
47+
Files: gitDiffTreeRecords,
48+
}, nil
49+
}
50+
51+
func runGitDiffTree(ctx context.Context, gitRepo *git.Repository, useMergeBase bool, baseSha, headSha string) ([]*DiffTreeRecord, error) {
52+
useMergeBase, baseCommitID, headCommitID, err := validateGitDiffTreeArguments(gitRepo, useMergeBase, baseSha, headSha)
53+
if err != nil {
54+
return nil, err
55+
}
56+
57+
cmd := git.NewCommand(ctx, "diff-tree", "--raw", "-r", "--find-renames", "--root")
58+
if useMergeBase {
59+
cmd.AddArguments("--merge-base")
60+
}
61+
cmd.AddDynamicArguments(baseCommitID, headCommitID)
62+
stdout, _, runErr := cmd.RunStdString(&git.RunOpts{Dir: gitRepo.Path})
63+
if runErr != nil {
64+
log.Warn("git diff-tree: %v", runErr)
65+
return nil, runErr
66+
}
67+
68+
return parseGitDiffTree(strings.NewReader(stdout))
69+
}
70+
71+
func validateGitDiffTreeArguments(gitRepo *git.Repository, useMergeBase bool, baseSha, headSha string) (shouldUseMergeBase bool, resolvedBaseSha, resolvedHeadSha string, err error) {
72+
// if the head is empty its an error
73+
if headSha == "" {
74+
return false, "", "", fmt.Errorf("headSha is empty")
75+
}
76+
77+
// if the head commit doesn't exist its and error
78+
headCommit, err := gitRepo.GetCommit(headSha)
79+
if err != nil {
80+
return false, "", "", fmt.Errorf("failed to get commit headSha: %v", err)
81+
}
82+
headCommitID := headCommit.ID.String()
83+
84+
// if the base is empty we should use the parent of the head commit
85+
if baseSha == "" {
86+
// if the headCommit has no parent we should use an empty commit
87+
// this can happen when we are generating a diff against an orphaned commit
88+
if headCommit.ParentCount() == 0 {
89+
objectFormat, err := gitRepo.GetObjectFormat()
90+
if err != nil {
91+
return false, "", "", err
92+
}
93+
94+
// We set use merge base to false because we have no base commit
95+
return false, objectFormat.EmptyTree().String(), headCommitID, nil
96+
}
97+
98+
baseCommit, err := headCommit.Parent(0)
99+
if err != nil {
100+
return false, "", "", fmt.Errorf("baseSha is '', attempted to use parent of commit %s, got error: %v", headCommit.ID.String(), err)
101+
}
102+
return useMergeBase, baseCommit.ID.String(), headCommitID, nil
103+
}
104+
105+
// try and get the base commit
106+
baseCommit, err := gitRepo.GetCommit(baseSha)
107+
// propagate the error if we couldn't get the base commit
108+
if err != nil {
109+
return useMergeBase, "", "", fmt.Errorf("failed to get base commit %s: %v", baseSha, err)
110+
}
111+
112+
return useMergeBase, baseCommit.ID.String(), headCommit.ID.String(), nil
113+
}
114+
115+
func parseGitDiffTree(gitOutput io.Reader) ([]*DiffTreeRecord, error) {
116+
/*
117+
The output of `git diff-tree --raw -r --find-renames` is of the form:
118+
119+
:<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<path>
120+
121+
or for renames:
122+
123+
:<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<old_path>\t<new_path>
124+
125+
See: <https://git-scm.com/docs/git-diff-tree#_raw_output_format> for more details
126+
*/
127+
results := make([]*DiffTreeRecord, 0)
128+
129+
lines := bufio.NewScanner(gitOutput)
130+
for lines.Scan() {
131+
line := lines.Text()
132+
133+
if len(line) == 0 {
134+
continue
135+
}
136+
137+
record, err := parseGitDiffTreeLine(line)
138+
if err != nil {
139+
return nil, err
140+
}
141+
142+
results = append(results, record)
143+
}
144+
145+
if err := lines.Err(); err != nil {
146+
return nil, err
147+
}
148+
149+
return results, nil
150+
}
151+
152+
func parseGitDiffTreeLine(line string) (*DiffTreeRecord, error) {
153+
line = strings.TrimPrefix(line, ":")
154+
splitSections := strings.SplitN(line, "\t", 2)
155+
if len(splitSections) < 2 {
156+
return nil, fmt.Errorf("unparsable output for diff-tree --raw: `%s`)", line)
157+
}
158+
159+
fields := strings.Fields(splitSections[0])
160+
if len(fields) < 5 {
161+
return nil, fmt.Errorf("unparsable output for diff-tree --raw: `%s`, expected 5 space delimited values got %d)", line, len(fields))
162+
}
163+
164+
baseMode, err := git.ParseEntryMode(fields[0])
165+
if err != nil {
166+
return nil, err
167+
}
168+
169+
headMode, err := git.ParseEntryMode(fields[1])
170+
if err != nil {
171+
return nil, err
172+
}
173+
174+
baseBlobID := fields[2]
175+
headBlobID := fields[3]
176+
177+
status, score, err := statusFromLetter(fields[4])
178+
if err != nil {
179+
return nil, fmt.Errorf("unparsable output for diff-tree --raw: %s, error: %s", line, err)
180+
}
181+
182+
filePaths := strings.Split(splitSections[1], "\t")
183+
184+
var headPath, basePath string
185+
if status == "renamed" {
186+
if len(filePaths) != 2 {
187+
return nil, fmt.Errorf("unparsable output for diff-tree --raw: `%s`, expected 2 paths found %d", line, len(filePaths))
188+
}
189+
basePath = filePaths[0]
190+
headPath = filePaths[1]
191+
} else {
192+
basePath = filePaths[0]
193+
headPath = filePaths[0]
194+
}
195+
196+
return &DiffTreeRecord{
197+
Status: status,
198+
Score: score,
199+
BaseMode: baseMode,
200+
HeadMode: headMode,
201+
BaseBlobID: baseBlobID,
202+
HeadBlobID: headBlobID,
203+
BasePath: basePath,
204+
HeadPath: headPath,
205+
}, nil
206+
}
207+
208+
func statusFromLetter(rawStatus string) (status string, score uint8, err error) {
209+
if len(rawStatus) < 1 {
210+
return "", 0, fmt.Errorf("empty status letter")
211+
}
212+
switch rawStatus[0] {
213+
case 'A':
214+
return "added", 0, nil
215+
case 'D':
216+
return "deleted", 0, nil
217+
case 'M':
218+
return "modified", 0, nil
219+
case 'R':
220+
score, err = tryParseStatusScore(rawStatus)
221+
return "renamed", score, err
222+
case 'C':
223+
score, err = tryParseStatusScore(rawStatus)
224+
return "copied", score, err
225+
case 'T':
226+
return "typechanged", 0, nil
227+
case 'U':
228+
return "unmerged", 0, nil
229+
case 'X':
230+
return "unknown", 0, nil
231+
default:
232+
return "", 0, fmt.Errorf("unknown status letter: '%s'", rawStatus)
233+
}
234+
}
235+
236+
// tryParseStatusScore extracts the similarity score that follows the status letter of a
// rename/copy status such as "R100" or "C075".
//
// The first byte (the status letter) is skipped and the rest is parsed as a base-10 unsigned
// integer. It returns an error when nothing follows the letter, when the remainder is not a
// valid number that fits in 8 bits, or when the value is greater than 100.
func tryParseStatusScore(rawStatus string) (uint8, error) {
	if len(rawStatus) < 2 {
		return 0, fmt.Errorf("status score missing")
	}

	// bitSize 8 makes ParseUint reject anything above 255, so the uint8 conversion below is safe.
	score, err := strconv.ParseUint(rawStatus[1:], 10, 8)
	if err != nil {
		return 0, fmt.Errorf("failed to parse status score: %w", err)
	}
	if score > 100 {
		return 0, fmt.Errorf("status score out of range: %d", score)
	}

	return uint8(score), nil
}

0 commit comments

Comments
 (0)