From bf196ee4bce3faa2bdfa069b0b4d41c514066dc6 Mon Sep 17 00:00:00 2001 From: George Dunlap Date: Fri, 11 Sep 2020 23:16:44 +0100 Subject: [PATCH] ParsePatchHeader: Copy functionality of git mailinfo's cleanup_subject Primarily to get rid of [PATCH] at the front, but while we're here just be generally compatible with `git am`: * Remove `re` and variations * Remove whitespace * Remove anything in brackets But only at the very beginning of the subject. Store anything removed in this way in PatchHeader.SubjectPrefix. Inspired by https://github.com/git/git/blob/master/mailinfo.c:cleanup_subject() Signed-off-by: George Dunlap --- gitdiff/patch_header.go | 68 +++++++++++++++++++++++++++++++++--- gitdiff/patch_header_test.go | 50 +++++++++++++++++++++++++- 2 files changed, 112 insertions(+), 6 deletions(-) diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index 6d3ef05..01148f5 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -39,6 +39,11 @@ type PatchHeader struct { // patch. Empty if no message is included in the header. Title string Body string + + // If the preamble looks like an email, ParsePatchHeader will + // remove prefixes such as `Re: ` and `[PATCH v3 5/17]` from the + // Title and place them here. + SubjectPrefix string } // Message returns the commit message for the header. The message consists of @@ -160,10 +165,14 @@ func ParsePatchDate(s string) (time.Time, error) { // formats used by git diff, git log, and git show and the UNIX mailbox format // used by git format-patch. // -// ParsePatchHeader makes no assumptions about the format of the patch title or -// message other than trimming whitespace and condensing blank lines. In -// particular, it does not remove the extra content that git format-patch adds -// to make emailed patches friendlier, like subject prefixes or commit stats. 
+// If ParsePatchHeader detects that it is handling an email, it will +// remove extra content at the beginning of the title line, such as +// `[PATCH]` or `Re:` in the same way that `git mailinfo` does. +// SubjectPrefix will be set to the value of this removed string. +// (`git mailinfo` is the core part of `git am` that pulls information +// out of an individual mail.) Unlike `git mailinfo`, +// ParsePatchHeader does not at the moment remove commit stats or +// other extraneous matter after a `---` line. func ParsePatchHeader(s string) (*PatchHeader, error) { r := bufio.NewReader(strings.NewReader(s)) @@ -359,7 +368,8 @@ func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { h.AuthorDate = d } - h.Title = msg.Header.Get("Subject") + subject := msg.Header.Get("Subject") + h.SubjectPrefix, h.Title = parseSubject(subject) s := bufio.NewScanner(msg.Body) h.Body = scanMessageBody(s, "") @@ -369,3 +379,51 @@ func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { return h, nil } + +// Takes an email subject and returns the patch prefix and commit +// title. e.g., `[PATCH v3 3/5] Implement foo` would return `[PATCH +// v3 3/5] ` and `Implement foo` +func parseSubject(s string) (string, string) { + // This is meant to be compatible with + // https://github.com/git/git/blob/master/mailinfo.c:cleanup_subject(). + // If compatibility with `git am` drifts, go there to see if there + // are any updates. 
+ + at := 0 + for at < len(s) { + switch s[at] { + case 'r', 'R': + // Detect re:, Re:, rE: and RE: + if at+2 < len(s) && + (s[at+1] == 'e' || s[at+1] == 'E') && + s[at+2] == ':' { + at += 3 + continue + } + + case ' ', '\t', ':': + // Delete whitespace and duplicate ':' characters + at++ + continue + + case '[': + // Look for closing bracket + j := at + 1 + for ; j < len(s); j++ { + if s[j] == ']' { + break + } + } + + if j < len(s) { + at = j + 1 + continue + } + } + + // Only loop if we actually removed something + break + } + + return s[:at], s[at:] +} diff --git a/gitdiff/patch_header_test.go b/gitdiff/patch_header_test.go index 77c541d..37f28fd 100644 --- a/gitdiff/patch_header_test.go +++ b/gitdiff/patch_header_test.go @@ -236,7 +236,7 @@ Another body line. SHA: expectedSHA, Author: expectedIdentity, AuthorDate: expectedDate, - Title: "[PATCH] " + expectedTitle, + Title: expectedTitle, Body: expectedBody, }, }, @@ -348,3 +348,51 @@ func assertPatchIdentity(t *testing.T, kind string, exp, act *PatchIdentity) { t.Errorf("incorrect parsed %s, expected %+v, bot got %+v", kind, exp, act) } } + +func TestCleanupSubject(t *testing.T) { + exp := "A sample commit to test header parsing" + tests := map[string]string{ + "plain": "", + "patch": "[PATCH] ", + "patchv5": "[PATCH v5] ", + "patchrfc": "[PATCH RFC] ", + "patchnospace": "[PATCH]", + "space": " ", + "re": "re: ", + "Re": "Re: ", + "RE": "rE: ", + "rere": "re: re: ", + } + + for name, prefix := range tests { + gotprefix, gottitle := parseSubject(prefix + exp) + if gottitle != exp { + t.Errorf("%s: Incorrect parsing of prefix %s: got title %s, wanted %s", + name, prefix, gottitle, exp) + } + if gotprefix != prefix { + t.Errorf("%s: Incorrect parsing of prefix %s: got prefix %s", + name, prefix, gotprefix) + } + } + + moretests := map[string]struct { + in, eprefix, etitle string + }{ + "Reimplement": {"Reimplement something", "", "Reimplement something"}, + "patch-reimplement": {"[PATCH v5] Reimplement 
something", "[PATCH v5] ", "Reimplement something"}, + "Openbracket": {"[Just to annoy people", "", "[Just to annoy people"}, + } + + for name, test := range moretests { + prefix, title := parseSubject(test.in) + if title != test.etitle { + t.Errorf("%s: Incorrect parsing of %s: got title %s, wanted %s", + name, test.in, title, test.etitle) + } + if prefix != test.eprefix { + t.Errorf("%s: Incorrect parsing of %s: got prefix %s, wanted %s", + name, test.in, title, test.etitle) + } + } +}