From 043c7982b2855366214b3849da066334d18f3ac3 Mon Sep 17 00:00:00 2001 From: Javier Campanini Date: Mon, 19 Jul 2021 11:26:28 -0400 Subject: [PATCH 1/6] add support for emojis in subject line --- gitdiff/patch_header.go | 32 +++++++++++++++++++++++++++++++- gitdiff/patch_header_test.go | 20 ++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index 16e3d3e..156da1f 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -2,6 +2,7 @@ package gitdiff import ( "bufio" + "encoding/hex" "errors" "fmt" "io" @@ -457,5 +458,34 @@ func parseSubject(s string) (string, string) { break } - return s[:at], s[at:] + return s[:at], decodeUTF8Subject(s[at:]) +} + +func decodeUTF8Subject(encoded string) string { + if !strings.HasPrefix(encoded, "=?UTF-8?q?") { + // not UTF-8 encoded + return encoded + } + + payload := strings.TrimPrefix(encoded, "=?UTF-8?q?") + payload = strings.TrimSuffix(payload, "?=") + + at := 0 + subject := "" + + for at < len(payload) { + if payload[at] == '=' { + // detected a hex value + hexx := payload[at+1 : at+3] + hexbytes, _ := hex.DecodeString(hexx) + subject += string(hexbytes) + at += 3 + + } else { + subject += string(payload[at]) + at++ + } + } + + return subject } diff --git a/gitdiff/patch_header_test.go b/gitdiff/patch_header_test.go index 7dc7f13..eb30f96 100644 --- a/gitdiff/patch_header_test.go +++ b/gitdiff/patch_header_test.go @@ -138,6 +138,7 @@ func TestParsePatchHeader(t *testing.T) { } expectedDate := time.Date(2020, 04, 11, 15, 21, 23, 0, time.FixedZone("PDT", -7*60*60)) expectedTitle := "A sample commit to test header parsing" + expectedEmojiOneLineTitle := "🤖 Enabling auto-merging" expectedBody := "The medium format shows the body, which\nmay wrap on to multiple lines.\n\nAnother body line." expectedBodyAppendix := "CC: Joe Smith " @@ -267,6 +268,25 @@ Another body line. 
Body: expectedBody, }, }, + "mailboxEmojiOneLine": { + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 +From: Morton Haypenny +Date: Sat, 11 Apr 2020 15:21:23 -0700 +Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20Enabling=20auto-merging?= + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Title: expectedEmojiOneLineTitle, + Body: expectedBody, + }, + }, "mailboxAppendix": { Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 From: Morton Haypenny From 8ab13c82b8373b71c960dadb183c48971006d1e4 Mon Sep 17 00:00:00 2001 From: Javier Campanini Date: Mon, 19 Jul 2021 11:46:22 -0400 Subject: [PATCH 2/6] added multiline support --- gitdiff/patch_header.go | 25 +++++++++++++++++-------- gitdiff/patch_header_test.go | 21 +++++++++++++++++++++ 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index 156da1f..85f16d3 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -467,25 +467,34 @@ func decodeUTF8Subject(encoded string) string { return encoded } - payload := strings.TrimPrefix(encoded, "=?UTF-8?q?") - payload = strings.TrimSuffix(payload, "?=") + // If the subject is too long, `git format-patch` maty produce a subject line across + // multiple lines. This will lead to the next line starting with + // Adding the space to the beginning of `encoded` ensures the replace catches all + // instances, including the first line. 
+ payload := strings.ReplaceAll(" "+encoded, " =?UTF-8?q?", "") + payload = strings.ReplaceAll(payload, "?=", "") at := 0 - subject := "" + var subject []byte for at < len(payload) { if payload[at] == '=' { // detected a hex value - hexx := payload[at+1 : at+3] - hexbytes, _ := hex.DecodeString(hexx) - subject += string(hexbytes) + hexString := payload[at+1 : at+3] + hexByte, err := hex.DecodeString(hexString) + if err != nil { + // if err, abort decoding and return original subject + return encoded + } + + subject = append(subject, hexByte...) at += 3 } else { - subject += string(payload[at]) + subject = append(subject, payload[at]) at++ } } - return subject + return string(subject) } diff --git a/gitdiff/patch_header_test.go b/gitdiff/patch_header_test.go index eb30f96..e7408af 100644 --- a/gitdiff/patch_header_test.go +++ b/gitdiff/patch_header_test.go @@ -139,6 +139,7 @@ func TestParsePatchHeader(t *testing.T) { expectedDate := time.Date(2020, 04, 11, 15, 21, 23, 0, time.FixedZone("PDT", -7*60*60)) expectedTitle := "A sample commit to test header parsing" expectedEmojiOneLineTitle := "🤖 Enabling auto-merging" + expectedEmojiMultiLineTitle := "🤖 Enabling auto-merging of certain PRs" expectedBody := "The medium format shows the body, which\nmay wrap on to multiple lines.\n\nAnother body line." expectedBodyAppendix := "CC: Joe Smith " @@ -287,6 +288,26 @@ Another body line. Body: expectedBody, }, }, + "mailboxEmojiMultiLine": { + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 +From: Morton Haypenny +Date: Sat, 11 Apr 2020 15:21:23 -0700 +Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20Enabling=20auto-merging=20of=20c?= + =?UTF-8?q?ertain=20PRs?= + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. 
+`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Title: expectedEmojiMultiLineTitle, + Body: expectedBody, + }, + }, "mailboxAppendix": { Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 From: Morton Haypenny From a5d9fc010369fa9d9cbf97e43e9d64c6f70d2a90 Mon Sep 17 00:00:00 2001 From: Javier Campanini Date: Mon, 19 Jul 2021 11:58:42 -0400 Subject: [PATCH 3/6] improve comments --- gitdiff/patch_header.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index 85f16d3..3dca959 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -461,16 +461,17 @@ func parseSubject(s string) (string, string) { return s[:at], decodeUTF8Subject(s[at:]) } +// Decodes a subject line if encoded using quoted-printable UTF-8 encoding. See for reference: +// https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject func decodeUTF8Subject(encoded string) string { if !strings.HasPrefix(encoded, "=?UTF-8?q?") { // not UTF-8 encoded return encoded } - // If the subject is too long, `git format-patch` maty produce a subject line across - // multiple lines. This will lead to the next line starting with - // Adding the space to the beginning of `encoded` ensures the replace catches all - // instances, including the first line. + // If the subject is too long, `git format-patch` may produce a subject line across + // multiple lines. 
When parsed, this can look like the following: + // payload := strings.ReplaceAll(" "+encoded, " =?UTF-8?q?", "") payload = strings.ReplaceAll(payload, "?=", "") @@ -479,7 +480,7 @@ func decodeUTF8Subject(encoded string) string { for at < len(payload) { if payload[at] == '=' { - // detected a hex value + // detected a hex value that needs decoding hexString := payload[at+1 : at+3] hexByte, err := hex.DecodeString(hexString) if err != nil { From 304a6dc5b58e974b475f16172449efd5b8674803 Mon Sep 17 00:00:00 2001 From: Javier Campanini Date: Mon, 19 Jul 2021 13:23:11 -0400 Subject: [PATCH 4/6] use quotedprintable stdlib package --- gitdiff/patch_header.go | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index 3dca959..618ca87 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -2,10 +2,10 @@ package gitdiff import ( "bufio" - "encoding/hex" "errors" "fmt" "io" + "mime/quotedprintable" "net/mail" "strconv" "strings" @@ -461,8 +461,9 @@ func parseSubject(s string) (string, string) { return s[:at], decodeUTF8Subject(s[at:]) } -// Decodes a subject line if encoded using quoted-printable UTF-8 encoding. See for reference: -// https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject +// Decodes a subject line if encoded using quoted-printable UTF-8 Q encoding. This format is the +// result of a `git format-patch` when the commit title has an emoji (or other non-ASCII character). +// See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject func decodeUTF8Subject(encoded string) string { if !strings.HasPrefix(encoded, "=?UTF-8?q?") { // not UTF-8 encoded @@ -472,30 +473,15 @@ func decodeUTF8Subject(encoded string) string { // If the subject is too long, `git format-patch` may produce a subject line across // multiple lines. 
When parsed, this can look like the following: // - payload := strings.ReplaceAll(" "+encoded, " =?UTF-8?q?", "") + payload := " " + encoded + payload = strings.ReplaceAll(payload, " =?UTF-8?q?", "") payload = strings.ReplaceAll(payload, "?=", "") - at := 0 - var subject []byte - - for at < len(payload) { - if payload[at] == '=' { - // detected a hex value that needs decoding - hexString := payload[at+1 : at+3] - hexByte, err := hex.DecodeString(hexString) - if err != nil { - // if err, abort decoding and return original subject - return encoded - } - - subject = append(subject, hexByte...) - at += 3 - - } else { - subject = append(subject, payload[at]) - at++ - } + decoded, err := io.ReadAll(quotedprintable.NewReader(strings.NewReader(payload))) + if err != nil { + // if err, abort decoding and return original subject + return encoded } - return string(subject) + return string(decoded) } From 77e5d9fb716aa150421ffe41b8e0ba273ccbdca8 Mon Sep 17 00:00:00 2001 From: Javier Campanini Date: Mon, 19 Jul 2021 13:50:27 -0400 Subject: [PATCH 5/6] swap to use example from git docs --- gitdiff/patch_header_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gitdiff/patch_header_test.go b/gitdiff/patch_header_test.go index e7408af..bda91fe 100644 --- a/gitdiff/patch_header_test.go +++ b/gitdiff/patch_header_test.go @@ -139,7 +139,7 @@ func TestParsePatchHeader(t *testing.T) { expectedDate := time.Date(2020, 04, 11, 15, 21, 23, 0, time.FixedZone("PDT", -7*60*60)) expectedTitle := "A sample commit to test header parsing" expectedEmojiOneLineTitle := "🤖 Enabling auto-merging" - expectedEmojiMultiLineTitle := "🤖 Enabling auto-merging of certain PRs" + expectedEmojiMultiLineTitle := "[IA64] Put ia64 config files on the Uwe Kleine-König diet" expectedBody := "The medium format shows the body, which\nmay wrap on to multiple lines.\n\nAnother body line." expectedBodyAppendix := "CC: Joe Smith " @@ -292,8 +292,8 @@ Another body line. 
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 From: Morton Haypenny Date: Sat, 11 Apr 2020 15:21:23 -0700 -Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20Enabling=20auto-merging=20of=20c?= - =?UTF-8?q?ertain=20PRs?= +Subject: [PATCH] =?UTF-8?q?[IA64]=20Put=20ia64=20config=20files=20on=20the=20?= + =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig=20diet?= The medium format shows the body, which may wrap on to multiple lines. From 2797d239170b9ee838c1a3c3c0a5ea2892cd2bef Mon Sep 17 00:00:00 2001 From: Javier Campanini Date: Mon, 19 Jul 2021 14:43:42 -0400 Subject: [PATCH 6/6] fixup method name and comment --- gitdiff/patch_header.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index 618ca87..c3c387d 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -458,13 +458,13 @@ func parseSubject(s string) (string, string) { break } - return s[:at], decodeUTF8Subject(s[at:]) + return s[:at], decodeSubject(s[at:]) } -// Decodes a subject line if encoded using quoted-printable UTF-8 Q encoding. This format is the -// result of a `git format-patch` when the commit title has an emoji (or other non-ASCII character). +// Decodes a subject line. Currently only supports quoted-printable UTF-8. This format is the result +// of a `git format-patch` when the commit title has a non-ASCII character (e.g. an emoji). // See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject -func decodeUTF8Subject(encoded string) string { +func decodeSubject(encoded string) string { if !strings.HasPrefix(encoded, "=?UTF-8?q?") { // not UTF-8 encoded return encoded