Skip to content

Commit bd869f6

Browse files
moygitster
authored andcommitted
send-email: add and use a local copy of Mail::Address
We used to have two versions of the email parsing code. Our parse_mailboxes (in Git.pm), and Mail::Address which we used if installed. Unfortunately, both versions have different sets of bugs, and changing the behavior of git depending on whether Mail::Address is installed was a bad idea. A first attempt to solve this was cc90750 (send-email: don't use Mail::Address, even if available, 2017-08-23), but it turns out our parse_mailboxes is too buggy for some uses. For example the lack of nested comments support breaks get_maintainer.pl in the Linux kernel tree: https://public-inbox.org/git/[email protected]/ This patch goes the other way: use Mail::Address anyway, but have a local copy from CPAN as a fallback, when the system one is not available. The duplicated script is small (276 lines of code) and stable in time. Maintaining the local copy should not be an issue, and will certainly be less burden than maintaining our own parse_mailboxes. Another option would be to consider Mail::Address as a hard dependency, but it's easy enough to save the trouble of extra-dependency to the end user or packager. Signed-off-by: Matthieu Moy <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 1eaabe3 commit bd869f6

File tree

3 files changed

+302
-1
lines changed

3 files changed

+302
-1
lines changed

git-send-email.perl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
use Cwd qw(abs_path cwd);
3131
use Git;
3232
use Git::I18N;
33+
use Git::Mail::Address;
3334

3435
Getopt::Long::Configure qw/ pass_through /;
3536

@@ -489,7 +490,7 @@ sub read_config {
489490
($repocommitter) = Git::ident_person(@repo, 'committer');
490491

491492
sub parse_address_line {
492-
return Git::parse_mailboxes($_[0]);
493+
return map { $_->format } Mail::Address->parse($_[0]);
493494
}
494495

495496
sub split_addrs {

perl/Git/FromCPAN/Mail/Address.pm

Lines changed: 276 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,276 @@
1+
# Copyrights 1995-2017 by [Mark Overmeer <[email protected]>].
2+
# For other contributors see ChangeLog.
3+
# See the manual pages for details on the licensing terms.
4+
# Pod stripped from pm file by OODoc 2.02.
5+
package Mail::Address;
6+
use vars '$VERSION';
7+
$VERSION = '2.19';
8+
9+
use strict;
10+
11+
use Carp;
12+
13+
# use locale; removed in version 1.78, because it causes taint problems
14+
15+
sub Version { our $VERSION }
16+
17+
18+
19+
# given a comment, attempt to extract a person's name
20+
sub _extract_name
21+
{ # This function can be called as method as well
22+
my $self = @_ && ref $_[0] ? shift : undef;
23+
24+
local $_ = shift
25+
or return '';
26+
27+
# Using encodings, too hard. See Mail::Message::Field::Full.
28+
return '' if m/\=\?.*?\?\=/;
29+
30+
# trim whitespace
31+
s/^\s+//;
32+
s/\s+$//;
33+
s/\s+/ /;
34+
35+
# Disregard numeric names (e.g. [email protected])
36+
return "" if /^[\d ]+$/;
37+
38+
s/^\((.*)\)$/$1/; # remove outermost parenthesis
39+
s/^"(.*)"$/$1/; # remove outer quotation marks
40+
s/\(.*?\)//g; # remove minimal embedded comments
41+
s/\\//g; # remove all escapes
42+
s/^"(.*)"$/$1/; # remove internal quotation marks
43+
s/^([^\s]+) ?, ?(.*)$/$2 $1/; # reverse "Last, First M." if applicable
44+
s/,.*//;
45+
46+
# Change casing only when the name contains only upper or only
47+
# lower cased characters.
48+
unless( m/[A-Z]/ && m/[a-z]/ )
49+
{ # Set the case of the name to first char upper rest lower
50+
s/\b(\w+)/\L\u$1/igo; # Upcase first letter on name
51+
s/\bMc(\w)/Mc\u$1/igo; # Scottish names such as 'McLeod'
52+
s/\bo'(\w)/O'\u$1/igo; # Irish names such as 'O'Malley, O'Reilly'
53+
s/\b(x*(ix)?v*(iv)?i*)\b/\U$1/igo; # Roman numerals, eg 'Level III Support'
54+
}
55+
56+
# some cleanup
57+
s/\[[^\]]*\]//g;
58+
s/(^[\s'"]+|[\s'"]+$)//g;
59+
s/\s{2,}/ /g;
60+
61+
$_;
62+
}
63+
64+
sub _tokenise
65+
{ local $_ = join ',', @_;
66+
my (@words,$snippet,$field);
67+
68+
s/\A\s+//;
69+
s/[\r\n]+/ /g;
70+
71+
while ($_ ne '')
72+
{ $field = '';
73+
if(s/^\s*\(/(/ ) # (...)
74+
{ my $depth = 0;
75+
76+
PAREN: while(s/^(\(([^\(\)\\]|\\.)*)//)
77+
{ $field .= $1;
78+
$depth++;
79+
while(s/^(([^\(\)\\]|\\.)*\)\s*)//)
80+
{ $field .= $1;
81+
last PAREN unless --$depth;
82+
$field .= $1 if s/^(([^\(\)\\]|\\.)+)//;
83+
}
84+
}
85+
86+
carp "Unmatched () '$field' '$_'"
87+
if $depth;
88+
89+
$field =~ s/\s+\Z//;
90+
push @words, $field;
91+
92+
next;
93+
}
94+
95+
if( s/^("(?:[^"\\]+|\\.)*")\s*// # "..."
96+
|| s/^(\[(?:[^\]\\]+|\\.)*\])\s*// # [...]
97+
|| s/^([^\s()<>\@,;:\\".[\]]+)\s*//
98+
|| s/^([()<>\@,;:\\".[\]])\s*//
99+
)
100+
{ push @words, $1;
101+
next;
102+
}
103+
104+
croak "Unrecognised line: $_";
105+
}
106+
107+
push @words, ",";
108+
\@words;
109+
}
110+
111+
sub _find_next
112+
{ my ($idx, $tokens, $len) = @_;
113+
114+
while($idx < $len)
115+
{ my $c = $tokens->[$idx];
116+
return $c if $c eq ',' || $c eq ';' || $c eq '<';
117+
$idx++;
118+
}
119+
120+
"";
121+
}
122+
123+
sub _complete
124+
{ my ($class, $phrase, $address, $comment) = @_;
125+
126+
@$phrase || @$comment || @$address
127+
or return undef;
128+
129+
my $o = $class->new(join(" ",@$phrase), join("",@$address), join(" ",@$comment));
130+
@$phrase = @$address = @$comment = ();
131+
$o;
132+
}
133+
134+
#------------
135+
136+
sub new(@)
137+
{ my $class = shift;
138+
bless [@_], $class;
139+
}
140+
141+
142+
sub parse(@)
143+
{ my $class = shift;
144+
my @line = grep {defined} @_;
145+
my $line = join '', @line;
146+
147+
my (@phrase, @comment, @address, @objs);
148+
my ($depth, $idx) = (0, 0);
149+
150+
my $tokens = _tokenise @line;
151+
my $len = @$tokens;
152+
my $next = _find_next $idx, $tokens, $len;
153+
154+
local $_;
155+
for(my $idx = 0; $idx < $len; $idx++)
156+
{ $_ = $tokens->[$idx];
157+
158+
if(substr($_,0,1) eq '(') { push @comment, $_ }
159+
elsif($_ eq '<') { $depth++ }
160+
elsif($_ eq '>') { $depth-- if $depth }
161+
elsif($_ eq ',' || $_ eq ';')
162+
{ warn "Unmatched '<>' in $line" if $depth;
163+
my $o = $class->_complete(\@phrase, \@address, \@comment);
164+
push @objs, $o if defined $o;
165+
$depth = 0;
166+
$next = _find_next $idx+1, $tokens, $len;
167+
}
168+
elsif($depth) { push @address, $_ }
169+
elsif($next eq '<') { push @phrase, $_ }
170+
elsif( /^[.\@:;]$/ || !@address || $address[-1] =~ /^[.\@:;]$/ )
171+
{ push @address, $_ }
172+
else
173+
{ warn "Unmatched '<>' in $line" if $depth;
174+
my $o = $class->_complete(\@phrase, \@address, \@comment);
175+
push @objs, $o if defined $o;
176+
$depth = 0;
177+
push @address, $_;
178+
}
179+
}
180+
@objs;
181+
}
182+
183+
#------------
184+
185+
sub phrase { shift->set_or_get(0, @_) }
186+
sub address { shift->set_or_get(1, @_) }
187+
sub comment { shift->set_or_get(2, @_) }
188+
189+
sub set_or_get($)
190+
{ my ($self, $i) = (shift, shift);
191+
@_ or return $self->[$i];
192+
193+
my $val = $self->[$i];
194+
$self->[$i] = shift if @_;
195+
$val;
196+
}
197+
198+
199+
my $atext = '[\-\w !#$%&\'*+/=?^`{|}~]';
200+
sub format
201+
{ my @addrs;
202+
203+
foreach (@_)
204+
{ my ($phrase, $email, $comment) = @$_;
205+
my @addr;
206+
207+
if(defined $phrase && length $phrase)
208+
{ push @addr
209+
, $phrase =~ /^(?:\s*$atext\s*)+$/o ? $phrase
210+
: $phrase =~ /(?<!\\)"/ ? $phrase
211+
: qq("$phrase");
212+
213+
push @addr, "<$email>"
214+
if defined $email && length $email;
215+
}
216+
elsif(defined $email && length $email)
217+
{ push @addr, $email;
218+
}
219+
220+
if(defined $comment && $comment =~ /\S/)
221+
{ $comment =~ s/^\s*\(?/(/;
222+
$comment =~ s/\)?\s*$/)/;
223+
}
224+
225+
push @addr, $comment
226+
if defined $comment && length $comment;
227+
228+
push @addrs, join(" ", @addr)
229+
if @addr;
230+
}
231+
232+
join ", ", @addrs;
233+
}
234+
235+
#------------
236+
237+
sub name
238+
{ my $self = shift;
239+
my $phrase = $self->phrase;
240+
my $addr = $self->address;
241+
242+
$phrase = $self->comment
243+
unless defined $phrase && length $phrase;
244+
245+
my $name = $self->_extract_name($phrase);
246+
247+
# first.last@domain address
248+
if($name eq '' && $addr =~ /([^\%\.\@_]+([\._][^\%\.\@_]+)+)[\@\%]/)
249+
{ ($name = $1) =~ s/[\._]+/ /g;
250+
$name = _extract_name $name;
251+
}
252+
253+
if($name eq '' && $addr =~ m#/g=#i) # X400 style address
254+
{ my ($f) = $addr =~ m#g=([^/]*)#i;
255+
my ($l) = $addr =~ m#s=([^/]*)#i;
256+
$name = _extract_name "$f $l";
257+
}
258+
259+
length $name ? $name : undef;
260+
}
261+
262+
263+
sub host
264+
{ my $addr = shift->address || '';
265+
my $i = rindex $addr, '@';
266+
$i >= 0 ? substr($addr, $i+1) : undef;
267+
}
268+
269+
270+
sub user
271+
{ my $addr = shift->address || '';
272+
my $i = rindex $addr, '@';
273+
$i >= 0 ? substr($addr,0,$i) : $addr;
274+
}
275+
276+
1;

perl/Git/Mail/Address.pm

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package Git::Mail::Address;
2+
use 5.008;
3+
use strict;
4+
use warnings;
5+
6+
=head1 NAME
7+
8+
Git::Mail::Address - Wrapper for the L<Mail::Address> module, in case it's not installed
9+
10+
=head1 DESCRIPTION
11+
12+
This module is only intended to be used for code shipping in the
13+
C<git.git> repository. Use it for anything else at your peril!
14+
15+
=cut
16+
17+
eval {
18+
require Mail::Address;
19+
1;
20+
} or do {
21+
require Git::FromCPAN::Mail::Address;
22+
};
23+
24+
1;

0 commit comments

Comments
 (0)