@@ -508,7 +508,15 @@ func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
508
508
if ! ok {
509
509
return m , ProtocolError ("malformed MIME header line: " + string (kv ))
510
510
}
511
- key := canonicalMIMEHeaderKey (k )
511
+ key , ok := canonicalMIMEHeaderKey (k )
512
+ if ! ok {
513
+ return m , ProtocolError ("malformed MIME header line: " + string (kv ))
514
+ }
515
+ for _ , c := range v {
516
+ if ! validHeaderValueByte (c ) {
517
+ return m , ProtocolError ("malformed MIME header line: " + string (kv ))
518
+ }
519
+ }
512
520
513
521
// As per RFC 7230 field-name is a token, tokens consist of one or more chars.
514
522
// We could return a ProtocolError here, but better to be liberal in what we
@@ -585,10 +593,12 @@ func CanonicalMIMEHeaderKey(s string) string {
585
593
return s
586
594
}
587
595
if upper && 'a' <= c && c <= 'z' {
588
- return canonicalMIMEHeaderKey ([]byte (s ))
596
+ s , _ = canonicalMIMEHeaderKey ([]byte (s ))
597
+ return s
589
598
}
590
599
if ! upper && 'A' <= c && c <= 'Z' {
591
- return canonicalMIMEHeaderKey ([]byte (s ))
600
+ s , _ = canonicalMIMEHeaderKey ([]byte (s ))
601
+ return s
592
602
}
593
603
upper = c == '-'
594
604
}
@@ -597,16 +607,66 @@ func CanonicalMIMEHeaderKey(s string) string {
597
607
598
608
// toLower is the numeric distance between an ASCII lowercase letter and
// its uppercase counterpart ('a' - 'A' == 32): adding it to an uppercase
// ASCII letter yields the lowercase one, subtracting it does the reverse.
// NOTE(review): presumably used by the key-canonicalization code; its use
// sites are not visible in this view — confirm.
const toLower = 'a' - 'A'
599
609
600
// validHeaderFieldByte reports whether c is a valid byte in a header
// field name. RFC 7230 says:
//
//	header-field   = field-name ":" OWS field-value OWS
//	field-name     = token
//	tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
//	        "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
//	token = 1*tchar
//
// It returns true only for the tchar bytes listed above; every other
// byte, including all bytes >= 0x80, yields false.
func validHeaderFieldByte(c byte) bool {
	// mask is a 128-bit bitmap with 1s for allowed bytes,
	// so that the byte c can be tested with a shift and an and.
	// It is an untyped constant, so it may exceed 64 bits; it is
	// split into its low and high 64-bit halves in the return below.
	const mask = 0 |
		(1<<(10)-1)<<'0' | // DIGIT: '0'-'9'
		(1<<(26)-1)<<'a' | // ALPHA: 'a'-'z'
		(1<<(26)-1)<<'A' | // ALPHA: 'A'-'Z'
		1<<'!' |
		1<<'#' |
		1<<'$' |
		1<<'%' |
		1<<'&' |
		1<<'\'' |
		1<<'*' |
		1<<'+' |
		1<<'-' |
		1<<'.' |
		1<<'^' |
		1<<'_' |
		1<<'`' |
		1<<'|' |
		1<<'~'
	// Low half: for c >= 64 the shift count is >= 64, so 1<<c is zero.
	// High half: c-64 is byte arithmetic, so for c < 64 it wraps to a
	// value >= 192 and for c >= 128 it is >= 64; either way the shifted
	// bit is zero. Hence each byte is tested against exactly one half,
	// and bytes >= 128 match neither, making the function return false.
	return ((uint64(1)<<c)&(mask&(1<<64-1)) |
		(uint64(1)<<(c-64))&(mask>>64)) != 0
}
645
+
646
// validHeaderValueByte reports whether c is a valid byte in a header
// field value. RFC 7230 says:
//
//	field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
//	field-vchar    = VCHAR / obs-text
//	obs-text       = %x80-FF
//
// RFC 5234 says:
//
//	HTAB           =  %x09
//	SP             =  %x20
//	VCHAR          =  %x21-7E
//
// It returns true for HTAB, SP, VCHAR and obs-text bytes, and false for
// every other byte (the remaining control characters and DEL, %x7F).
func validHeaderValueByte(c byte) bool {
	// mask is a 128-bit bitmap with 1s for allowed bytes,
	// so that the byte c can be tested with a shift and an and.
	// If c >= 128, then 1<<c and 1<<(c-64) will both be zero.
	// Since this is the obs-text range, which must be accepted, the
	// test below is inverted: it uses and-not (&^) so that a surviving
	// bit marks a disallowed byte, and a zero result means "allowed".
	const mask = 0 |
		(1<<(0x7f-0x21)-1)<<0x21 | // VCHAR: %x21-7E
		1<<0x20 | // SP: %x20
		1<<0x09 // HTAB: %x09
	// c-64 is byte arithmetic: for c < 64 it wraps to a value >= 192,
	// so the high-half shift yields zero and only the low half is tested;
	// for 64 <= c < 128 only the high half is tested.
	return ((uint64(1)<<c)&^(mask&(1<<64-1)) |
		(uint64(1)<<(c-64))&^(mask>>64)) == 0
}
611
671
612
672
// canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
@@ -615,14 +675,29 @@ func validHeaderFieldByte(b byte) bool {
615
675
//
616
676
// For invalid inputs (if a contains spaces or non-token bytes), a
617
677
// is unchanged and a string copy is returned.
618
- func canonicalMIMEHeaderKey (a []byte ) string {
678
+ //
679
+ // ok is true if the header key contains only valid characters and spaces.
680
+ // ReadMIMEHeader accepts header keys containing spaces, but does not
681
+ // canonicalize them.
682
+ func canonicalMIMEHeaderKey (a []byte ) (_ string , ok bool ) {
619
683
// See if a looks like a header key. If not, return it unchanged.
684
+ noCanon := false
620
685
for _ , c := range a {
621
686
if validHeaderFieldByte (c ) {
622
687
continue
623
688
}
624
689
// Don't canonicalize.
625
- return string (a )
690
+ if c == ' ' {
691
+ // We accept invalid headers with a space before the
692
+ // colon, but must not canonicalize them.
693
+ // See https://go.dev/issue/34540.
694
+ noCanon = true
695
+ continue
696
+ }
697
+ return string (a ), false
698
+ }
699
+ if noCanon {
700
+ return string (a ), true
626
701
}
627
702
628
703
upper := true
@@ -644,9 +719,9 @@ func canonicalMIMEHeaderKey(a []byte) string {
644
719
// case, so a copy of a's bytes into a new string does not
645
720
// happen in this map lookup:
646
721
if v := commonHeader [string (a )]; v != "" {
647
- return v
722
+ return v , true
648
723
}
649
- return string (a )
724
+ return string (a ), true
650
725
}
651
726
652
727
// commonHeader interns common header strings.
@@ -700,85 +775,3 @@ func initCommonHeader() {
700
775
commonHeader [v ] = v
701
776
}
702
777
}
703
-
704
- // isTokenTable is a copy of net/http/lex.go's isTokenTable.
705
- // See https://httpwg.github.io/specs/rfc7230.html#rule.token.separators
706
- var isTokenTable = [127 ]bool {
707
- '!' : true ,
708
- '#' : true ,
709
- '$' : true ,
710
- '%' : true ,
711
- '&' : true ,
712
- '\'' : true ,
713
- '*' : true ,
714
- '+' : true ,
715
- '-' : true ,
716
- '.' : true ,
717
- '0' : true ,
718
- '1' : true ,
719
- '2' : true ,
720
- '3' : true ,
721
- '4' : true ,
722
- '5' : true ,
723
- '6' : true ,
724
- '7' : true ,
725
- '8' : true ,
726
- '9' : true ,
727
- 'A' : true ,
728
- 'B' : true ,
729
- 'C' : true ,
730
- 'D' : true ,
731
- 'E' : true ,
732
- 'F' : true ,
733
- 'G' : true ,
734
- 'H' : true ,
735
- 'I' : true ,
736
- 'J' : true ,
737
- 'K' : true ,
738
- 'L' : true ,
739
- 'M' : true ,
740
- 'N' : true ,
741
- 'O' : true ,
742
- 'P' : true ,
743
- 'Q' : true ,
744
- 'R' : true ,
745
- 'S' : true ,
746
- 'T' : true ,
747
- 'U' : true ,
748
- 'W' : true ,
749
- 'V' : true ,
750
- 'X' : true ,
751
- 'Y' : true ,
752
- 'Z' : true ,
753
- '^' : true ,
754
- '_' : true ,
755
- '`' : true ,
756
- 'a' : true ,
757
- 'b' : true ,
758
- 'c' : true ,
759
- 'd' : true ,
760
- 'e' : true ,
761
- 'f' : true ,
762
- 'g' : true ,
763
- 'h' : true ,
764
- 'i' : true ,
765
- 'j' : true ,
766
- 'k' : true ,
767
- 'l' : true ,
768
- 'm' : true ,
769
- 'n' : true ,
770
- 'o' : true ,
771
- 'p' : true ,
772
- 'q' : true ,
773
- 'r' : true ,
774
- 's' : true ,
775
- 't' : true ,
776
- 'u' : true ,
777
- 'v' : true ,
778
- 'w' : true ,
779
- 'x' : true ,
780
- 'y' : true ,
781
- 'z' : true ,
782
- '|' : true ,
783
- '~' : true ,
784
- }
0 commit comments