Skip to content

Commit 243c8c0

Browse files
dsnet authored and gopherbot committed
encoding: require unique alphabet for base32 and base64
In order for decoding to faithfully reproduce the encoded input,
the symbols must be unique (i.e., provide a bijective mapping).
Thus, reject duplicate symbols in NewEncoding.

As a minor optimization, modify WithPadding to use the decodeMap
to quickly check whether the padding character is used in O(1)
instead of O(32) or O(64).

Change-Id: I5631f6ff9335c35d59d020dc0e307e3520786fbc
Reviewed-on: https://go-review.googlesource.com/c/go/+/520335
Reviewed-by: Dmitri Shuralyov <[email protected]>
Auto-Submit: Joseph Tsai <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
Run-TryBot: Joseph Tsai <[email protected]>
1 parent 9070f27 commit 243c8c0

File tree

2 files changed

+44
-32
lines changed

2 files changed

+44
-32
lines changed

src/encoding/base32/base32.go

Lines changed: 24 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,8 @@ import (
2020
// introduced for SASL GSSAPI and standardized in RFC 4648.
2121
// The alternate "base32hex" encoding is used in DNSSEC.
2222
type Encoding struct {
23-
encode [32]byte
24-
decodeMap [256]byte
23+
encode [32]byte // mapping of symbol index to symbol byte value
24+
decodeMap [256]uint8 // mapping of symbol byte value to symbol index
2525
padChar rune
2626
}
2727

@@ -45,14 +45,19 @@ const (
4545
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
4646
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
4747
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
48+
invalidIndex = '\xff'
4849
)
4950

5051
const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
5152
const encodeHex = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
5253

53-
// NewEncoding returns a new Encoding defined by the given alphabet,
54-
// which must be a 32-byte string. The alphabet is treated as sequence
55-
// of byte values without any special treatment for multi-byte UTF-8.
54+
// NewEncoding returns a new padded Encoding defined by the given alphabet,
55+
// which must be a 32-byte string that contains unique byte values and
56+
// does not contain the padding character or CR / LF ('\r', '\n').
57+
// The alphabet is treated as a sequence of byte values
58+
// without any special treatment for multi-byte UTF-8.
59+
// The resulting Encoding uses the default padding character ('='),
60+
// which may be changed or disabled via WithPadding.
5661
func NewEncoding(encoder string) *Encoding {
5762
if len(encoder) != 32 {
5863
panic("encoding alphabet is not 32-bytes long")
@@ -64,7 +69,16 @@ func NewEncoding(encoder string) *Encoding {
6469
copy(e.decodeMap[:], decodeMapInitialize)
6570

6671
for i := 0; i < len(encoder); i++ {
67-
e.decodeMap[encoder[i]] = byte(i)
72+
// Note: While we document that the alphabet cannot contain
73+
// the padding character, we do not enforce it since we do not know
74+
// if the caller intends to switch the padding from StdPadding later.
75+
switch {
76+
case encoder[i] == '\n' || encoder[i] == '\r':
77+
panic("encoding alphabet contains newline character")
78+
case e.decodeMap[encoder[i]] != invalidIndex:
79+
panic("encoding alphabet includes duplicate symbols")
80+
}
81+
e.decodeMap[encoder[i]] = uint8(i)
6882
}
6983
return e
7084
}
@@ -85,16 +99,12 @@ var HexEncoding = NewEncoding(encodeHex)
8599
// Padding characters above '\x7f' are encoded as their exact byte value
86100
// rather than using the UTF-8 representation of the codepoint.
87101
func (enc Encoding) WithPadding(padding rune) *Encoding {
88-
if padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff {
102+
switch {
103+
case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff:
89104
panic("invalid padding")
105+
case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex:
106+
panic("padding contained in alphabet")
90107
}
91-
92-
for i := 0; i < len(enc.encode); i++ {
93-
if rune(enc.encode[i]) == padding {
94-
panic("padding contained in alphabet")
95-
}
96-
}
97-
98108
enc.padChar = padding
99109
return &enc
100110
}

src/encoding/base64/base64.go

Lines changed: 20 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,8 @@ import (
2222
// (RFC 1421). RFC 4648 also defines an alternate encoding, which is
2323
// the standard encoding with - and _ substituted for + and /.
2424
type Encoding struct {
25-
encode [64]byte
26-
decodeMap [256]byte
25+
encode [64]byte // mapping of symbol index to symbol byte value
26+
decodeMap [256]uint8 // mapping of symbol byte value to symbol index
2727
padChar rune
2828
strict bool
2929
}
@@ -48,34 +48,40 @@ const (
4848
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
4949
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
5050
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
51+
invalidIndex = '\xff'
5152
)
5253

5354
const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
5455
const encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
5556

5657
// NewEncoding returns a new padded Encoding defined by the given alphabet,
57-
// which must be a 64-byte string that does not contain the padding character
58-
// or CR / LF ('\r', '\n'). The alphabet is treated as sequence of byte values
58+
// which must be a 64-byte string that contains unique byte values and
59+
// does not contain the padding character or CR / LF ('\r', '\n').
60+
// The alphabet is treated as a sequence of byte values
5961
// without any special treatment for multi-byte UTF-8.
6062
// The resulting Encoding uses the default padding character ('='),
6163
// which may be changed or disabled via WithPadding.
6264
func NewEncoding(encoder string) *Encoding {
6365
if len(encoder) != 64 {
6466
panic("encoding alphabet is not 64-bytes long")
6567
}
66-
for i := 0; i < len(encoder); i++ {
67-
if encoder[i] == '\n' || encoder[i] == '\r' {
68-
panic("encoding alphabet contains newline character")
69-
}
70-
}
7168

7269
e := new(Encoding)
7370
e.padChar = StdPadding
7471
copy(e.encode[:], encoder)
7572
copy(e.decodeMap[:], decodeMapInitialize)
7673

7774
for i := 0; i < len(encoder); i++ {
78-
e.decodeMap[encoder[i]] = byte(i)
75+
// Note: While we document that the alphabet cannot contain
76+
// the padding character, we do not enforce it since we do not know
77+
// if the caller intends to switch the padding from StdPadding later.
78+
switch {
79+
case encoder[i] == '\n' || encoder[i] == '\r':
80+
panic("encoding alphabet contains newline character")
81+
case e.decodeMap[encoder[i]] != invalidIndex:
82+
panic("encoding alphabet includes duplicate symbols")
83+
}
84+
e.decodeMap[encoder[i]] = uint8(i)
7985
}
8086
return e
8187
}
@@ -88,16 +94,12 @@ func NewEncoding(encoder string) *Encoding {
8894
// Padding characters above '\x7f' are encoded as their exact byte value
8995
// rather than using the UTF-8 representation of the codepoint.
9096
func (enc Encoding) WithPadding(padding rune) *Encoding {
91-
if padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff {
97+
switch {
98+
case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff:
9299
panic("invalid padding")
100+
case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex:
101+
panic("padding contained in alphabet")
93102
}
94-
95-
for i := 0; i < len(enc.encode); i++ {
96-
if rune(enc.encode[i]) == padding {
97-
panic("padding contained in alphabet")
98-
}
99-
}
100-
101103
enc.padChar = padding
102104
return &enc
103105
}

0 commit comments

Comments
 (0)