@@ -20,6 +20,7 @@ import (
2020 "errors"
2121 "sort"
2222 "strings"
23+ "unicode/utf16"
2324 "unicode/utf8"
2425
2526 "github.com/tidwall/gjson"
@@ -78,8 +79,9 @@ func NewEventJSONsFromEvents(he []*Event) EventJSONs {
7879// CanonicalJSON re-encodes the JSON in a canonical encoding. The encoding is
7980// the shortest possible encoding using integer values with sorted object keys.
8081// At present this function performs:
81- // * shortest encoding, sorted lexicographically by UTF-8 codepoint:
82- // https://matrix.org/docs/spec/appendices#canonical-json
82+ // - shortest encoding, sorted lexicographically by UTF-8 codepoint:
83+ // https://matrix.org/docs/spec/appendices#canonical-json
84+ //
8385// Returns a gomatrixserverlib.BadJSONError if JSON validation fails.
8486func CanonicalJSON (input []byte ) ([]byte , error ) {
8587 if ! gjson .Valid (string (input )) {
@@ -91,10 +93,11 @@ func CanonicalJSON(input []byte) ([]byte, error) {
9193
9294// Returns a gomatrixserverlib.BadJSONError if the canonical JSON fails enforced
9395// checks or if JSON validation fails. At present this function performs:
94- // * integer bounds checking for room version 6 and above:
95- // https://matrix.org/docs/spec/rooms/v6#canonical-json
96- // * shortest encoding, sorted lexicographically by UTF-8 codepoint:
97- // https://matrix.org/docs/spec/appendices#canonical-json
96+ // - integer bounds checking for room version 6 and above:
97+ // https://matrix.org/docs/spec/rooms/v6#canonical-json
98+ // - shortest encoding, sorted lexicographically by UTF-8 codepoint:
99+ // https://matrix.org/docs/spec/appendices#canonical-json
100+ //
98101// Returns a gomatrixserverlib.BadJSONError if JSON validation fails.
99102func EnforcedCanonicalJSON (input []byte , roomVersion RoomVersion ) ([]byte , error ) {
100103 if enforce , err := roomVersion .EnforceCanonicalJSON (); err == nil && enforce {
@@ -296,6 +299,11 @@ func CompactJSON(input, output []byte) []byte {
296299// compactUnicodeEscape unpacks a 4 byte unicode escape starting at index.
297300// Returns the output slice and a new input index.
298301func compactUnicodeEscape (input , output []byte , index int ) ([]byte , int ) {
302+ appendUTF8 := func (c rune ) {
303+ var buffer [4 ]byte
304+ n := utf8 .EncodeRune (buffer [:], c )
305+ output = append (output , buffer [:n ]... )
306+ }
299307 const (
300308 ESCAPES = "uuuuuuuubtnufruuuuuuuuuuuuuuuuuu"
301309 HEX = "0123456789ABCDEF"
@@ -317,17 +325,26 @@ func compactUnicodeEscape(input, output []byte, index int) ([]byte, int) {
317325 } else if c == '\\' || c == '"' {
318326 // Otherwise the character only needs escaping if it is a QUOTE '"' or BACKSLASH '\\'.
319327 output = append (output , '\\' , byte (c ))
328+ } else if utf16 .IsSurrogate (c ) {
329+ if input [index ] != '\\' && input [index + 1 ] != 'u' {
330+ return output , index
331+ }
332+ index += 2 // skip the \u
333+ if len (input )- index < 4 {
334+ return output , index
335+ }
336+ c2 := readHexDigits (input [index : index + 4 ])
337+ index += 4
338+ appendUTF8 (utf16 .DecodeRune (c , c2 ))
320339 } else {
321- var buffer [4 ]byte
322- n := utf8 .EncodeRune (buffer [:], rune (c ))
323- output = append (output , buffer [:n ]... )
340+ appendUTF8 (c )
324341 }
325342 return output , index
326343}
327344
328345// Read 4 hex digits from the input slice.
329346// Taken from https://github.com/NegativeMjark/indolentjson-rust/blob/8b959791fe2656a88f189c5d60d153be05fe3deb/src/readhex.rs#L21
330- func readHexDigits (input []byte ) uint32 {
347+ func readHexDigits (input []byte ) rune {
331348 hex := binary .BigEndian .Uint32 (input )
332349 // subtract '0'
333350 hex -= 0x30303030
@@ -341,7 +358,7 @@ func readHexDigits(input []byte) uint32 {
341358 hex |= hex >> 4
342359 hex &= 0xFF00FF
343360 hex |= hex >> 8
344- return hex & 0xFFFF
361+ return rune ( hex & 0xFFFF )
345362}
346363
347364// RawJSONFromResult extracts the raw JSON bytes pointed to by result.
0 commit comments