@@ -86,15 +86,18 @@ def testPrintExotic(self, message_module):
86
86
message .repeated_string .append ('\000 \001 \a \b \f \n \r \t \v \\ \' "' )
87
87
message .repeated_string .append (u'\u00fc \ua71f ' )
88
88
self .CompareToGoldenText (
89
- self .RemoveRedundantZeros (text_format .MessageToString (message )),
89
+ self .RemoveRedundantZeros (
90
+ text_format .MessageToString (message , as_utf8 = True )
91
+ ),
90
92
'repeated_int64: -9223372036854775808\n '
91
93
'repeated_uint64: 18446744073709551615\n '
92
94
'repeated_double: 123.456\n '
93
95
'repeated_double: 1.23e+22\n '
94
96
'repeated_double: 1.23e-18\n '
95
97
'repeated_string:'
96
98
' "\\ 000\\ 001\\ 007\\ 010\\ 014\\ n\\ r\\ t\\ 013\\ \\ \\ \' \\ ""\n '
97
- 'repeated_string: "\\ 303\\ 274\\ 352\\ 234\\ 237"\n ' )
99
+ 'repeated_string: "üꜟ"\n ' ,
100
+ )
98
101
99
102
def testPrintFloatPrecision (self , message_module ):
100
103
message = message_module .TestAllTypes ()
@@ -204,8 +207,8 @@ class UnicodeSub(str):
204
207
message = message_module .TestAllTypes ()
205
208
message .repeated_string .append (UnicodeSub (u'\u00fc \ua71f ' ))
206
209
self .CompareToGoldenText (
207
- text_format .MessageToString (message ),
208
- 'repeated_string: "\\ 303 \\ 274 \\ 352 \\ 234 \\ 237 "\n ' )
210
+ text_format .MessageToString (message , as_utf8 = True ),
211
+ 'repeated_string: "üꜟ "\n ' )
209
212
210
213
def testPrintNestedMessageAsOneLine (self , message_module ):
211
214
message = message_module .TestAllTypes ()
@@ -282,15 +285,15 @@ def testPrintExoticAsOneLine(self, message_module):
282
285
message .repeated_string .append (u'\u00fc \ua71f ' )
283
286
self .CompareToGoldenText (
284
287
self .RemoveRedundantZeros (text_format .MessageToString (
285
- message , as_one_line = True )),
288
+ message , as_one_line = True , as_utf8 = True )),
286
289
'repeated_int64: -9223372036854775808'
287
290
' repeated_uint64: 18446744073709551615'
288
291
' repeated_double: 123.456'
289
292
' repeated_double: 1.23e+22'
290
293
' repeated_double: 1.23e-18'
291
294
' repeated_string: '
292
295
'"\\ 000\\ 001\\ 007\\ 010\\ 014\\ n\\ r\\ t\\ 013\\ \\ \\ \' \\ ""'
293
- ' repeated_string: "\\ 303 \\ 274 \\ 352 \\ 234 \\ 237 "' )
296
+ ' repeated_string: "üꜟ "' )
294
297
295
298
def testRoundTripExoticAsOneLine (self , message_module ):
296
299
message = message_module .TestAllTypes ()
@@ -616,8 +619,8 @@ def testMessageToBytes(self, message_module):
616
619
def testRawUtf8RoundTrip (self , message_module ):
617
620
message = message_module .TestAllTypes ()
618
621
message .repeated_string .append (u'\u00fc \t \ua71f ' )
619
- utf8_text = text_format .MessageToBytes (message , as_utf8 = True )
620
- golden_bytes = b'repeated_string: "\xc3 \xbc \\ t \xea \x9c \x9f "\n '
622
+ utf8_text = text_format .MessageToBytes (message , as_utf8 = False )
623
+ golden_bytes = b'repeated_string: "\\ 303 \\ 274 \\ t \\ 352 \\ 234 \\ 237 "\n '
621
624
self .CompareToGoldenText (utf8_text , golden_bytes )
622
625
parsed_message = message_module .TestAllTypes ()
623
626
text_format .Parse (utf8_text , parsed_message )
@@ -626,10 +629,41 @@ def testRawUtf8RoundTrip(self, message_module):
626
629
(message , parsed_message , message .repeated_string [0 ],
627
630
parsed_message .repeated_string [0 ]))
628
631
632
+ def testRawUtf8RoundTripAsUtf8 (self , message_module ):
633
+ message = message_module .TestAllTypes ()
634
+ message .repeated_string .append (u'\u00fc \t \ua71f ' )
635
+ utf8_text = text_format .MessageToString (message , as_utf8 = True )
636
+ parsed_message = message_module .TestAllTypes ()
637
+ text_format .Parse (utf8_text , parsed_message )
638
+ self .assertEqual (
639
+ message , parsed_message , '\n %s != %s (%s != %s)' %
640
+ (message , parsed_message , message .repeated_string [0 ],
641
+ parsed_message .repeated_string [0 ]))
642
+
643
+ # We can only test this case under proto2, because proto3 will reject invalid
644
+ # UTF-8 in the parser, so there should be no way of creating a string field
645
+ # that contains invalid UTF-8.
646
+ #
647
+ # We also can't test it in pure-Python, which validates all string fields for
648
+ # UTF-8 even when the spec says it shouldn't.
649
+ @unittest .skipIf (api_implementation .Type () == 'python' ,
650
+ 'Python can\' t create invalid UTF-8 strings' )
651
+ def testInvalidUtf8RoundTrip (self , message_module ):
652
+ if message_module is not unittest_pb2 :
653
+ return
654
+ one_bytes = unittest_pb2 .OneBytes ()
655
+ one_bytes .data = b'ABC\xff 123'
656
+ one_string = unittest_pb2 .OneString ()
657
+ one_string .ParseFromString (one_bytes .SerializeToString ())
658
+ self .assertIn (
659
+ 'data: "ABC\\ 377123"' ,
660
+ text_format .MessageToString (one_string , as_utf8 = True ),
661
+ )
662
+
629
663
def testEscapedUtf8ASCIIRoundTrip (self , message_module ):
630
664
message = message_module .TestAllTypes ()
631
665
message .repeated_string .append (u'\u00fc \t \ua71f ' )
632
- ascii_text = text_format .MessageToBytes (message ) # as_utf8=False default
666
+ ascii_text = text_format .MessageToBytes (message , as_utf8 = False )
633
667
golden_bytes = b'repeated_string: "\\ 303\\ 274\\ t\\ 352\\ 234\\ 237"\n '
634
668
self .CompareToGoldenText (ascii_text , golden_bytes )
635
669
parsed_message = message_module .TestAllTypes ()
0 commit comments