@@ -772,7 +772,8 @@ translate_into_utf8(const char* str, const char* enc) {
772772
773773
774774static char *
775- translate_newlines (const char * s , int exec_input , struct tok_state * tok ) {
775+ translate_newlines (const char * s , int exec_input , int preserve_crlf ,
776+ struct tok_state * tok ) {
776777 int skip_next_lf = 0 ;
777778 size_t needed_length = strlen (s ) + 2 , final_length ;
778779 char * buf , * current ;
@@ -792,7 +793,7 @@ translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
792793 break ;
793794 }
794795 }
795- if (c == '\r' ) {
796+ if (! preserve_crlf && c == '\r' ) {
796797 skip_next_lf = 1 ;
797798 c = '\n' ;
798799 }
@@ -822,14 +823,14 @@ translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
822823 inside TOK. */
823824
824825static char *
825- decode_str (const char * input , int single , struct tok_state * tok )
826+ decode_str (const char * input , int single , struct tok_state * tok , int preserve_crlf )
826827{
827828 PyObject * utf8 = NULL ;
828829 char * str ;
829830 const char * s ;
830831 const char * newl [2 ] = {NULL , NULL };
831832 int lineno = 0 ;
832- tok -> input = str = translate_newlines (input , single , tok );
833+ tok -> input = str = translate_newlines (input , single , preserve_crlf , tok );
833834 if (str == NULL )
834835 return NULL ;
835836 tok -> enc = NULL ;
@@ -881,14 +882,14 @@ decode_str(const char *input, int single, struct tok_state *tok)
881882/* Set up tokenizer for string */
882883
883884struct tok_state *
884- _PyTokenizer_FromString (const char * str , int exec_input )
885+ _PyTokenizer_FromString (const char * str , int exec_input , int preserve_crlf )
885886{
886887 struct tok_state * tok = tok_new ();
887888 char * decoded ;
888889
889890 if (tok == NULL )
890891 return NULL ;
891- decoded = decode_str (str , exec_input , tok );
892+ decoded = decode_str (str , exec_input , tok , preserve_crlf );
892893 if (decoded == NULL ) {
893894 _PyTokenizer_Free (tok );
894895 return NULL ;
@@ -902,13 +903,13 @@ _PyTokenizer_FromString(const char *str, int exec_input)
902903/* Set up tokenizer for UTF-8 string */
903904
904905struct tok_state *
905- _PyTokenizer_FromUTF8 (const char * str , int exec_input )
906+ _PyTokenizer_FromUTF8 (const char * str , int exec_input , int preserve_crlf )
906907{
907908 struct tok_state * tok = tok_new ();
908909 char * translated ;
909910 if (tok == NULL )
910911 return NULL ;
911- tok -> input = translated = translate_newlines (str , exec_input , tok );
912+ tok -> input = translated = translate_newlines (str , exec_input , preserve_crlf , tok );
912913 if (translated == NULL ) {
913914 _PyTokenizer_Free (tok );
914915 return NULL ;
@@ -1050,7 +1051,7 @@ tok_underflow_interactive(struct tok_state *tok) {
10501051 }
10511052 char * newtok = PyOS_Readline (tok -> fp ? tok -> fp : stdin , stdout , tok -> prompt );
10521053 if (newtok != NULL ) {
1053- char * translated = translate_newlines (newtok , 0 , tok );
1054+ char * translated = translate_newlines (newtok , 0 , 0 , tok );
10541055 PyMem_Free (newtok );
10551056 if (translated == NULL ) {
10561057 return 0 ;
@@ -1594,6 +1595,9 @@ tok_decimal_tail(struct tok_state *tok)
15941595static inline int
15951596tok_continuation_line (struct tok_state * tok ) {
15961597 int c = tok_nextc (tok );
1598+ if (c == '\r' ) {
1599+ c = tok_nextc (tok );
1600+ }
15971601 if (c != '\n' ) {
15981602 tok -> done = E_LINECONT ;
15991603 return -1 ;
@@ -1693,7 +1697,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
16931697 }
16941698 }
16951699 tok_backup (tok , c );
1696- if (c == '#' || c == '\n' ) {
1700+ if (c == '#' || c == '\n' || c == '\r' ) {
16971701 /* Lines with only whitespace and/or comments
16981702 shouldn't affect the indentation and are
16991703 not passed to the parser as NEWLINE tokens,
@@ -1822,7 +1826,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
18221826 const char * prefix , * type_start ;
18231827 int current_starting_col_offset ;
18241828
1825- while (c != EOF && c != '\n' ) {
1829+ while (c != EOF && c != '\n' && c != '\r' ) {
18261830 c = tok_nextc (tok );
18271831 }
18281832
@@ -2002,6 +2006,10 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
20022006 return MAKE_TOKEN (NAME );
20032007 }
20042008
2009+ if (c == '\r' ) {
2010+ c = tok_nextc (tok );
2011+ }
2012+
20052013 /* Newline */
20062014 if (c == '\n' ) {
20072015 tok -> atbol = 1 ;
@@ -2405,7 +2413,10 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
24052413 else {
24062414 end_quote_size = 0 ;
24072415 if (c == '\\' ) {
2408- tok_nextc (tok ); /* skip escaped char */
2416+ c = tok_nextc (tok ); /* skip escaped char */
2417+ if (c == '\r' ) {
2418+ c = tok_nextc (tok );
2419+ }
24092420 }
24102421 }
24112422 }
@@ -2696,6 +2707,9 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
26962707 return MAKE_TOKEN (FSTRING_MIDDLE );
26972708 } else if (c == '\\' ) {
26982709 int peek = tok_nextc (tok );
2710+ if (peek == '\r' ) {
2711+ peek = tok_nextc (tok );
2712+ }
26992713 // Special case when the backslash is right before a curly
27002714 // brace. We have to restore and return the control back
27012715 // to the loop for the next iteration.
0 commit comments