@@ -986,6 +986,26 @@ Lex::is_hex_digit(char c)
986
986
|| (c >= ' a' && c <= ' f' ));
987
987
}
988
988
989
+ // Return whether C is a valid digit in BASE.
990
+
991
+ bool
992
+ Lex::is_base_digit (int base, char c)
993
+ {
994
+ switch (base)
995
+ {
996
+ case 2 :
997
+ return c == ' 0' || c == ' 1' ;
998
+ case 8 :
999
+ return c >= ' 0' && c <= ' 7' ;
1000
+ case 10 :
1001
+ return c >= ' 0' && c <= ' 9' ;
1002
+ case 16 :
1003
+ return Lex::is_hex_digit (c);
1004
+ default :
1005
+ go_unreachable ();
1006
+ }
1007
+ }
1008
+
989
1009
// not a hex value
990
1010
#define NHV 100
991
1011
@@ -1032,13 +1052,24 @@ Lex::hex_val(char c)
1032
1052
return hex_value_lookup_table[static_cast <unsigned char >(c)];
1033
1053
}
1034
1054
1035
- // Return whether an exponent could start at P.
1055
+ // Return whether an exponent could start at P, in base BASE.
1036
1056
1037
1057
bool
1038
- Lex::could_be_exponent (const char * p, const char * pend)
1058
+ Lex::could_be_exponent (int base, const char * p, const char * pend)
1039
1059
{
1040
- if (*p != ' e' && *p != ' E' )
1041
- return false ;
1060
+ switch (base)
1061
+ {
1062
+ case 10 :
1063
+ if (*p != ' e' && *p != ' E' )
1064
+ return false ;
1065
+ break ;
1066
+ case 16 :
1067
+ if (*p != ' p' && *p != ' P' )
1068
+ return false ;
1069
+ break ;
1070
+ default :
1071
+ go_unreachable ();
1072
+ }
1042
1073
++p;
1043
1074
if (p >= pend)
1044
1075
return false ;
@@ -1062,87 +1093,160 @@ Lex::gather_number()
1062
1093
1063
1094
Location location = this ->location ();
1064
1095
1065
- bool neg = false ;
1066
- if (*p == ' +' )
1067
- ++p;
1068
- else if (*p == ' -' )
1069
- {
1070
- ++p;
1071
- neg = true ;
1072
- }
1073
-
1074
- const char * pnum = p;
1096
+ int base = 10 ;
1097
+ std::string num;
1075
1098
if (*p == ' 0' )
1076
1099
{
1077
- int base ;
1078
- if ((p[ 1 ] == ' x ' || p[ 1 ] == ' X ' )
1079
- && Lex::is_hex_digit (p[2 ]) )
1100
+ int basecheck ;
1101
+ int off;
1102
+ if (p[1 ] == ' x ' || p[ 1 ] == ' X ' )
1080
1103
{
1081
1104
base = 16 ;
1082
- p += 2 ;
1083
- pnum = p;
1084
- while (p < pend)
1085
- {
1086
- if (!Lex::is_hex_digit (*p))
1087
- break ;
1088
- ++p;
1089
- }
1105
+ basecheck = 16 ;
1106
+ off = 2 ;
1107
+ }
1108
+ else if (p[1 ] == ' o' || p[1 ] == ' O' )
1109
+ {
1110
+ base = 8 ;
1111
+ basecheck = 8 ;
1112
+ off = 2 ;
1113
+ }
1114
+ else if (p[1 ] == ' b' || p[1 ] == ' B' )
1115
+ {
1116
+ base = 2 ;
1117
+ basecheck = 2 ;
1118
+ off = 2 ;
1090
1119
}
1091
1120
else
1092
1121
{
1122
+ // Old style octal literal. May also be the start of a
1123
+ // floating-point number (e.g., 09.2, 09e2) or an imaginary
1124
+ // literal (e.g., 09i), so we have to accept decimal digits.
1093
1125
base = 8 ;
1094
- pnum = p;
1095
- while (p < pend)
1096
- {
1097
- if (*p < ' 0' || *p > ' 9' )
1098
- break ;
1099
- ++p;
1100
- }
1126
+ basecheck = 10 ;
1127
+ off = 0 ;
1128
+ }
1129
+
1130
+ p += off;
1131
+ if (*p == ' _' && Lex::is_base_digit (basecheck, p[1 ]))
1132
+ ++p;
1133
+
1134
+ while (Lex::is_base_digit (basecheck, *p))
1135
+ {
1136
+ num.push_back (*p);
1137
+ ++p;
1138
+ if (*p == ' _' && Lex::is_base_digit (basecheck, p[1 ]))
1139
+ ++p;
1140
+ }
1141
+
1142
+ // We must see at least one valid digit, except for a case like
1143
+ // 0x.0p1.
1144
+ if (num.length () == 0 && (base != 16 || *p != ' .' ))
1145
+ {
1146
+ go_error_at (this ->location (), " invalid numeric literal" );
1147
+ this ->lineoff_ = p - this ->linebuf_ ;
1148
+ mpz_t val;
1149
+ mpz_init_set_ui (val, 0 );
1150
+ Token ret = Token::make_integer_token (val, location);
1151
+ mpz_clear (val);
1152
+ return ret;
1153
+ }
1154
+
1155
+ bool is_float = false ;
1156
+ // A number that looks like an old-style octal literal might
1157
+ // actually be the beginning of a floating-point or imaginary
1158
+ // literal, in which case the value is decimal digits. Handle
1159
+ // that case below by treating the leading '0' as decimal.
1160
+ if (off == 0
1161
+ && (*p == ' .' || *p == ' i' || Lex::could_be_exponent (10 , p, pend)))
1162
+ {
1163
+ is_float = true ;
1164
+ base = 10 ;
1101
1165
}
1166
+ else if (base == 16
1167
+ && (*p == ' .' || Lex::could_be_exponent (16 , p, pend)))
1168
+ is_float = true ;
1102
1169
1103
- // A partial token that looks like an octal literal might actually be the
1104
- // beginning of a floating-point or imaginary literal.
1105
- if (base == 16 || (*p != ' .' && *p != ' i' && !Lex::could_be_exponent (p, pend)))
1170
+ if (!is_float)
1106
1171
{
1107
- std::string s (pnum, p - pnum);
1108
1172
mpz_t val;
1109
- int r = mpz_init_set_str (val, s .c_str (), base);
1173
+ int r = mpz_init_set_str (val, num .c_str (), base);
1110
1174
if (r != 0 )
1111
1175
{
1112
- if (base == 8 )
1113
- go_error_at (this ->location (), " invalid octal literal" );
1114
- else
1115
- go_error_at (this ->location (), " invalid hex literal" );
1176
+ const char *errword;
1177
+ switch (base)
1178
+ {
1179
+ case 2 :
1180
+ errword = " binary" ;
1181
+ break ;
1182
+ case 8 :
1183
+ errword = " octal" ;
1184
+ break ;
1185
+ case 16 :
1186
+ errword = " hex" ;
1187
+ break ;
1188
+ default :
1189
+ go_unreachable ();
1190
+ }
1191
+ go_error_at (this ->location (), " invalid %s literal" , errword);
1116
1192
}
1117
1193
1118
- if (neg)
1119
- mpz_neg (val, val);
1194
+ bool is_imaginary = *p == ' i' ;
1195
+ if (is_imaginary)
1196
+ ++p;
1120
1197
1121
1198
this ->lineoff_ = p - this ->linebuf_ ;
1122
- Token ret = Token::make_integer_token (val, location);
1123
- mpz_clear (val);
1124
- return ret;
1199
+
1200
+ if (*p == ' e' || *p == ' E' || *p == ' p' || *p == ' P' )
1201
+ {
1202
+ go_error_at (location,
1203
+ " invalid prefix for floating constant" );
1204
+ this ->skip_exponent ();
1205
+ }
1206
+
1207
+ if (!is_imaginary)
1208
+ {
1209
+ Token ret = Token::make_integer_token (val, location);
1210
+ mpz_clear (val);
1211
+ return ret;
1212
+ }
1213
+ else
1214
+ {
1215
+ mpfr_t ival;
1216
+ mpfr_init_set_z (ival, val, GMP_RNDN);
1217
+ mpz_clear (val);
1218
+ Token ret = Token::make_imaginary_token (ival, location);
1219
+ mpfr_clear (ival);
1220
+ return ret;
1221
+ }
1125
1222
}
1126
1223
}
1127
1224
1128
1225
while (p < pend)
1129
1226
{
1130
- if (*p < ' 0' || *p > ' 9' )
1227
+ if (*p == ' _' && p[1 ] >= ' 0' && p[1 ] <= ' 9' )
1228
+ ++p;
1229
+ else if (*p < ' 0' || *p > ' 9' )
1131
1230
break ;
1231
+ num.push_back (*p);
1132
1232
++p;
1133
1233
}
1134
1234
1135
- if (*p != ' .' && *p != ' i' && !Lex::could_be_exponent (p, pend))
1235
+ if (*p != ' .' && *p != ' i' && !Lex::could_be_exponent (base, p, pend))
1136
1236
{
1137
- std::string s (pnum, p - pnum);
1138
1237
mpz_t val;
1139
- int r = mpz_init_set_str (val, s .c_str (), 10 );
1238
+ int r = mpz_init_set_str (val, num .c_str (), 10 );
1140
1239
go_assert (r == 0 );
1141
1240
1142
- if (neg)
1143
- mpz_neg (val, val);
1144
-
1145
1241
this ->lineoff_ = p - this ->linebuf_ ;
1242
+
1243
+ if (*p == ' e' || *p == ' E' || *p == ' p' || *p == ' P' )
1244
+ {
1245
+ go_error_at (location,
1246
+ " invalid prefix for floating constant" );
1247
+ this ->skip_exponent ();
1248
+ }
1249
+
1146
1250
Token ret = Token::make_integer_token (val, location);
1147
1251
mpz_clear (val);
1148
1252
return ret;
@@ -1152,48 +1256,76 @@ Lex::gather_number()
1152
1256
{
1153
1257
bool dot = *p == ' .' ;
1154
1258
1259
+ num.push_back (*p);
1155
1260
++p;
1156
1261
1157
1262
if (!dot)
1158
1263
{
1159
1264
if (*p == ' +' || *p == ' -' )
1160
- ++p;
1265
+ {
1266
+ num.push_back (*p);
1267
+ ++p;
1268
+ }
1161
1269
}
1162
1270
1271
+ bool first = true ;
1163
1272
while (p < pend)
1164
1273
{
1165
- if (*p < ' 0' || *p > ' 9' )
1274
+ if (!first && *p == ' _' && Lex::is_base_digit (base, p[1 ]))
1275
+ ++p;
1276
+ else if (!Lex::is_base_digit (base, *p))
1166
1277
break ;
1278
+ num.push_back (*p);
1167
1279
++p;
1280
+ first = false ;
1168
1281
}
1169
1282
1170
- if (dot && Lex::could_be_exponent (p, pend))
1283
+ if (dot && Lex::could_be_exponent (base, p, pend))
1171
1284
{
1285
+ num.push_back (*p);
1172
1286
++p;
1173
1287
if (*p == ' +' || *p == ' -' )
1174
- ++p;
1288
+ {
1289
+ num.push_back (*p);
1290
+ ++p;
1291
+ }
1292
+ first = true ;
1175
1293
while (p < pend)
1176
1294
{
1177
- if (*p < ' 0' || *p > ' 9' )
1295
+ if (!first && *p == ' _' && p[1 ] >= ' 0' && p[1 ] <= ' 9' )
1296
+ ++p;
1297
+ else if (*p < ' 0' || *p > ' 9' )
1178
1298
break ;
1299
+ num.push_back (*p);
1179
1300
++p;
1301
+ first = false ;
1180
1302
}
1181
1303
}
1304
+ else if (dot && base == 16 )
1305
+ {
1306
+ go_error_at (this ->location (),
1307
+ " invalid hex floating-point literal with no exponent" );
1308
+ num.append (" p0" );
1309
+ }
1182
1310
}
1183
1311
1184
- std::string s (pnum, p - pnum);
1185
1312
mpfr_t val;
1186
- int r = mpfr_init_set_str (val, s .c_str (), 10 , GMP_RNDN);
1313
+ int r = mpfr_init_set_str (val, num .c_str (), base , GMP_RNDN);
1187
1314
go_assert (r == 0 );
1188
1315
1189
- if (neg)
1190
- mpfr_neg (val, val, GMP_RNDN);
1191
-
1192
1316
bool is_imaginary = *p == ' i' ;
1193
1317
if (is_imaginary)
1194
1318
++p;
1195
1319
1196
1320
this ->lineoff_ = p - this ->linebuf_ ;
1321
+
1322
+ if (*p == ' e' || *p == ' E' || *p == ' p' || *p == ' P' )
1323
+ {
1324
+ go_error_at (location,
1325
+ " invalid prefix for floating constant" );
1326
+ this ->skip_exponent ();
1327
+ }
1328
+
1197
1329
if (is_imaginary)
1198
1330
{
1199
1331
Token ret = Token::make_imaginary_token (val, location);
@@ -1208,6 +1340,27 @@ Lex::gather_number()
1208
1340
}
1209
1341
}
1210
1342
1343
+ // Skip an exponent after reporting an error.
1344
+
1345
+ void
1346
+ Lex::skip_exponent ()
1347
+ {
1348
+ const char * p = this ->linebuf_ + this ->lineoff_ ;
1349
+ const char * pend = this ->linebuf_ + this ->linesize_ ;
1350
+ if (*p != ' e' && *p != ' E' && *p != ' p' && *p != ' P' )
1351
+ return ;
1352
+ ++p;
1353
+ if (*p == ' +' || *p == ' -' )
1354
+ ++p;
1355
+ while (p < pend)
1356
+ {
1357
+ if ((*p < ' 0' || *p > ' 9' ) && *p != ' _' )
1358
+ break ;
1359
+ ++p;
1360
+ }
1361
+ this ->lineoff_ = p - this ->linebuf_ ;
1362
+ }
1363
+
1211
1364
// Advance one character, possibly escaped. Return the pointer beyond
1212
1365
// the character. Set *VALUE to the character. Set *IS_CHARACTER if
1213
1366
// this is a character (e.g., 'a' or '\u1234') rather than a byte
0 commit comments