Skip to content
This repository was archived by the owner on Jul 5, 2023. It is now read-only.

Commit 93d4e80

Browse files
ilevkivskyi authored and ddfisher committed
Implement underscores in numeric literals (#21)
Implements underscores in numeric literals. Cherry-picks the necessary parts from the original implementation in Python 3.6 by Georg and Serhiy. This could be useful for mypy.
1 parent 15e1bf1 commit 93d4e80

File tree

5 files changed

+152
-48
lines changed

5 files changed

+152
-48
lines changed

ast35/Include/Python-ast.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ struct _expr {
304304

305305
struct {
306306
object n;
307+
int contains_underscores;
307308
} Num;
308309

309310
struct {
@@ -570,8 +571,9 @@ expr_ty _Ta35_Compare(expr_ty left, asdl_int_seq * ops, asdl_seq * comparators,
570571
#define Call(a0, a1, a2, a3, a4, a5) _Ta35_Call(a0, a1, a2, a3, a4, a5)
571572
expr_ty _Ta35_Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int
572573
lineno, int col_offset, PyArena *arena);
573-
#define Num(a0, a1, a2, a3) _Ta35_Num(a0, a1, a2, a3)
574-
expr_ty _Ta35_Num(object n, int lineno, int col_offset, PyArena *arena);
574+
#define Num(a0, a1, a2, a3, a4) _Ta35_Num(a0, a1, a2, a3, a4)
575+
expr_ty _Ta35_Num(object n, int contains_underscores, int lineno, int
576+
col_offset, PyArena *arena);
575577
#define Str(a0, a1, a2, a3) _Ta35_Str(a0, a1, a2, a3)
576578
expr_ty _Ta35_Str(string s, int lineno, int col_offset, PyArena *arena);
577579
#define Bytes(a0, a1, a2, a3) _Ta35_Bytes(a0, a1, a2, a3)

ast35/Parser/Python.asdl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ module Python
7070
-- x < 4 < 3 and (x < 4) < 3
7171
| Compare(expr left, cmpop* ops, expr* comparators)
7272
| Call(expr func, expr* args, keyword* keywords)
73-
| Num(object n) -- a number as a PyObject.
73+
-- contains_underscores is not part of standard Python ASDL
74+
-- and exists here to signal that a Python 3.6 feature was used
75+
| Num(object n, int? contains_underscores) -- a number as a PyObject.
7476
| Str(string s) -- need to specify raw, unicode, etc?
7577
| Bytes(bytes s)
7678
| NameConstant(singleton value)

ast35/Parser/tokenizer.c

Lines changed: 90 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,6 +1343,27 @@ verify_identifier(struct tok_state *tok)
13431343
}
13441344
#endif
13451345

1346+
static int
1347+
tok_decimal_tail(struct tok_state *tok)
1348+
{
1349+
int c;
1350+
while (1) {
1351+
do {
1352+
c = tok_nextc(tok);
1353+
} while (isdigit(c));
1354+
if (c != '_') {
1355+
break;
1356+
}
1357+
c = tok_nextc(tok);
1358+
if (!isdigit(c)) {
1359+
tok->done = E_TOKEN;
1360+
tok_backup(tok, c);
1361+
return 0;
1362+
}
1363+
}
1364+
return c;
1365+
}
1366+
13461367
/* Get next token, after space stripping etc. */
13471368

13481369
static int
@@ -1644,64 +1665,88 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
16441665
if (c == '0') {
16451666
/* Hex, octal or binary -- maybe. */
16461667
c = tok_nextc(tok);
1647-
if (c == '.')
1648-
goto fraction;
1649-
if (c == 'j' || c == 'J')
1650-
goto imaginary;
16511668
if (c == 'x' || c == 'X') {
1652-
16531669
/* Hex */
16541670
c = tok_nextc(tok);
1655-
if (!isxdigit(c)) {
1656-
tok->done = E_TOKEN;
1657-
tok_backup(tok, c);
1658-
return ERRORTOKEN;
1659-
}
16601671
do {
1661-
c = tok_nextc(tok);
1662-
} while (isxdigit(c));
1672+
if (c == '_')
1673+
c = tok_nextc(tok);
1674+
if (!isxdigit(c)) {
1675+
tok->done = E_TOKEN;
1676+
tok_backup(tok, c);
1677+
return ERRORTOKEN;
1678+
}
1679+
do {
1680+
c = tok_nextc(tok);
1681+
} while (isxdigit(c));
1682+
} while (c == '_');
16631683
}
16641684
else if (c == 'o' || c == 'O') {
16651685
/* Octal */
16661686
c = tok_nextc(tok);
1667-
if (c < '0' || c >= '8') {
1668-
tok->done = E_TOKEN;
1669-
tok_backup(tok, c);
1670-
return ERRORTOKEN;
1671-
}
16721687
do {
1673-
c = tok_nextc(tok);
1674-
} while ('0' <= c && c < '8');
1688+
if (c == '_')
1689+
c = tok_nextc(tok);
1690+
if (c < '0' || c >= '8') {
1691+
tok->done = E_TOKEN;
1692+
tok_backup(tok, c);
1693+
return ERRORTOKEN;
1694+
}
1695+
do {
1696+
c = tok_nextc(tok);
1697+
} while ('0' <= c && c < '8');
1698+
} while (c == '_');
16751699
}
16761700
else if (c == 'b' || c == 'B') {
16771701
/* Binary */
16781702
c = tok_nextc(tok);
1679-
if (c != '0' && c != '1') {
1680-
tok->done = E_TOKEN;
1681-
tok_backup(tok, c);
1682-
return ERRORTOKEN;
1683-
}
16841703
do {
1685-
c = tok_nextc(tok);
1686-
} while (c == '0' || c == '1');
1704+
if (c == '_')
1705+
c = tok_nextc(tok);
1706+
if (c != '0' && c != '1') {
1707+
tok->done = E_TOKEN;
1708+
tok_backup(tok, c);
1709+
return ERRORTOKEN;
1710+
}
1711+
do {
1712+
c = tok_nextc(tok);
1713+
} while (c == '0' || c == '1');
1714+
} while (c == '_');
16871715
}
16881716
else {
16891717
int nonzero = 0;
16901718
/* maybe old-style octal; c is first char of it */
16911719
/* in any case, allow '0' as a literal */
1692-
while (c == '0')
1720+
while (1) {
1721+
if (c == '_') {
1722+
c = tok_nextc(tok);
1723+
if (!isdigit(c)) {
1724+
tok->done = E_TOKEN;
1725+
tok_backup(tok, c);
1726+
return ERRORTOKEN;
1727+
}
1728+
}
1729+
if (c != '0')
1730+
break;
16931731
c = tok_nextc(tok);
1694-
while (isdigit(c)) {
1732+
}
1733+
if (isdigit(c)) {
16951734
nonzero = 1;
1696-
c = tok_nextc(tok);
1735+
c = tok_decimal_tail(tok);
1736+
if (c == 0) {
1737+
return ERRORTOKEN;
1738+
}
16971739
}
1698-
if (c == '.')
1740+
if (c == '.') {
1741+
c = tok_nextc(tok);
16991742
goto fraction;
1743+
}
17001744
else if (c == 'e' || c == 'E')
17011745
goto exponent;
17021746
else if (c == 'j' || c == 'J')
17031747
goto imaginary;
17041748
else if (nonzero) {
1749+
/* Old-style octal: now disallowed. */
17051750
tok->done = E_TOKEN;
17061751
tok_backup(tok, c);
17071752
return ERRORTOKEN;
@@ -1710,17 +1755,22 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
17101755
}
17111756
else {
17121757
/* Decimal */
1713-
do {
1714-
c = tok_nextc(tok);
1715-
} while (isdigit(c));
1758+
c = tok_decimal_tail(tok);
1759+
if (c == 0) {
1760+
return ERRORTOKEN;
1761+
}
17161762
{
17171763
/* Accept floating point numbers. */
17181764
if (c == '.') {
1765+
c = tok_nextc(tok);
17191766
fraction:
17201767
/* Fraction */
1721-
do {
1722-
c = tok_nextc(tok);
1723-
} while (isdigit(c));
1768+
if (isdigit(c)) {
1769+
c = tok_decimal_tail(tok);
1770+
if (c == 0) {
1771+
return ERRORTOKEN;
1772+
}
1773+
}
17241774
}
17251775
if (c == 'e' || c == 'E') {
17261776
int e;
@@ -1742,9 +1792,10 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
17421792
*p_end = tok->cur;
17431793
return NUMBER;
17441794
}
1745-
do {
1746-
c = tok_nextc(tok);
1747-
} while (isdigit(c));
1795+
c = tok_decimal_tail(tok);
1796+
if (c == 0) {
1797+
return ERRORTOKEN;
1798+
}
17481799
}
17491800
if (c == 'j' || c == 'J')
17501801
/* Imaginary part */

ast35/Python/Python-ast.c

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -293,8 +293,10 @@ static char *Call_fields[]={
293293
};
294294
static PyTypeObject *Num_type;
295295
_Py_IDENTIFIER(n);
296+
_Py_IDENTIFIER(contains_underscores);
296297
static char *Num_fields[]={
297298
"n",
299+
"contains_underscores",
298300
};
299301
static PyTypeObject *Str_type;
300302
_Py_IDENTIFIER(s);
@@ -937,7 +939,7 @@ static int init_types(void)
937939
if (!Compare_type) return 0;
938940
Call_type = make_type("Call", expr_type, Call_fields, 3);
939941
if (!Call_type) return 0;
940-
Num_type = make_type("Num", expr_type, Num_fields, 1);
942+
Num_type = make_type("Num", expr_type, Num_fields, 2);
941943
if (!Num_type) return 0;
942944
Str_type = make_type("Str", expr_type, Str_fields, 1);
943945
if (!Str_type) return 0;
@@ -2077,7 +2079,8 @@ Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int lineno, int
20772079
}
20782080

20792081
expr_ty
2080-
Num(object n, int lineno, int col_offset, PyArena *arena)
2082+
Num(object n, int contains_underscores, int lineno, int col_offset, PyArena
2083+
*arena)
20812084
{
20822085
expr_ty p;
20832086
if (!n) {
@@ -2090,6 +2093,7 @@ Num(object n, int lineno, int col_offset, PyArena *arena)
20902093
return NULL;
20912094
p->kind = Num_kind;
20922095
p->v.Num.n = n;
2096+
p->v.Num.contains_underscores = contains_underscores;
20932097
p->lineno = lineno;
20942098
p->col_offset = col_offset;
20952099
return p;
@@ -3267,6 +3271,12 @@ ast2obj_expr(void* _o)
32673271
if (_PyObject_SetAttrId(result, &PyId_n, value) == -1)
32683272
goto failed;
32693273
Py_DECREF(value);
3274+
value = ast2obj_int(o->v.Num.contains_underscores);
3275+
if (!value) goto failed;
3276+
if (_PyObject_SetAttrId(result, &PyId_contains_underscores, value) ==
3277+
-1)
3278+
goto failed;
3279+
Py_DECREF(value);
32703280
break;
32713281
case Str_kind:
32723282
result = PyType_GenericNew(Str_type, NULL, NULL);
@@ -6267,6 +6277,7 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena)
62676277
}
62686278
if (isinstance) {
62696279
object n;
6280+
int contains_underscores;
62706281

62716282
if (_PyObject_HasAttrId(obj, &PyId_n)) {
62726283
int res;
@@ -6279,7 +6290,17 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena)
62796290
PyErr_SetString(PyExc_TypeError, "required field \"n\" missing from Num");
62806291
return 1;
62816292
}
6282-
*out = Num(n, lineno, col_offset, arena);
6293+
if (exists_not_none(obj, &PyId_contains_underscores)) {
6294+
int res;
6295+
tmp = _PyObject_GetAttrId(obj, &PyId_contains_underscores);
6296+
if (tmp == NULL) goto failed;
6297+
res = obj2ast_int(tmp, &contains_underscores, arena);
6298+
if (res != 0) goto failed;
6299+
Py_CLEAR(tmp);
6300+
} else {
6301+
contains_underscores = 0;
6302+
}
6303+
*out = Num(n, contains_underscores, lineno, col_offset, arena);
62836304
if (*out == NULL) goto failed;
62846305
return 0;
62856306
}

ast35/Python/ast.c

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2204,15 +2204,18 @@ ast_for_atom(struct compiling *c, const node *n)
22042204
return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
22052205
}
22062206
case NUMBER: {
2207-
PyObject *pynum = parsenumber(c, STR(ch));
2207+
const char *s = STR(ch);
2208+
int contains_underscores = strchr(s, '_') != NULL;
2209+
PyObject *pynum = parsenumber(c, s);
22082210
if (!pynum)
22092211
return NULL;
22102212

22112213
if (PyArena_AddPyObject(c->c_arena, pynum) < 0) {
22122214
Py_DECREF(pynum);
22132215
return NULL;
22142216
}
2215-
return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
2217+
return Num(pynum, contains_underscores, LINENO(n),
2218+
n->n_col_offset, c->c_arena);
22162219
}
22172220
case ELLIPSIS: /* Ellipsis */
22182221
return Ellipsis(LINENO(n), n->n_col_offset, c->c_arena);
@@ -4124,7 +4127,7 @@ ast_for_stmt(struct compiling *c, const node *n)
41244127
}
41254128

41264129
static PyObject *
4127-
parsenumber(struct compiling *c, const char *s)
4130+
parsenumber_raw(struct compiling *c, const char *s)
41284131
{
41294132
const char *end;
41304133
long x;
@@ -4166,6 +4169,31 @@ parsenumber(struct compiling *c, const char *s)
41664169
}
41674170
}
41684171

4172+
/* Parse the text of a NUMBER token into a Python object.
 *
 * Literals that contain no underscore are handed to parsenumber_raw()
 * unchanged.  Otherwise a temporary copy with every '_' removed is parsed
 * instead and released before returning.
 *
 * Returns the result of parsenumber_raw(), or NULL with MemoryError set if
 * the temporary copy cannot be allocated.
 */
static PyObject *
parsenumber(struct compiling *c, const char *s)
{
    char *dup, *end;
    PyObject *res = NULL;

    assert(s != NULL);

    if (strchr(s, '_') == NULL) {
        return parsenumber_raw(c, s);
    }
    /* Create a duplicate without underscores. */
    dup = PyMem_Malloc(strlen(s) + 1);
    if (dup == NULL) {
        /* Without this check the copy loop below would write through NULL. */
        return PyErr_NoMemory();
    }
    end = dup;
    for (; *s; s++) {
        if (*s != '_') {
            *end++ = *s;
        }
    }
    *end = '\0';
    res = parsenumber_raw(c, dup);
    PyMem_Free(dup);
    return res;
}
4196+
41694197
static PyObject *
41704198
decode_utf8(struct compiling *c, const char **sPtr, const char *end)
41714199
{

0 commit comments

Comments
 (0)