From 8487811fc1b628ca73a1d1f40e6e98ba80b2cb4c Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Tue, 18 Oct 2016 21:36:38 +0200 Subject: [PATCH 1/5] Implement underscores in numeric literals --- ast35/Parser/tokenizer.c | 124 ++++++++++++++++++++++++++++----------- ast35/Python/ast.c | 27 ++++++++- 2 files changed, 116 insertions(+), 35 deletions(-) diff --git a/ast35/Parser/tokenizer.c b/ast35/Parser/tokenizer.c index a23546ed..e635dc7b 100644 --- a/ast35/Parser/tokenizer.c +++ b/ast35/Parser/tokenizer.c @@ -1343,6 +1343,27 @@ verify_identifier(struct tok_state *tok) } #endif +static int +tok_decimal_tail(struct tok_state *tok) +{ + int c; + while (1) { + do { + c = tok_nextc(tok); + } while (isdigit(c)); + if (c != '_') { + break; + } + c = tok_nextc(tok); + if (!isdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return 0; + } + } + return c; +} + /* Get next token, after space stripping etc. */ static int @@ -1652,56 +1673,85 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) /* Hex */ c = tok_nextc(tok); - if (!isxdigit(c)) { - tok->done = E_TOKEN; - tok_backup(tok, c); - return ERRORTOKEN; - } do { - c = tok_nextc(tok); - } while (isxdigit(c)); + if (c == '_') + c = tok_nextc(tok); + if (!isxdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while (isxdigit(c)); + } while (c == '_'); } else if (c == 'o' || c == 'O') { /* Octal */ c = tok_nextc(tok); - if (c < '0' || c >= '8') { - tok->done = E_TOKEN; - tok_backup(tok, c); - return ERRORTOKEN; - } do { - c = tok_nextc(tok); - } while ('0' <= c && c < '8'); + if (c == '_') + c = tok_nextc(tok); + if (c < '0' || c >= '8') { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while ('0' <= c && c < '8'); + } while (c == '_'); } else if (c == 'b' || c == 'B') { /* Binary */ c = tok_nextc(tok); - if (c != '0' && c != '1') { - tok->done = E_TOKEN; - tok_backup(tok, c); - return 
ERRORTOKEN; - } do { - c = tok_nextc(tok); - } while (c == '0' || c == '1'); + if (c == '_') + c = tok_nextc(tok); + if (c != '0' && c != '1') { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while (c == '0' || c == '1'); + } while (c == '_'); } else { int nonzero = 0; /* maybe old-style octal; c is first char of it */ /* in any case, allow '0' as a literal */ - while (c == '0') + while (1) { + if (c == '_') { + c = tok_nextc(tok); + if (!isdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + } + if (c != '0') + break; c = tok_nextc(tok); - while (isdigit(c)) { + } + if (isdigit(c)) { nonzero = 1; - c = tok_nextc(tok); + c = tok_decimal_tail(tok); + if (c == 0) { + return ERRORTOKEN; + } } - if (c == '.') + if (c == '.') { + c = tok_nextc(tok); goto fraction; + } else if (c == 'e' || c == 'E') goto exponent; else if (c == 'j' || c == 'J') goto imaginary; else if (nonzero) { + /* Old-style octal: now disallowed. */ tok->done = E_TOKEN; tok_backup(tok, c); return ERRORTOKEN; @@ -1710,17 +1760,22 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) } else { /* Decimal */ - do { - c = tok_nextc(tok); - } while (isdigit(c)); + c = tok_decimal_tail(tok); + if (c == 0) { + return ERRORTOKEN; + } { /* Accept floating point numbers. 
*/ if (c == '.') { + c = tok_nextc(tok); fraction: /* Fraction */ - do { - c = tok_nextc(tok); - } while (isdigit(c)); + if (isdigit(c)) { + c = tok_decimal_tail(tok); + if (c == 0) { + return ERRORTOKEN; + } + } } if (c == 'e' || c == 'E') { int e; @@ -1742,9 +1797,10 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) *p_end = tok->cur; return NUMBER; } - do { - c = tok_nextc(tok); - } while (isdigit(c)); + c = tok_decimal_tail(tok); + if (c == 0) { + return ERRORTOKEN; + } } if (c == 'j' || c == 'J') /* Imaginary part */ diff --git a/ast35/Python/ast.c b/ast35/Python/ast.c index 1340717e..c1f056c2 100644 --- a/ast35/Python/ast.c +++ b/ast35/Python/ast.c @@ -4124,7 +4124,7 @@ ast_for_stmt(struct compiling *c, const node *n) } static PyObject * -parsenumber(struct compiling *c, const char *s) +parsenumber_raw(struct compiling *c, const char *s) { const char *end; long x; @@ -4166,6 +4166,37 @@ parsenumber(struct compiling *c, const char *s) } } +/* Parse the numeric literal s, which may contain underscores: they are + stripped into a temporary copy that is passed to parsenumber_raw(). + Returns NULL with MemoryError set if the copy cannot be allocated. */ +static PyObject * +parsenumber(struct compiling *c, const char *s) +{ + char *dup, *end; + PyObject *res = NULL; + + assert(s != NULL); + + if (strchr(s, '_') == NULL) { + return parsenumber_raw(c, s); + } + /* Create a duplicate without underscores. 
*/ + dup = PyMem_Malloc(strlen(s) + 1); + if (dup == NULL) { + return PyErr_NoMemory(); + } + end = dup; + for (; *s; s++) { + if (*s != '_') { + *end++ = *s; + } + } + *end = '\0'; + res = parsenumber_raw(c, dup); + PyMem_Free(dup); + return res; +} + static PyObject * decode_utf8(struct compiling *c, const char **sPtr, const char *end) { From 9e6e7aa2ff476fd0a68bdd6d333ca79c87933999 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Wed, 19 Oct 2016 10:50:12 +0200 Subject: [PATCH 2/5] Add a flag to indicate the use of underscores --- ast35/Include/Python-ast.h | 6 ++++-- ast35/Parser/Python.asdl | 2 +- ast35/Python/Python-ast.c | 25 ++++++++++++++++++++++--- ast35/Python/ast.c | 7 +++++-- 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/ast35/Include/Python-ast.h b/ast35/Include/Python-ast.h index 6b070e69..e091c2d7 100644 --- a/ast35/Include/Python-ast.h +++ b/ast35/Include/Python-ast.h @@ -304,6 +304,7 @@ struct _expr { struct { object n; + int underscores; } Num; struct { @@ -570,8 +571,9 @@ expr_ty _Ta35_Compare(expr_ty left, asdl_int_seq * ops, asdl_seq * comparators, #define Call(a0, a1, a2, a3, a4, a5) _Ta35_Call(a0, a1, a2, a3, a4, a5) expr_ty _Ta35_Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int lineno, int col_offset, PyArena *arena); -#define Num(a0, a1, a2, a3) _Ta35_Num(a0, a1, a2, a3) -expr_ty _Ta35_Num(object n, int lineno, int col_offset, PyArena *arena); +#define Num(a0, a1, a2, a3, a4) _Ta35_Num(a0, a1, a2, a3, a4) +expr_ty _Ta35_Num(object n, int underscores, int lineno, int col_offset, + PyArena *arena); #define Str(a0, a1, a2, a3) _Ta35_Str(a0, a1, a2, a3) expr_ty _Ta35_Str(string s, int lineno, int col_offset, PyArena *arena); #define Bytes(a0, a1, a2, a3) _Ta35_Bytes(a0, a1, a2, a3) diff --git a/ast35/Parser/Python.asdl b/ast35/Parser/Python.asdl index c59a551f..4e2e3cdb 100644 --- a/ast35/Parser/Python.asdl +++ b/ast35/Parser/Python.asdl @@ -70,7 +70,7 @@ module Python -- x < 4 < 3 and (x < 4) < 3 | Compare(expr left, cmpop* ops, expr* comparators) | Call(expr 
func, expr* args, keyword* keywords) - | Num(object n) -- a number as a PyObject. + | Num(object n, int? underscores) -- a number as a PyObject. | Str(string s) -- need to specify raw, unicode, etc? | Bytes(bytes s) | NameConstant(singleton value) diff --git a/ast35/Python/Python-ast.c b/ast35/Python/Python-ast.c index 45cf4bb6..a97319a5 100644 --- a/ast35/Python/Python-ast.c +++ b/ast35/Python/Python-ast.c @@ -293,8 +293,10 @@ static char *Call_fields[]={ }; static PyTypeObject *Num_type; _Py_IDENTIFIER(n); +_Py_IDENTIFIER(underscores); static char *Num_fields[]={ "n", + "underscores", }; static PyTypeObject *Str_type; _Py_IDENTIFIER(s); @@ -937,7 +939,7 @@ static int init_types(void) if (!Compare_type) return 0; Call_type = make_type("Call", expr_type, Call_fields, 3); if (!Call_type) return 0; - Num_type = make_type("Num", expr_type, Num_fields, 1); + Num_type = make_type("Num", expr_type, Num_fields, 2); if (!Num_type) return 0; Str_type = make_type("Str", expr_type, Str_fields, 1); if (!Str_type) return 0; @@ -2077,7 +2079,7 @@ Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int lineno, int } expr_ty -Num(object n, int lineno, int col_offset, PyArena *arena) +Num(object n, int underscores, int lineno, int col_offset, PyArena *arena) { expr_ty p; if (!n) { @@ -2090,6 +2092,7 @@ Num(object n, int lineno, int col_offset, PyArena *arena) return NULL; p->kind = Num_kind; p->v.Num.n = n; + p->v.Num.underscores = underscores; p->lineno = lineno; p->col_offset = col_offset; return p; @@ -3267,6 +3270,11 @@ ast2obj_expr(void* _o) if (_PyObject_SetAttrId(result, &PyId_n, value) == -1) goto failed; Py_DECREF(value); + value = ast2obj_int(o->v.Num.underscores); + if (!value) goto failed; + if (_PyObject_SetAttrId(result, &PyId_underscores, value) == -1) + goto failed; + Py_DECREF(value); break; case Str_kind: result = PyType_GenericNew(Str_type, NULL, NULL); @@ -6267,6 +6275,7 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena) } if (isinstance) { object 
n; + int underscores; if (_PyObject_HasAttrId(obj, &PyId_n)) { int res; @@ -6279,7 +6288,17 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena) PyErr_SetString(PyExc_TypeError, "required field \"n\" missing from Num"); return 1; } - *out = Num(n, lineno, col_offset, arena); + if (exists_not_none(obj, &PyId_underscores)) { + int res; + tmp = _PyObject_GetAttrId(obj, &PyId_underscores); + if (tmp == NULL) goto failed; + res = obj2ast_int(tmp, &underscores, arena); + if (res != 0) goto failed; + Py_CLEAR(tmp); + } else { + underscores = 0; + } + *out = Num(n, underscores, lineno, col_offset, arena); if (*out == NULL) goto failed; return 0; } diff --git a/ast35/Python/ast.c b/ast35/Python/ast.c index c1f056c2..7c9ffb52 100644 --- a/ast35/Python/ast.c +++ b/ast35/Python/ast.c @@ -2204,7 +2204,9 @@ ast_for_atom(struct compiling *c, const node *n) return Str(str, LINENO(n), n->n_col_offset, c->c_arena); } case NUMBER: { - PyObject *pynum = parsenumber(c, STR(ch)); + const char *s = STR(ch); + int underscores = strchr(s, '_') != NULL; + PyObject *pynum = parsenumber(c, s); if (!pynum) return NULL; @@ -2212,7 +2214,8 @@ ast_for_atom(struct compiling *c, const node *n) Py_DECREF(pynum); return NULL; } - return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena); + return Num(pynum, underscores, LINENO(n), + n->n_col_offset, c->c_arena); } case ELLIPSIS: /* Ellipsis */ return Ellipsis(LINENO(n), n->n_col_offset, c->c_arena); From b749f95029e2a2487da8642e910ca3eb2d598ba0 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Wed, 19 Oct 2016 12:08:41 +0200 Subject: [PATCH 3/5] Fix a bug with fractional part --- ast35/Parser/tokenizer.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ast35/Parser/tokenizer.c b/ast35/Parser/tokenizer.c index e635dc7b..de122585 100644 --- a/ast35/Parser/tokenizer.c +++ b/ast35/Parser/tokenizer.c @@ -1665,12 +1665,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) if (c == '0') { /* Hex, octal or binary -- maybe. 
*/ c = tok_nextc(tok); - if (c == '.') - goto fraction; - if (c == 'j' || c == 'J') - goto imaginary; if (c == 'x' || c == 'X') { - /* Hex */ c = tok_nextc(tok); do { From c5e7a3e5641030415e8e1271941623c0160e4e1f Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Sat, 29 Oct 2016 16:06:09 +0200 Subject: [PATCH 4/5] Replace underscores with contains_underscores plus a comment --- ast35/Include/Python-ast.h | 6 +++--- ast35/Parser/Python.asdl | 4 +++- ast35/Python/Python-ast.c | 26 ++++++++++++++------------ ast35/Python/ast.c | 4 ++-- 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/ast35/Include/Python-ast.h b/ast35/Include/Python-ast.h index e091c2d7..94e9b5eb 100644 --- a/ast35/Include/Python-ast.h +++ b/ast35/Include/Python-ast.h @@ -304,7 +304,7 @@ struct _expr { struct { object n; - int underscores; + int contains_underscores; } Num; struct { @@ -572,8 +572,8 @@ expr_ty _Ta35_Compare(expr_ty left, asdl_int_seq * ops, asdl_seq * comparators, expr_ty _Ta35_Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int lineno, int col_offset, PyArena *arena); #define Num(a0, a1, a2, a3, a4) _Ta35_Num(a0, a1, a2, a3, a4) -expr_ty _Ta35_Num(object n, int underscores, int lineno, int col_offset, - PyArena *arena); +expr_ty _Ta35_Num(object n, int contains_underscores, int lineno, int + col_offset, PyArena *arena); #define Str(a0, a1, a2, a3) _Ta35_Str(a0, a1, a2, a3) expr_ty _Ta35_Str(string s, int lineno, int col_offset, PyArena *arena); #define Bytes(a0, a1, a2, a3) _Ta35_Bytes(a0, a1, a2, a3) diff --git a/ast35/Parser/Python.asdl b/ast35/Parser/Python.asdl index 4e2e3cdb..ae52d976 100644 --- a/ast35/Parser/Python.asdl +++ b/ast35/Parser/Python.asdl @@ -70,7 +70,9 @@ module Python -- x < 4 < 3 and (x < 4) < 3 | Compare(expr left, cmpop* ops, expr* comparators) | Call(expr func, expr* args, keyword* keywords) - | Num(object n, int? underscores) -- a number as a PyObject. 
+ -- the field contains_underscores is not a part of standard Python ASDL + -- and exists here to signal that a newer Python version feature was used + | Num(object n, int? contains_underscores) -- a number as a PyObject. | Str(string s) -- need to specify raw, unicode, etc? | Bytes(bytes s) | NameConstant(singleton value) diff --git a/ast35/Python/Python-ast.c b/ast35/Python/Python-ast.c index a97319a5..ce80939e 100644 --- a/ast35/Python/Python-ast.c +++ b/ast35/Python/Python-ast.c @@ -293,10 +293,10 @@ static char *Call_fields[]={ }; static PyTypeObject *Num_type; _Py_IDENTIFIER(n); -_Py_IDENTIFIER(underscores); +_Py_IDENTIFIER(contains_underscores); static char *Num_fields[]={ "n", - "underscores", + "contains_underscores", }; static PyTypeObject *Str_type; _Py_IDENTIFIER(s); @@ -2079,7 +2079,8 @@ Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int lineno, int } expr_ty -Num(object n, int underscores, int lineno, int col_offset, PyArena *arena) +Num(object n, int contains_underscores, int lineno, int col_offset, PyArena + *arena) { expr_ty p; if (!n) { @@ -2092,7 +2093,7 @@ Num(object n, int underscores, int lineno, int col_offset, PyArena *arena) return NULL; p->kind = Num_kind; p->v.Num.n = n; - p->v.Num.underscores = underscores; + p->v.Num.contains_underscores = contains_underscores; p->lineno = lineno; p->col_offset = col_offset; return p; @@ -3270,9 +3271,10 @@ ast2obj_expr(void* _o) if (_PyObject_SetAttrId(result, &PyId_n, value) == -1) goto failed; Py_DECREF(value); - value = ast2obj_int(o->v.Num.underscores); + value = ast2obj_int(o->v.Num.contains_underscores); if (!value) goto failed; - if (_PyObject_SetAttrId(result, &PyId_underscores, value) == -1) + if (_PyObject_SetAttrId(result, &PyId_contains_underscores, value) == + -1) goto failed; Py_DECREF(value); break; @@ -6275,7 +6277,7 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena) } if (isinstance) { object n; - int underscores; + int contains_underscores; if 
(_PyObject_HasAttrId(obj, &PyId_n)) { int res; @@ -6288,17 +6290,17 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena) PyErr_SetString(PyExc_TypeError, "required field \"n\" missing from Num"); return 1; } - if (exists_not_none(obj, &PyId_underscores)) { + if (exists_not_none(obj, &PyId_contains_underscores)) { int res; - tmp = _PyObject_GetAttrId(obj, &PyId_underscores); + tmp = _PyObject_GetAttrId(obj, &PyId_contains_underscores); if (tmp == NULL) goto failed; - res = obj2ast_int(tmp, &underscores, arena); + res = obj2ast_int(tmp, &contains_underscores, arena); if (res != 0) goto failed; Py_CLEAR(tmp); } else { - underscores = 0; + contains_underscores = 0; } - *out = Num(n, underscores, lineno, col_offset, arena); + *out = Num(n, contains_underscores, lineno, col_offset, arena); if (*out == NULL) goto failed; return 0; } diff --git a/ast35/Python/ast.c b/ast35/Python/ast.c index 7c9ffb52..d9b632c9 100644 --- a/ast35/Python/ast.c +++ b/ast35/Python/ast.c @@ -2205,7 +2205,7 @@ ast_for_atom(struct compiling *c, const node *n) } case NUMBER: { const char *s = STR(ch); - int underscores = strchr(s, '_') != NULL; + int contains_underscores = strchr(s, '_') != NULL; PyObject *pynum = parsenumber(c, s); if (!pynum) return NULL; @@ -2214,7 +2214,7 @@ ast_for_atom(struct compiling *c, const node *n) Py_DECREF(pynum); return NULL; } - return Num(pynum, underscores, LINENO(n), + return Num(pynum, contains_underscores, LINENO(n), n->n_col_offset, c->c_arena); } case ELLIPSIS: /* Ellipsis */ From c3c8665e304ce1e05b889317e84970c837c55cf5 Mon Sep 17 00:00:00 2001 From: David Fisher Date: Mon, 31 Oct 2016 13:29:01 -0700 Subject: [PATCH 5/5] make comment a bit more specific --- ast35/Parser/Python.asdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ast35/Parser/Python.asdl b/ast35/Parser/Python.asdl index ae52d976..190ee317 100644 --- a/ast35/Parser/Python.asdl +++ b/ast35/Parser/Python.asdl @@ -70,8 +70,8 @@ module Python -- x < 4 < 3 and (x < 4) 
< 3 | Compare(expr left, cmpop* ops, expr* comparators) | Call(expr func, expr* args, keyword* keywords) - -- the field contains_underscores is not a part of standard Python ASDL - -- and exists here to signal that a newer Python version feature was used + -- contains_underscores is not part of standard Python ASDL + -- and exists here to signal that a Python 3.6 feature was used | Num(object n, int? contains_underscores) -- a number as a PyObject. | Str(string s) -- need to specify raw, unicode, etc? | Bytes(bytes s)