Skip to content
This repository was archived by the owner on Jul 5, 2023. It is now read-only.

Commit d498191

Browse files
ilevkivskyi authored and gvanrossum committed
Preserve string kind modifiers (#49)
This will help with experimenting on python/mypy#3648.

Currently we preserve only the ``b`` string modifier on Python 2. This is a bit arbitrary, and for the above-mentioned PR we also need to keep ``u``. Instead of adding another special case, I just always preserve all string modifiers in a short string ``kind`` on the ``Str`` node, for example:

```python
>>> st = ast3.parse("u'hi'")
>>> st.body[0].value.kind
'u'
```
1 parent ec01f89 commit d498191

File tree

8 files changed

+80
-29
lines changed

8 files changed

+80
-29
lines changed

ast27/Include/Python-ast.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ struct _expr {
286286

287287
struct {
288288
string s;
289-
int has_b;
289+
string kind;
290290
} Str;
291291

292292
struct {
@@ -505,7 +505,7 @@ expr_ty _Ta27_Repr(expr_ty value, int lineno, int col_offset, PyArena *arena);
505505
#define Num(a0, a1, a2, a3) _Ta27_Num(a0, a1, a2, a3)
506506
expr_ty _Ta27_Num(object n, int lineno, int col_offset, PyArena *arena);
507507
#define Str(a0, a1, a2, a3, a4) _Ta27_Str(a0, a1, a2, a3, a4)
508-
expr_ty _Ta27_Str(string s, int has_b, int lineno, int col_offset, PyArena *arena);
508+
expr_ty _Ta27_Str(string s, string kind, int lineno, int col_offset, PyArena *arena);
509509
#define Attribute(a0, a1, a2, a3, a4, a5) _Ta27_Attribute(a0, a1, a2, a3, a4, a5)
510510
expr_ty _Ta27_Attribute(expr_ty value, identifier attr, expr_context_ty ctx, int lineno, int
511511
col_offset, PyArena *arena);

ast27/Parser/Python.asdl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ module Python version "$Revision$"
7171
expr? starargs, expr? kwargs)
7272
| Repr(expr value)
7373
| Num(object n) -- a number as a PyObject.
74-
| Str(string s, int? has_b) -- need to specify raw, unicode, etc?
74+
| Str(string s, string kind)
7575
-- other literals? bools?
7676

7777
-- the following expression can appear in assignment context

ast27/Python/Python-ast.c

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ static char *Num_fields[]={
252252
static PyTypeObject *Str_type;
253253
static char *Str_fields[]={
254254
"s",
255-
"has_b",
255+
"kind",
256256
};
257257
static PyTypeObject *Attribute_type;
258258
static char *Attribute_fields[]={
@@ -1850,20 +1850,25 @@ Num(object n, int lineno, int col_offset, PyArena *arena)
18501850
}
18511851

18521852
expr_ty
1853-
Str(string s, int has_b, int lineno, int col_offset, PyArena *arena)
1853+
Str(string s, string kind, int lineno, int col_offset, PyArena *arena)
18541854
{
18551855
expr_ty p;
18561856
if (!s) {
18571857
PyErr_SetString(PyExc_ValueError,
18581858
"field s is required for Str");
18591859
return NULL;
18601860
}
1861+
if (!kind) {
1862+
PyErr_SetString(PyExc_ValueError,
1863+
"field kind is required for Str");
1864+
return NULL;
1865+
}
18611866
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
18621867
if (!p)
18631868
return NULL;
18641869
p->kind = Str_kind;
18651870
p->v.Str.s = s;
1866-
p->v.Str.has_b = has_b;
1871+
p->v.Str.kind = kind;
18671872
p->lineno = lineno;
18681873
p->col_offset = col_offset;
18691874
return p;
@@ -2889,9 +2894,9 @@ ast2obj_expr(void* _o)
28892894
if (PyObject_SetAttrString(result, "s", value) == -1)
28902895
goto failed;
28912896
Py_DECREF(value);
2892-
value = ast2obj_int(o->v.Str.has_b);
2897+
value = ast2obj_string(o->v.Str.kind);
28932898
if (!value) goto failed;
2894-
if (PyObject_SetAttrString(result, "has_b", value) == -1)
2899+
if (PyObject_SetAttrString(result, "kind", value) == -1)
28952900
goto failed;
28962901
Py_DECREF(value);
28972902
break;
@@ -5714,7 +5719,7 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena)
57145719
}
57155720
if (isinstance) {
57165721
string s;
5717-
int has_b;
5722+
string kind;
57185723

57195724
if (PyObject_HasAttrString(obj, "s")) {
57205725
int res;
@@ -5728,18 +5733,19 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena)
57285733
PyErr_SetString(PyExc_TypeError, "required field \"s\" missing from Str");
57295734
return 1;
57305735
}
5731-
if (PyObject_HasAttrString(obj, "has_b")) {
5736+
if (PyObject_HasAttrString(obj, "kind")) {
57325737
int res;
5733-
tmp = PyObject_GetAttrString(obj, "has_b");
5738+
tmp = PyObject_GetAttrString(obj, "kind");
57345739
if (tmp == NULL) goto failed;
5735-
res = obj2ast_int(tmp, &has_b, arena);
5740+
res = obj2ast_string(tmp, &kind, arena);
57365741
if (res != 0) goto failed;
57375742
Py_XDECREF(tmp);
57385743
tmp = NULL;
57395744
} else {
5740-
has_b = 0;
5745+
PyErr_SetString(PyExc_TypeError, "required field \"kind\" missing from Str");
5746+
return 1;
57415747
}
5742-
*out = Str(s, has_b, lineno, col_offset, arena);
5748+
*out = Str(s, kind, lineno, col_offset, arena);
57435749
if (*out == NULL) goto failed;
57445750
return 0;
57455751
}

ast27/Python/ast.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1498,10 +1498,20 @@ ast_for_atom(struct compiling *c, const node *n)
14981498
return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena);
14991499
}
15001500
case STRING: {
1501-
PyObject *str = parsestrplus(c, n);
1502-
const char *s = STR(CHILD(n, 0));
1501+
PyObject *kind, *str = parsestrplus(c, n);
1502+
const char *raw, *s = STR(CHILD(n, 0));
15031503
int quote = Py_CHARMASK(*s);
1504-
int has_b = 0;
1504+
/* currently Python allows up to 2 string modifiers */
1505+
char *ch, s_kind[3] = {0, 0, 0};
1506+
ch = s_kind;
1507+
raw = s;
1508+
while (*raw && *raw != '\'' && *raw != '"') {
1509+
*ch++ = *raw++;
1510+
}
1511+
kind = PyUnicode_FromString(s_kind);
1512+
if (!kind) {
1513+
return NULL;
1514+
}
15051515
if (!str) {
15061516
#ifdef Py_USING_UNICODE
15071517
if (PyErr_ExceptionMatches(PyExc_UnicodeError)){
@@ -1526,10 +1536,7 @@ ast_for_atom(struct compiling *c, const node *n)
15261536
return NULL;
15271537
}
15281538
PyArena_AddPyObject(c->c_arena, str);
1529-
if (quote == 'b' || quote == 'B') {
1530-
has_b = 1;
1531-
}
1532-
return Str(str, has_b, LINENO(n), n->n_col_offset, c->c_arena);
1539+
return Str(str, kind, LINENO(n), n->n_col_offset, c->c_arena);
15331540
}
15341541
case NUMBER: {
15351542
PyObject *pynum = parsenumber(c, STR(ch));

ast3/Include/Python-ast.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ struct _expr {
318318

319319
struct {
320320
string s;
321+
string kind;
321322
} Str;
322323

323324
struct {
@@ -601,8 +602,9 @@ expr_ty _Ta3_Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int
601602
lineno, int col_offset, PyArena *arena);
602603
#define Num(a0, a1, a2, a3) _Ta3_Num(a0, a1, a2, a3)
603604
expr_ty _Ta3_Num(object n, int lineno, int col_offset, PyArena *arena);
604-
#define Str(a0, a1, a2, a3) _Ta3_Str(a0, a1, a2, a3)
605-
expr_ty _Ta3_Str(string s, int lineno, int col_offset, PyArena *arena);
605+
#define Str(a0, a1, a2, a3, a4) _Ta3_Str(a0, a1, a2, a3, a4)
606+
expr_ty _Ta3_Str(string s, string kind, int lineno, int col_offset, PyArena
607+
*arena);
606608
#define FormattedValue(a0, a1, a2, a3, a4, a5) _Ta3_FormattedValue(a0, a1, a2, a3, a4, a5)
607609
expr_ty _Ta3_FormattedValue(expr_ty value, int conversion, expr_ty format_spec,
608610
int lineno, int col_offset, PyArena *arena);

ast3/Parser/Python.asdl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ module Python
7777
| Compare(expr left, cmpop* ops, expr* comparators)
7878
| Call(expr func, expr* args, keyword* keywords)
7979
| Num(object n) -- a number as a PyObject.
80-
| Str(string s) -- need to specify raw, unicode, etc?
80+
| Str(string s, string kind)
8181
| FormattedValue(expr value, int? conversion, expr? format_spec)
8282
| JoinedStr(expr* values)
8383
| Bytes(bytes s)

ast3/Python/Python-ast.c

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -307,8 +307,10 @@ static char *Num_fields[]={
307307
};
308308
static PyTypeObject *Str_type;
309309
_Py_IDENTIFIER(s);
310+
_Py_IDENTIFIER(kind);
310311
static char *Str_fields[]={
311312
"s",
313+
"kind",
312314
};
313315
static PyTypeObject *FormattedValue_type;
314316
_Py_IDENTIFIER(conversion);
@@ -983,7 +985,7 @@ static int init_types(void)
983985
if (!Call_type) return 0;
984986
Num_type = make_type("Num", expr_type, Num_fields, 1);
985987
if (!Num_type) return 0;
986-
Str_type = make_type("Str", expr_type, Str_fields, 1);
988+
Str_type = make_type("Str", expr_type, Str_fields, 2);
987989
if (!Str_type) return 0;
988990
FormattedValue_type = make_type("FormattedValue", expr_type,
989991
FormattedValue_fields, 3);
@@ -2181,19 +2183,25 @@ Num(object n, int lineno, int col_offset, PyArena *arena)
21812183
}
21822184

21832185
expr_ty
2184-
Str(string s, int lineno, int col_offset, PyArena *arena)
2186+
Str(string s, string kind, int lineno, int col_offset, PyArena *arena)
21852187
{
21862188
expr_ty p;
21872189
if (!s) {
21882190
PyErr_SetString(PyExc_ValueError,
21892191
"field s is required for Str");
21902192
return NULL;
21912193
}
2194+
if (!kind) {
2195+
PyErr_SetString(PyExc_ValueError,
2196+
"field kind is required for Str");
2197+
return NULL;
2198+
}
21922199
p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
21932200
if (!p)
21942201
return NULL;
21952202
p->kind = Str_kind;
21962203
p->v.Str.s = s;
2204+
p->v.Str.kind = kind;
21972205
p->lineno = lineno;
21982206
p->col_offset = col_offset;
21992207
return p;
@@ -3448,6 +3456,11 @@ ast2obj_expr(void* _o)
34483456
if (_PyObject_SetAttrId(result, &PyId_s, value) == -1)
34493457
goto failed;
34503458
Py_DECREF(value);
3459+
value = ast2obj_string(o->v.Str.kind);
3460+
if (!value) goto failed;
3461+
if (_PyObject_SetAttrId(result, &PyId_kind, value) == -1)
3462+
goto failed;
3463+
Py_DECREF(value);
34513464
break;
34523465
case FormattedValue_kind:
34533466
result = PyType_GenericNew(FormattedValue_type, NULL, NULL);
@@ -6766,6 +6779,7 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena)
67666779
}
67676780
if (isinstance) {
67686781
string s;
6782+
string kind;
67696783

67706784
if (_PyObject_HasAttrId(obj, &PyId_s)) {
67716785
int res;
@@ -6778,7 +6792,18 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena)
67786792
PyErr_SetString(PyExc_TypeError, "required field \"s\" missing from Str");
67796793
return 1;
67806794
}
6781-
*out = Str(s, lineno, col_offset, arena);
6795+
if (_PyObject_HasAttrId(obj, &PyId_kind)) {
6796+
int res;
6797+
tmp = _PyObject_GetAttrId(obj, &PyId_kind);
6798+
if (tmp == NULL) goto failed;
6799+
res = obj2ast_string(tmp, &kind, arena);
6800+
if (res != 0) goto failed;
6801+
Py_CLEAR(tmp);
6802+
} else {
6803+
PyErr_SetString(PyExc_TypeError, "required field \"kind\" missing from Str");
6804+
return 1;
6805+
}
6806+
*out = Str(s, kind, lineno, col_offset, arena);
67826807
if (*out == NULL) goto failed;
67836808
return 0;
67846809
}

ast3/Python/ast.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4971,14 +4971,25 @@ FstringParser_Dealloc(FstringParser *state)
49714971
static expr_ty
49724972
make_str_node_and_del(PyObject **str, struct compiling *c, const node* n)
49734973
{
4974-
PyObject *s = *str;
4974+
PyObject *kind, *s = *str;
4975+
const char *raw = STR(CHILD(n, 0));
4976+
/* currently Python allows up to 2 string modifiers */
4977+
char *ch, s_kind[3] = {0, 0, 0};
4978+
ch = s_kind;
4979+
while (*raw && *raw != '\'' && *raw != '"') {
4980+
*ch++ = *raw++;
4981+
}
4982+
kind = PyUnicode_FromString(s_kind);
4983+
if (!kind) {
4984+
return NULL;
4985+
}
49754986
*str = NULL;
49764987
assert(PyUnicode_CheckExact(s));
49774988
if (PyArena_AddPyObject(c->c_arena, s) < 0) {
49784989
Py_DECREF(s);
49794990
return NULL;
49804991
}
4981-
return Str(s, LINENO(n), n->n_col_offset, c->c_arena);
4992+
return Str(s, kind, LINENO(n), n->n_col_offset, c->c_arena);
49824993
}
49834994

49844995
/* Add a non-f-string (that is, a regular literal string). str is

0 commit comments

Comments
 (0)