Skip to content

Commit 9c7e557

Browse files
Merge pull request #988 from Thirumalai-Shaktivel/string
2 parents cd89bef + 720023d commit 9c7e557

File tree

5 files changed

+103
-7
lines changed

5 files changed

+103
-7
lines changed

src/lpython/parser/parser.yy

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -900,6 +900,7 @@ subscript
900900

901901
string
902902
: string TK_STRING { $$ = STRING2($1, $2, @$); } // TODO
903+
| string id TK_STRING { $$ = STRING4($1, STRING3($2, $3, @$), @$); }
903904
| TK_STRING { $$ = STRING1($1, @$); }
904905
| id TK_STRING { $$ = STRING3($1, $2, @$); }
905906
;

src/lpython/parser/semantics.h

Lines changed: 75 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include <lpython/python_ast.h>
1616
#include <libasr/string_utils.h>
17+
#include <lpython/parser/parser_exception.h>
1718

1819
// This is only used in parser.tab.cc, nowhere else, so we simply include
1920
// everything from LFortran::AST to save typing:
@@ -664,9 +665,78 @@ static inline ast_t* BOOLOP_01(Allocator &al, Location &loc,
664665
#define COMPARE(x, op, y, l) make_Compare_t(p.m_a, l, \
665666
EXPR(x), cmpopType::op, EXPRS(A2LIST(p.m_a, y)), 1)
666667

667-
char* concat_string(Allocator &al, ast_t *a, char *b) {
668-
char *s = down_cast2<ConstantStr_t>(a)->m_value;
669-
return LFortran::s2c(al, std::string(s) + std::string(b));
668+
static inline ast_t* concat_string(Allocator &al, Location &l,
669+
expr_t *string, std::string str, expr_t *string_literal) {
670+
std::string str1 = "";
671+
ast_t* tmp = nullptr;
672+
Vec<expr_t *> exprs;
673+
exprs.reserve(al, 4);
674+
675+
// TODO: Merge two concurrent ConstantStr's into one in the JoinedStr
676+
if (string_literal) {
677+
if (is_a<ConstantStr_t>(*string)
678+
&& is_a<ConstantStr_t>(*string_literal)) {
679+
str1 = std::string(down_cast<ConstantStr_t>(string)->m_value);
680+
str = std::string(down_cast<ConstantStr_t>(string_literal)->m_value);
681+
str1 = str1 + str;
682+
tmp = make_ConstantStr_t(al, l, LFortran::s2c(al, str1), nullptr);
683+
} else if (is_a<JoinedStr_t>(*string)
684+
&& is_a<JoinedStr_t>(*string_literal)) {
685+
JoinedStr_t *t = down_cast<JoinedStr_t>(string);
686+
for (size_t i = 0; i < t->n_values; i++) {
687+
exprs.push_back(al, t->m_values[i]);
688+
}
689+
t = down_cast<JoinedStr_t>(string_literal);
690+
for (size_t i = 0; i < t->n_values; i++) {
691+
exprs.push_back(al, t->m_values[i]);
692+
}
693+
tmp = make_JoinedStr_t(al, l, exprs.p, exprs.size());
694+
} else if (is_a<JoinedStr_t>(*string)
695+
&& is_a<ConstantStr_t>(*string_literal)) {
696+
JoinedStr_t *t = down_cast<JoinedStr_t>(string);
697+
for (size_t i = 0; i < t->n_values; i++) {
698+
exprs.push_back(al, t->m_values[i]);
699+
}
700+
exprs.push_back(al, string_literal);
701+
tmp = make_JoinedStr_t(al, l, exprs.p, exprs.size());
702+
} else if (is_a<ConstantStr_t>(*string)
703+
&& is_a<JoinedStr_t>(*string_literal)) {
704+
exprs.push_back(al, string);
705+
JoinedStr_t *t = down_cast<JoinedStr_t>(string_literal);
706+
for (size_t i = 0; i < t->n_values; i++) {
707+
exprs.push_back(al, t->m_values[i]);
708+
}
709+
tmp = make_JoinedStr_t(al, l, exprs.p, exprs.size());
710+
} else if (is_a<ConstantBytes_t>(*string)
711+
&& is_a<ConstantBytes_t>(*string_literal)) {
712+
str1 = std::string(down_cast<ConstantBytes_t>(string)->m_value);
713+
str1 = str1.substr(0, str1.size() - 1);
714+
str = std::string(down_cast<ConstantBytes_t>(string_literal)->m_value);
715+
str = str.substr(2, str.size());
716+
str1 = str1 + str;
717+
tmp = make_ConstantBytes_t(al, l, LFortran::s2c(al, str1), nullptr);
718+
} else {
719+
throw LFortran::parser_local::ParserError(
720+
"The byte and non-byte literals can not be combined", l);
721+
}
722+
} else {
723+
if (is_a<ConstantStr_t>(*string)) {
724+
str1 = std::string(down_cast<ConstantStr_t>(string)->m_value);
725+
str1 = str1 + str;
726+
tmp = make_ConstantStr_t(al, l, LFortran::s2c(al, str1), nullptr);
727+
} else if (is_a<JoinedStr_t>(*string)) {
728+
JoinedStr_t *t = down_cast<JoinedStr_t>(string);
729+
for (size_t i = 0; i < t->n_values; i++) {
730+
exprs.push_back(al, t->m_values[i]);
731+
}
732+
exprs.push_back(al, (expr_t *)make_ConstantStr_t(al, l,
733+
LFortran::s2c(al, str), nullptr));
734+
tmp = make_JoinedStr_t(al, l, exprs.p, exprs.size());
735+
} else {
736+
LFORTRAN_ASSERT(false);
737+
}
738+
}
739+
return tmp;
670740
}
671741

672742
char* unescape(Allocator &al, LFortran::Str &s) {
@@ -687,8 +757,9 @@ char* unescape(Allocator &al, LFortran::Str &s) {
687757
// `x.int_n` is of type BigInt but we store the int64_t directly in AST
688758
#define INTEGER(x, l) make_ConstantInt_t(p.m_a, l, x, nullptr)
689759
#define STRING1(x, l) make_ConstantStr_t(p.m_a, l, unescape(p.m_a, x), nullptr)
690-
#define STRING2(x, y, l) make_ConstantStr_t(p.m_a, l, concat_string(p.m_a, x, y.c_str(p.m_a)), nullptr)
760+
#define STRING2(x, y, l) concat_string(p.m_a, l, EXPR(x), y.str(), nullptr)
691761
#define STRING3(id, x, l) PREFIX_STRING(p.m_a, l, name2char(id), x.c_str(p.m_a))
762+
#define STRING4(x, s, l) concat_string(p.m_a, l, EXPR(x), "", EXPR(s))
692763
#define FLOAT(x, l) make_ConstantFloat_t(p.m_a, l, x, nullptr)
693764
#define COMPLEX(x, l) make_ConstantComplex_t(p.m_a, l, 0, x, nullptr)
694765
#define BOOL(x, l) make_ConstantBool_t(p.m_a, l, x, nullptr)

tests/parser/string1.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,27 @@
4747
RB"Text"
4848

4949
rf'\N{AMPERSAND}'
50+
51+
(f"Text{a}, {b}"
52+
f"Text {a}")
53+
54+
(f"Text{a}, {b}"
55+
"Text")
56+
57+
("Text"
58+
f"{b}, Text")
59+
60+
(f"Text {a}"
61+
r"Text")
62+
63+
(r"Text"
64+
r"Text")
65+
66+
(r"Text"
67+
"Text")
68+
69+
(r"Text"
70+
f"{a} Text")
71+
72+
(b"Text"
73+
b"Text")

tests/reference/ast_new-string1-96b90b3.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
"basename": "ast_new-string1-96b90b3",
33
"cmd": "lpython --show-ast --new-parser --no-color {infile} -o {outfile}",
44
"infile": "tests/parser/string1.py",
5-
"infile_hash": "e4090ab45efb09242e14f3dd494ca869a32e71de3617306d959d8721",
5+
"infile_hash": "fd64c289d2ab5638fcc3093bda8f07fdde0e034798d4c04791fd441b",
66
"outfile": null,
77
"outfile_hash": null,
88
"stdout": "ast_new-string1-96b90b3.stdout",
9-
"stdout_hash": "1666ca8ebba593a8bf35e7906efabf5726626b7c82b130324bd78a43",
9+
"stdout_hash": "e23911e24aada1bf320ffde6cca77cc2a19700d09a2e9b01bbd37afa",
1010
"stderr": null,
1111
"stderr_hash": null,
1212
"returncode": 0

tests/reference/ast_new-string1-96b90b3.stdout

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ anotherline
55
Text
66
" ())) (Expr (ConstantStr "Text" ())) (Expr (ConstantStr "a\tb\nA\tB" ())) (Expr (ConstantStr "1,2,3 # comment" "u")) (Expr (ConstantStr "Text" "u")) (Expr (ConstantStr "Text" ())) (Expr (JoinedStr [(ConstantStr "Text " ()) (FormattedValue (Name id Load) -1 ())])) (Expr (JoinedStr [(ConstantStr "Text " ()) (FormattedValue (Name id Load) -1 ())])) (Expr (JoinedStr [(ConstantStr "Text " ()) (FormattedValue (Name id Load) -1 ())])) (Expr (JoinedStr [(ConstantStr "Text " ()) (FormattedValue (Name id Load) -1 ())])) (Expr (JoinedStr [(ConstantStr "Text " ()) (FormattedValue (Name id Load) -1 ())])) (Expr (JoinedStr [(ConstantStr "
77
Text " ()) (FormattedValue (Name id Load) -1 ()) (ConstantStr "
8-
" ())])) (Expr (JoinedStr [(ConstantStr "Text " ()) (FormattedValue (Name id Load) -1 ())])) (Expr (JoinedStr [(ConstantStr "Text " ()) (FormattedValue (Name id Load) -1 ())])) (Expr (ConstantBytes "b'Text'" ())) (Expr (ConstantBytes "b'\nText\n'" ())) (Expr (ConstantBytes "b'Text'" ())) (Expr (ConstantBytes "b'Text'" ())) (Expr (ConstantBytes "b'Text'" ())) (Expr (ConstantBytes "b'\nText\n'" ())) (Expr (ConstantBytes "b'Text'" ())) (Expr (ConstantBytes "b'Text'" ())) (Expr (JoinedStr [(ConstantStr "\N" ()) (FormattedValue (Name AMPERSAND Load) -1 ())]))] [])
8+
" ())])) (Expr (JoinedStr [(ConstantStr "Text " ()) (FormattedValue (Name id Load) -1 ())])) (Expr (JoinedStr [(ConstantStr "Text " ()) (FormattedValue (Name id Load) -1 ())])) (Expr (ConstantBytes "b'Text'" ())) (Expr (ConstantBytes "b'\nText\n'" ())) (Expr (ConstantBytes "b'Text'" ())) (Expr (ConstantBytes "b'Text'" ())) (Expr (ConstantBytes "b'Text'" ())) (Expr (ConstantBytes "b'\nText\n'" ())) (Expr (ConstantBytes "b'Text'" ())) (Expr (ConstantBytes "b'Text'" ())) (Expr (JoinedStr [(ConstantStr "\N" ()) (FormattedValue (Name AMPERSAND Load) -1 ())])) (Expr (JoinedStr [(ConstantStr "Text" ()) (FormattedValue (Name a Load) -1 ()) (ConstantStr ", " ()) (FormattedValue (Name b Load) -1 ()) (ConstantStr "Text " ()) (FormattedValue (Name a Load) -1 ())])) (Expr (JoinedStr [(ConstantStr "Text" ()) (FormattedValue (Name a Load) -1 ()) (ConstantStr ", " ()) (FormattedValue (Name b Load) -1 ()) (ConstantStr "Text" ())])) (Expr (JoinedStr [(ConstantStr "Text" ()) (FormattedValue (Name b Load) -1 ()) (ConstantStr ", Text" ())])) (Expr (JoinedStr [(ConstantStr "Text " ()) (FormattedValue (Name a Load) -1 ()) (ConstantStr "Text" ())])) (Expr (ConstantStr "TextText" ())) (Expr (ConstantStr "TextText" ())) (Expr (JoinedStr [(ConstantStr "Text" ()) (FormattedValue (Name a Load) -1 ()) (ConstantStr " Text" ())])) (Expr (ConstantBytes "b'TextText'" ()))] [])

0 commit comments

Comments
 (0)