Skip to content

Commit c2ae7ac

Browse files
authored
Merge pull request #2445 from AkshayWarrier/akshaywarrier/implement-str-funcs
Implement ``str.split()``
2 parents 0a79bf5 + e54fb58 commit c2ae7ac

File tree

62 files changed

+1315
-1230
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+1315
-1230
lines changed

integration_tests/test_str_01.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,28 @@ def test_constant_str_subscript():
123123
assert "abc"[2] == "c"
124124
assert "abc"[:2] == "ab"
125125

126+
def test_str_split():
    # Exercises str.split() both with an explicit separator and with the
    # no-argument (whitespace) form.
    a: str = "1,2,3"
    b: str = "1,2,,3,"
    c: str = "1and2and3"
    d: str = "1 2 3"
    e: str = " 1 2 3 "
    f: str = "123"
    # Runs of several spaces between fields: the no-argument form must
    # collapse each run into a single delimiter.
    g: str = "1  2   3"
    res: list[str] = a.split(",")
    res1: list[str] = b.split(",")
    res2: list[str] = c.split("and")
    res3: list[str] = d.split()
    res4: list[str] = e.split()
    res5: list[str] = f.split(" ")
    # TODO: empty-receiver case is disabled here; re-enable together with
    # its assertion below once it is supported.
    # res6: list[str] = "".split(" ")
    res7: list[str] = g.split()
    assert res == ["1", "2", "3"]
    # Adjacent and trailing separators produce empty fields.
    assert res1 == ["1", "2", "", "3", ""]
    # Multi-character separator.
    assert res2 == ["1", "2", "3"]
    assert res3 == ["1", "2", "3"]
    # Leading and trailing whitespace is dropped by the no-argument form.
    assert res4 == ["1", "2", "3"]
    # Separator absent: the whole string is the only field.
    assert res5 == ["123"]
    # assert res6 == [""]
    assert res7 == ["1", "2", "3"]
147+
126148
def check():
127149
f()
128150
test_str_concat()
@@ -137,5 +159,6 @@ def check():
137159
test_str_title()
138160
test_str_istitle()
139161
test_str_isalpha()
162+
test_str_split()
140163

141164
check()

src/lpython/semantics/python_ast_to_asr.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6761,6 +6761,29 @@ class BodyVisitor : public CommonVisitor<BodyVisitor> {
67616761
// Push string and substring argument on top of Vector (or Function Arguments Stack basically)
67626762
fn_args.push_back(al, str);
67636763
fn_args.push_back(al, value);
6764+
} else if(attr_name == "split") {
6765+
if(args.size() > 1) {
6766+
throw SemanticError("str.split() takes at most one argument for now.", loc);
6767+
}
6768+
fn_call_name = "_lpython_str_split";
6769+
ASR::call_arg_t str;
6770+
str.loc = loc;
6771+
str.m_value = s_var;
6772+
6773+
if (args.size() == 1) {
6774+
ASR::expr_t *arg_value = args[0].m_value;
6775+
ASR::ttype_t *arg_value_type = ASRUtils::expr_type(arg_value);
6776+
if (!ASRUtils::is_character(*arg_value_type)) {
6777+
throw SemanticError("str.split() takes one argument of type: str", loc);
6778+
}
6779+
ASR::call_arg_t value;
6780+
value.loc = loc;
6781+
value.m_value = args[0].m_value;
6782+
fn_args.push_back(al, str);
6783+
fn_args.push_back(al, value);
6784+
} else {
6785+
fn_args.push_back(al, str);
6786+
}
67646787
} else if(attr_name.size() > 2 && attr_name[0] == 'i' && attr_name[1] == 's') {
67656788
/*
67666789
String Validation Methods i.e all "is" based functions are handled here

src/lpython/semantics/python_comptime_eval.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ struct PythonIntrinsicProcedures {
8888
{"_lpython_str_rstrip", {m_builtin, &not_implemented}},
8989
{"_lpython_str_lstrip", {m_builtin, &not_implemented}},
9090
{"_lpython_str_strip", {m_builtin, &not_implemented}},
91+
{"_lpython_str_split", {m_builtin, &not_implemented}},
9192
{"_lpython_str_swapcase", {m_builtin, &not_implemented}},
9293
{"_lpython_str_startswith", {m_builtin, &not_implemented}},
9394
{"_lpython_str_endswith", {m_builtin, &not_implemented}},

src/runtime/lpython_builtin.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -821,6 +821,44 @@ def _lpython_str_strip(x: str) -> str:
821821
res = _lpython_str_rstrip(res)
822822
return res
823823

824+
@overload
def _lpython_str_split(x: str) -> list[str]:
    """
    Split `x` on runs of whitespace, like `str.split()` called without a
    separator.

    Leading and trailing whitespace is ignored and any run of consecutive
    whitespace characters counts as a single delimiter, so the result never
    contains empty strings. An empty or all-whitespace `x` gives an empty
    list.
    """
    # ASCII whitespace: space, tab, newline, carriage return, form feed,
    # vertical tab.
    whitespace: str = " \t\n\r\f\v"
    res: list[str] = []
    n: i32 = len(x)
    i: i32 = 0
    start: i32
    while i < n:
        # Skip the whitespace run in front of the next field.
        # _lpython_str_find returns -1 when the character is not found.
        while i < n and _lpython_str_find(whitespace, x[i]) != -1:
            i += 1
        if i == n:
            # Only trailing whitespace was left; nothing more to emit.
            break
        # Consume the field up to the next whitespace character (or the end).
        start = i
        while i < n and _lpython_str_find(whitespace, x[i]) == -1:
            i += 1
        res.append(x[start:i])
    return res
844+
845+
@overload
def _lpython_str_split(x: str, sep:str) -> list[str]:
    """
    Split `x` at every occurrence of the non-empty separator `sep`.

    Every piece between two separators is kept, including empty ones, and
    the text after the last separator is always appended, so the result has
    at least one element. Raises ValueError when `sep` is empty.
    """
    if len(sep) == 0:
        raise ValueError('empty separator')
    pieces: list[str] = []
    total: i32 = len(x)
    step: i32 = len(sep)
    pos: i32 = 0
    # Offset of the next separator relative to `pos`, or -1 when none is left.
    hit: i32 = _lpython_str_find(x[pos:total], sep)
    while hit != -1:
        pieces.append(x[pos:pos + hit])
        # Resume the search just past the separator that was found.
        pos += hit + step
        hit = _lpython_str_find(x[pos:total], sep)
    # Whatever follows the last separator (possibly empty) is the final piece.
    pieces.append(x[pos:total])
    return pieces
861+
824862
@overload
825863
def _lpython_str_swapcase(s: str) -> str:
826864
res :str = ""
@@ -870,7 +908,7 @@ def _lpython_str_partition(s:str, sep: str) -> tuple[str, str, str]:
870908
if len(s) == 0:
871909
raise ValueError('empty string cannot be partitioned')
872910
if len(sep) == 0:
873-
raise ValueError('empty seperator')
911+
raise ValueError('empty separator')
874912
res : tuple[str, str, str]
875913
ind : i32
876914
ind = _lpython_str_find(s, sep)

tests/reference/asr-array_01_decl-39cf894.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"outfile": null,
77
"outfile_hash": null,
88
"stdout": "asr-array_01_decl-39cf894.stdout",
9-
"stdout_hash": "489d2e6a364cc6020f2942b94738849349928901f1269b975a6e2464",
9+
"stdout_hash": "5d4751789e2ddcd882c4d6026f801ba32cfc227fafff7395a788bdd9",
1010
"stderr": null,
1111
"stderr_hash": null,
1212
"returncode": 0

0 commit comments

Comments
 (0)