Skip to content

Strings methods: find() #1012

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions integration_tests/test_str_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,26 @@ def strip():
assert " AASAsaSas " .lstrip() == "AASAsaSas "
assert " AASAsaSas " .strip() == "AASAsaSas"

def find():
    # Checks str.find() on both string variables and string literals:
    # single-character and multi-character needles, a long haystack,
    # and the empty-string corner cases (empty needle -> 0, missing -> -1).
    text: str
    needle: str
    sentence: str
    blank: str

    # Receiver and needle held in variables.
    text = "AaaaAABBbbbbBB!@12223BN"
    needle = "@"
    assert text.find(needle) == 15
    assert text.find('B') == 6

    # Receiver written as a literal.
    assert "empty strings" .find("string") == 6
    assert "".find("dd") == -1
    assert "".find("") == 0

    # Longer haystack; an empty needle always matches at index 0.
    sentence = "Well copying a string from a website makes us prone to copyright claims. Can you just write something of your own? Like just take this review comment and put it as a string?"
    assert sentence.find("of") == 102
    assert sentence.find("own") == 110
    assert sentence.find("this") == 130
    assert sentence.find("") == 0

    # Empty haystack held in a variable.
    blank = ""
    assert blank.find("") == 0
    assert blank.find("we") == -1

def startswith():
s: str
s = " empty"
Expand All @@ -45,4 +65,5 @@ def startswith():
capitalize()
lower()
strip()
find()
startswith()
98 changes: 98 additions & 0 deletions src/lpython/semantics/python_ast_to_asr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4512,6 +4512,30 @@ class BodyVisitor : public CommonVisitor<BodyVisitor> {
args.push_back(al, arg);
tmp = make_call_helper(al, fn_div, current_scope, args, "_lpython_str_lower", x.base.base.loc);
return;
} else if (std::string(at->m_attr) == std::string("find")) {
    // `<receiver>.find(sub)`: check that exactly one `str` argument was
    // given, then lower the call to the intrinsic
    // `_lpython_str_find(receiver, sub)`.
    // NOTE(review): `se` is defined outside this hunk; presumably it is the
    // already-lowered receiver expression -- confirm against the enclosing
    // function.
    if (args.size() != 1) {
        throw SemanticError("str.find() takes one argument",
                x.base.base.loc);
    }
    ASR::expr_t *arg_sub = args[0].m_value;
    ASR::ttype_t *arg_sub_type = ASRUtils::expr_type(arg_sub);
    if (!ASRUtils::is_character(*arg_sub_type)) {
        throw SemanticError("str.find() takes one argument of type: str",
                x.base.base.loc);
    }
    ASR::symbol_t *fn_find = resolve_intrinsic_function(x.base.base.loc, "_lpython_str_find");
    // Two call arguments are pushed below, so reserve room for both up front.
    Vec<ASR::call_arg_t> function_args;
    function_args.reserve(al, 2);
    ASR::call_arg_t str;
    str.loc = x.base.base.loc;
    str.m_value = se;
    ASR::call_arg_t sub;
    sub.loc = x.base.base.loc;
    sub.m_value = arg_sub;
    function_args.push_back(al, str);
    function_args.push_back(al, sub);
    tmp = make_call_helper(al, fn_find, current_scope, function_args, "_lpython_str_find", x.base.base.loc);
    return;
} else if (std::string(at->m_attr) == std::string("rstrip")) {
if(args.size() != 0) {
throw SemanticError("str.srtrip() takes no arguments",
Expand Down Expand Up @@ -4642,6 +4666,80 @@ class BodyVisitor : public CommonVisitor<BodyVisitor> {
1, 1, nullptr, nullptr , 0));
tmp = ASR::make_StringConstant_t(al, x.base.base.loc, s2c(al, res), str_type);
return;
} else if (std::string(at->m_attr) == std::string("find")) {
    // `"literal".find(sub)`: the receiver is a string literal in the AST.
    //  * If `sub` also has a compile-time value, fold the call to an
    //    integer constant (index of the first match, or -1).
    //  * Otherwise emit a call to the intrinsic `_lpython_str_find`.
    if (args.size() != 1) {
        throw SemanticError("str.find() takes one argument",
                x.base.base.loc);
    }
    ASR::expr_t *arg = args[0].m_value;
    ASR::ttype_t *type = ASRUtils::expr_type(arg);
    if (!ASRUtils::is_character(*type)) {
        throw SemanticError("str.find() takes one argument of type: str",
                arg->base.loc);
    }
    AST::ConstantStr_t *str_str_con = AST::down_cast<AST::ConstantStr_t>(at->m_value);
    std::string str = str_str_con->m_value;
    ASR::expr_t *arg_value = ASRUtils::expr_value(arg);
    if (arg_value != nullptr) {
        // Cast the compile-time *value*, not `arg` itself: having a value
        // does not guarantee that `arg` is a StringConstant node.
        ASR::StringConstant_t *sub_str_con = ASR::down_cast<ASR::StringConstant_t>(arg_value);
        std::string sub = sub_str_con->m_s;
        // std::string::find gives the lowest match index, 0 for an empty
        // needle (even in an empty haystack) and npos when absent, which
        // maps directly onto the 0 / -1 results expected of str.find().
        const std::string::size_type pos = str.find(sub);
        const int res = (pos == std::string::npos) ? -1 : static_cast<int>(pos);
        tmp = ASR::make_IntegerConstant_t(al, x.base.base.loc, res,
                ASRUtils::TYPE(ASR::make_Integer_t(al, x.base.base.loc,
                    4, nullptr, 0)));
    } else {
        ASR::symbol_t *fn_find = resolve_intrinsic_function(x.base.base.loc, "_lpython_str_find");
        // Named `function_args` so the outer `args` is not shadowed;
        // two arguments are pushed, so reserve room for both.
        Vec<ASR::call_arg_t> function_args;
        function_args.reserve(al, 2);
        ASR::call_arg_t str_arg;
        str_arg.loc = x.base.base.loc;
        ASR::ttype_t *str_type = ASRUtils::TYPE(ASR::make_Character_t(al, x.base.base.loc,
                1, 0, nullptr, nullptr, 0));
        // Materialise the literal receiver as an ASR string constant.
        str_arg.m_value = ASRUtils::EXPR(
                ASR::make_StringConstant_t(al, x.base.base.loc, s2c(al, str), str_type));
        ASR::call_arg_t sub_arg;
        sub_arg.loc = x.base.base.loc;
        sub_arg.m_value = arg;
        function_args.push_back(al, str_arg);
        function_args.push_back(al, sub_arg);
        tmp = make_call_helper(al, fn_find, current_scope, function_args, "_lpython_str_find", x.base.base.loc);
    }
    return;
} else if (std::string(at->m_attr) == std::string("rstrip")) {
if(args.size() != 0) {
throw SemanticError("str.rstrip() takes no arguments",
Expand Down
9 changes: 8 additions & 1 deletion src/lpython/semantics/python_comptime_eval.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ struct PythonIntrinsicProcedures {
{"min" , {m_builtin , &eval_min}},
{"_lpython_str_capitalize", {m_builtin, &eval__lpython_str_capitalize}},
{"_lpython_str_lower", {m_builtin, &eval__lpython_str_lower}},
{"_lpython_str_find", {m_builtin, &eval_lpython_str_find}},
{"_lpython_str_rstrip", {m_builtin, &eval__lpython_str_rstrip}},
{"_lpython_str_lstrip", {m_builtin, &eval__lpython_str_lstrip}},
{"_lpython_str_strip", {m_builtin, &eval__lpython_str_strip}},
Expand Down Expand Up @@ -741,7 +742,13 @@ struct PythonIntrinsicProcedures {
ASR::ttype_t *res_type = ASRUtils::TYPE(ASR::make_StringConstant_t(al, loc, s2c(al, ""), type));
return ASR::down_cast<ASR::expr_t>(ASR::make_StringConstant_t(al, loc, s2c(al, res), res_type));
}


static ASR::expr_t *eval_lpython_str_find(Allocator &al, const Location &loc, Vec<ASR::expr_t *> &/*args*/) {
    // Placeholder evaluator for `_lpython_str_find`: the arguments are
    // ignored and an integer constant -1 is always returned. The real
    // compile-time evaluation is implemented in the AST -> ASR pass.
    // NOTE(review): the sibling evaluators registered in the table are named
    // `eval__lpython_str_*` (two underscores); this one has a single
    // underscore. Renaming it also requires updating the table entry.
    ASR::ttype_t *i32_type = ASRUtils::TYPE(
            ASR::make_Integer_t(al, loc, 4, nullptr, 0));
    auto *not_found = ASR::make_IntegerConstant_t(al, loc, -1, i32_type);
    return ASR::down_cast<ASR::expr_t>(not_found);
}

static ASR::expr_t *eval__lpython_str_startswith(Allocator &al, const Location &loc, Vec<ASR::expr_t *> &/*args*/) {
// compile time action implemented on ast->asr
ASR::ttype_t* res_type = ASRUtils::TYPE(ASR::make_Logical_t(al, loc,
Expand Down
47 changes: 47 additions & 0 deletions src/runtime/lpython_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,53 @@ def _lpython_str_lower(x: str) -> str:
return res

@overload
def _lpython_str_find(s: str, sub: str) -> i32:
    """
    Return the lowest index in `s` at which the substring `sub` starts,
    or -1 if `sub` does not occur in `s`. An empty `sub` is found at
    index 0, including when `s` is empty as well.

    The search uses Knuth-Morris-Pratt matching: a longest-proper-prefix-
    that-is-also-a-suffix table (`lps`) is built for `sub`, so characters
    of `s` are never re-examined after a mismatch.
    """
    s_len: i32
    sub_len: i32
    _len: i32
    i: i32
    j: i32
    # Bind the list explicitly; a bare annotation does not create it.
    lps: list[i32] = []
    s_len = len(s)
    sub_len = len(sub)

    if sub_len == 0:
        return 0
    if s_len == 0:
        return -1

    # lps[k] = length of the longest proper prefix of sub[0..k] that is
    # also a suffix of it. Entries start at 0 and are only raised below.
    for i in range(sub_len):
        lps.append(0)

    i = 1
    _len = 0
    while i < sub_len:
        if sub[i] == sub[_len]:
            _len += 1
            lps[i] = _len
            i += 1
        elif _len != 0:
            # Fall back to the next shorter border and retry the same i.
            _len = lps[_len - 1]
        else:
            # No border ends here; lps[i] already holds 0.
            i += 1

    i = 0
    j = 0
    # Stop as soon as the unread tail of `s` is shorter than the part of
    # `sub` still to be matched. This also keeps s[i] in bounds.
    while (s_len - i) >= (sub_len - j):
        if sub[j] == s[i]:
            i += 1
            j += 1
        if j == sub_len:
            # Whole of `sub` matched, ending just before index i.
            return i - j
        elif i < s_len and sub[j] != s[i]:
            if j != 0:
                j = lps[j - 1]
            else:
                i = i + 1

    return -1

def _lpython_str_rstrip(x: str) -> str:
ind: i32
ind = len(x) - 1
Expand Down
2 changes: 1 addition & 1 deletion tests/reference/asr-complex1-f26c460.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"outfile": null,
"outfile_hash": null,
"stdout": "asr-complex1-f26c460.stdout",
"stdout_hash": "533e7e69cb0732f89d58e6ee1d45d246cda1466bad59e039353ec7d3",
"stdout_hash": "ec30511e10451c64a896595e1470c429df7ec2bb31dd60b299a15d2b",
"stderr": null,
"stderr_hash": null,
"returncode": 0
Expand Down
Loading