Skip to content

Commit 79ec20a

Browse files
authored
Merge pull request #433 from Aman-Godara/develop_find
implemented low level find function for string matching
2 parents 88adb9f + d0fea86 commit 79ec20a

File tree

4 files changed

+236
-11
lines changed

4 files changed

+236
-11
lines changed

doc/specs/stdlib_strings.md

+63-5
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ and remains active until `last` index is crossed.
221221

222222
#### Syntax
223223

224-
`string = [[stdlib_strings(module):slice(interface)]] (string, first, last, stride)`
224+
`string = [[stdlib_strings(module):slice(interface)]] (string [, first, last, stride])`
225225

226226
#### Status
227227

@@ -233,13 +233,13 @@ Pure function.
233233

234234
#### Argument
235235

236-
- `string`: Character scalar or [[stdlib_string_type(module):string_type(type)]]
236+
- `string`: Character scalar or [[stdlib_string_type(module):string_type(type)]].
237237
This argument is intent(in).
238-
- `first`: integer
238+
- `first`: integer.
239239
This argument is intent(in) and optional.
240-
- `last`: integer
240+
- `last`: integer.
241241
This argument is intent(in) and optional.
242-
- `stride`: integer
242+
- `stride`: integer.
243243
This argument is intent(in) and optional.
244244

245245
#### Result value
@@ -270,3 +270,61 @@ program demo_slice
270270
271271
end program demo_slice
272272
```
273+
274+
275+
<!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
276+
### `find`
277+
278+
#### Description
279+
280+
Returns the starting index of the `occurrence`th occurrence of the substring `pattern`
281+
in the input string `string`.
282+
Default value of `occurrence` is set to `1`.
283+
If `consider_overlapping` is not provided or is set to `.true.`, the function counts two overlapping occurrences of the substring `pattern` as two different occurrences; if it is set to `.false.`, only non-overlapping occurrences are counted.
284+
If the `occurrence`th occurrence is not found, the function returns `0`. It also returns `0` if `pattern` is empty or if `occurrence` is not positive.
285+
286+
287+
#### Syntax
288+
289+
`index = [[stdlib_strings(module):find(interface)]] (string, pattern [, occurrence, consider_overlapping])`
290+
291+
#### Status
292+
293+
Experimental
294+
295+
#### Class
296+
297+
Elemental function
298+
299+
#### Argument
300+
301+
- `string`: Character scalar or [[stdlib_string_type(module):string_type(type)]].
302+
This argument is intent(in).
303+
- `pattern`: Character scalar or [[stdlib_string_type(module):string_type(type)]].
304+
This argument is intent(in).
305+
- `occurrence`: integer.
306+
This argument is intent(in) and optional.
307+
- `consider_overlapping`: logical.
308+
This argument is intent(in) and optional.
309+
310+
#### Result value
311+
312+
The result is a scalar of integer type, or an integer array whose rank equals the highest rank among all actual arguments.
313+
314+
#### Example
315+
316+
```fortran
317+
program demo_find
    ! Demonstrates the elemental 'find' function on string_type and
    ! character arguments, with scalar and array-valued optional arguments.
    use stdlib_string_type, only: string_type, assignment(=)
    use stdlib_strings, only: find
    implicit none
    type(string_type) :: string

    string = "needle in the character-stack"

    print *, find(string, "needle")                       ! 1
    print *, find(string, ["a", "c"], [3, 2])             ! [27, 20]
    print *, find("qwqwqwq", "qwq", 3, [.false., .true.]) ! [0, 5]

end program demo_find
330+
```

src/Makefile.manual

+3-2
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,8 @@ stdlib_stats_distribution_PRNG.o: \
126126
stdlib_kinds.o \
127127
stdlib_error.o
128128
stdlib_string_type.o: stdlib_ascii.o \
129-
stdlib_kinds.o
129+
stdlib_kinds.o
130130
stdlib_strings.o: stdlib_ascii.o \
131-
stdlib_string_type.o
131+
stdlib_string_type.o \
132+
stdlib_optval.o
132133
stdlib_math.o: stdlib_kinds.o

src/stdlib_strings.f90

+136-3
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,15 @@
44
!>
55
!> The specification of this module is available [here](../page/specs/stdlib_strings.html).
66
module stdlib_strings
7-
use stdlib_ascii, only : whitespace
8-
use stdlib_string_type, only : string_type, char, verify
7+
use stdlib_ascii, only: whitespace
8+
use stdlib_string_type, only: string_type, char, verify
9+
use stdlib_optval, only: optval
910
implicit none
1011
private
1112

1213
public :: strip, chomp
1314
public :: starts_with, ends_with
14-
public :: slice
15+
public :: slice, find
1516

1617

1718
!> Remove leading and trailing whitespace characters.
@@ -67,6 +68,16 @@ module stdlib_strings
6768
module procedure :: slice_char
6869
end interface slice
6970

71+
!> Finds the starting index of substring 'pattern' in the input 'string'
!> [Specifications](../page/specs/stdlib_strings.html#find)
!>
!> Version: experimental
interface find
    module procedure :: find_string_string
    module procedure :: find_string_char
    module procedure :: find_char_string
    module procedure :: find_char_char
end interface find
7081

7182
contains
7283

@@ -366,5 +377,127 @@ pure function slice_char(string, first, last, stride) result(sliced_string)
366377
end do
367378
end function slice_char
368379

380+
!> Locates the 'occurrence'th occurrence of 'pattern' within 'string',
!> where both are given as string_type values.
!>
!> Both arguments are converted to character scalars and handed to the
!> character implementation. The optional arguments are forwarded as they
!> are, so the defaults are applied by the character implementation.
!> Returns an integer
elemental function find_string_string(string, pattern, occurrence, consider_overlapping) result(res)
    type(string_type), intent(in) :: string
    type(string_type), intent(in) :: pattern
    integer, intent(in), optional :: occurrence
    logical, intent(in), optional :: consider_overlapping
    integer :: res

    res = find_char_char(char(string), char(pattern), &
        & occurrence=occurrence, &
        & consider_overlapping=consider_overlapping)

end function find_string_string
393+
394+
!> Locates the 'occurrence'th occurrence of the character scalar 'pattern'
!> within the string_type value 'string'.
!>
!> 'string' is converted to a character scalar and the search is delegated
!> to the character implementation. The optional arguments are forwarded
!> as they are.
!> Returns an integer
elemental function find_string_char(string, pattern, occurrence, consider_overlapping) result(res)
    type(string_type), intent(in) :: string
    character(len=*), intent(in) :: pattern
    integer, intent(in), optional :: occurrence
    logical, intent(in), optional :: consider_overlapping
    integer :: res

    res = find_char_char(char(string), pattern, &
        & occurrence=occurrence, &
        & consider_overlapping=consider_overlapping)

end function find_string_char
407+
408+
!> Locates the 'occurrence'th occurrence of the string_type value 'pattern'
!> within the character scalar 'string'.
!>
!> 'pattern' is converted to a character scalar and the search is delegated
!> to the character implementation. The optional arguments are forwarded
!> as they are.
!> Returns an integer
elemental function find_char_string(string, pattern, occurrence, consider_overlapping) result(res)
    character(len=*), intent(in) :: string
    type(string_type), intent(in) :: pattern
    integer, intent(in), optional :: occurrence
    logical, intent(in), optional :: consider_overlapping
    integer :: res

    res = find_char_char(string, char(pattern), &
        & occurrence=occurrence, &
        & consider_overlapping=consider_overlapping)

end function find_char_string
421+
422+
!> Searches 'string' for the 'occurrence'th occurrence of 'pattern' and
!> returns the index in 'string' at which that occurrence starts.
!>
!> 'occurrence' defaults to 1 and 'consider_overlapping' defaults to .true.
!> The result is 0 when 'pattern' is empty, when 'pattern' is longer than
!> 'string', when 'occurrence' is not positive, or when fewer than
!> 'occurrence' occurrences exist.
!> Returns an integer
elemental function find_char_char(string, pattern, occurrence, consider_overlapping) result(res)
    character(len=*), intent(in) :: string
    character(len=*), intent(in) :: pattern
    integer, intent(in), optional :: occurrence
    logical, intent(in), optional :: consider_overlapping
    integer :: lps_array(len(pattern))
    integer :: res, pos, matched, n_string, n_pattern, remaining
    logical :: allow_overlap

    res = 0
    n_string = len(string)
    n_pattern = len(pattern)
    remaining = optval(occurrence, 1)
    allow_overlap = optval(consider_overlapping, .true.)

    ! Nothing can be found in any of these cases; keep the result at 0.
    if (n_pattern < 1) return
    if (n_pattern > n_string) return
    if (remaining < 1) return

    lps_array = compute_lps(pattern)

    ! 'matched' is the number of leading characters of 'pattern' that agree
    ! with the characters of 'string' ending just before position 'pos'.
    matched = 0
    pos = 1
    scan: do while (pos <= n_string)
        if (string(pos:pos) == pattern(matched + 1:matched + 1)) then
            matched = matched + 1
            pos = pos + 1
            if (matched == n_pattern) then
                remaining = remaining - 1
                if (remaining == 0) then
                    ! 'pos' already points one past the end of the match.
                    res = pos - n_pattern
                    exit scan
                end if
                ! Overlapping search keeps the longest prefix of 'pattern' that
                ! is also a suffix of the match; otherwise start from scratch.
                matched = merge(lps_array(n_pattern), 0, allow_overlap)
            end if
        else if (matched > 0) then
            ! Fall back to the next shorter candidate prefix and compare the
            ! same character of 'string' again.
            matched = lps_array(matched)
        else
            pos = pos + 1
        end if
    end do scan

end function find_char_char
470+
471+
!> Builds the longest-prefix-suffix table of the input 'string': element i
!> holds the length of the longest proper prefix of string(1:i) that is
!> also a suffix of string(1:i).
!>
!> Returns an array of integers with one element per character of 'string'
!> (a zero-sized array for an empty 'string').
pure function compute_lps(string) result(lps_array)
    character(len=*), intent(in) :: string
    integer :: lps_array(len(string))
    integer :: pos, prefix_len, n

    n = len(string)
    if (n == 0) return

    ! A single character has no proper prefix.
    lps_array(1) = 0

    ! 'prefix_len' is the length of the prefix currently being extended.
    prefix_len = 0
    do pos = 2, n
        ! Shrink the candidate prefix until it can be extended by
        ! string(pos:pos) or no candidate is left.
        do while (prefix_len > 0)
            if (string(prefix_len + 1:prefix_len + 1) == string(pos:pos)) exit
            prefix_len = lps_array(prefix_len)
        end do
        if (string(prefix_len + 1:prefix_len + 1) == string(pos:pos)) then
            prefix_len = prefix_len + 1
        end if
        lps_array(pos) = prefix_len
    end do

end function compute_lps
501+
369502

370503
end module stdlib_strings

src/tests/string/test_string_functions.f90

+34-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ module test_string_functions
44
use stdlib_error, only : check
55
use stdlib_string_type, only : string_type, assignment(=), operator(==), &
66
to_lower, to_upper, to_title, to_sentence, reverse
7-
use stdlib_strings, only: slice
7+
use stdlib_strings, only: slice, find
88
use stdlib_optval, only: optval
99
use stdlib_ascii, only : to_string
1010
implicit none
@@ -162,6 +162,38 @@ subroutine test_slice_string
162162

163163
end subroutine test_slice_string
164164

165+
! Checks the generic 'find' for string_type and character arguments, scalar
! and array-valued (elemental) calls, overlapping and non-overlapping
! counting, and the cases in which 0 is returned.
subroutine test_find
    type(string_type) :: test_string_1, test_string_2, test_pattern_1, test_pattern_2
    test_string_1 = "qwqwqwqwqwqwqw"
    test_string_2 = "abccbabccbabc"
    test_pattern_1 = "qwq"
    test_pattern_2 = "abccbabc"

    call check(all(find([test_string_1, test_string_2], test_pattern_1, 4) == [7, 0]), &
        & 'Find: [test_string_1, test_string_2], test_pattern_1, 4')
    call check(all(find(test_string_1, [test_pattern_1, test_pattern_2], 3, .false.) == [9, 0]), &
        & 'Find: test_string_1, [test_pattern_1, test_pattern_2], 3, .false.')
    call check(find(test_string_1, test_pattern_1, 7) == 0, &
        & 'Find: test_string_1, test_pattern_1, 7')
    call check(all(find([test_string_1, test_string_2, test_string_2], [test_pattern_1, &
        & test_pattern_2, test_pattern_2], [7, 2, 2], [.true., .false., .true.]) == [0, 0, 6]), &
        & 'Find: [test_string_1, test_string_2, test_string_2], [test_pattern_1, &
        & test_pattern_2, test_pattern_2], [7, 2, 2], [.true., .false., .true.]')
    call check(find("qwqwqwqwqwqwqw", test_pattern_1) == 1, &
        & 'Find: "qwqwqwqwqwqwqw", test_pattern_1')
    call check(all(find(test_string_1, ["qwq", "wqw"], 2) == [3, 4]), &
        & 'Find: test_string_1, ["qwq", "wqw"], 2')
    call check(find("qwqwqwqwqwqwqw", "qwq", 2, .false.) == 5, &
        & 'Find: "qwqwqwqwqwqwqw", "qwq", 2, .false.')
    call check(find("", "") == 0, &
        & 'Find: "", ""')
    call check(find("", test_pattern_1) == 0, &
        & 'Find: "", test_pattern_1')
    call check(find(test_string_1, "") == 0, &
        & 'Find: test_string_1, ""')

    ! A non-positive 'occurrence' never matches.
    call check(find(test_string_1, test_pattern_1, 0) == 0, &
        & 'Find: test_string_1, test_pattern_1, 0')
    call check(find(test_string_1, test_pattern_1, -1) == 0, &
        & 'Find: test_string_1, test_pattern_1, -1')
    ! A pattern longer than the string never matches.
    call check(find("qw", test_pattern_1) == 0, &
        & 'Find: "qw", test_pattern_1')
    ! A pattern equal to the whole string matches at index 1.
    call check(find("qwq", "qwq") == 1, &
        & 'Find: "qwq", "qwq"')

end subroutine test_find
196+
165197
subroutine test_slice_gen
166198
character(len=*), parameter :: test = &
167199
& "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
@@ -300,5 +332,6 @@ program tester
300332
call test_reverse_string
301333
call test_slice_string
302334
call test_slice_gen
335+
call test_find
303336

304337
end program tester

0 commit comments

Comments
 (0)