Skip to content

Commit b833711

Browse files
Merge pull request #352 from jacobwilliams/351-string-speedup
Some efficiency improvements when parsing strings.
2 parents 4baff3b + 15da7e9 commit b833711

File tree

9 files changed

+190
-137
lines changed

9 files changed

+190
-137
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ visual_studio/jsonfortranlib/
2727
visual_studio/jsonfortrantest/Debug/
2828
visual_studio/jsonfortrantest/Release/
2929
visual_studio/jsonfortrantest/x64
30-
30+
visual_studio/config
31+
My Advisor Results*
3132
My Amplifier*
3233
My Inspector*
3334
x64/

files/inputs/invalid.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@
1010
"..\\path\\to\\files\\file1.txt",
1111
"..\\path\\to\\files\\file2.txt",
1212
"..\\path\\to\\files\\file3.txt"
13-
}
13+
}

files/inputs/invalid3.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"a": "blah\",
3+
"b": 2
4+
}

files/inputs/test1.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
"..\\path\\to\\files\\file3.txt",
1313
"test \u2FA4 \uABCD \uABCD\uABCDtest",
1414
" test \\u \" blah\\\" test test",
15-
"..\\path\\to\\files\\"
15+
"..\\path\\to\\files\\",
16+
"\\"
1617
],
1718
"a": {
1819
"b": 1

src/json_file_module.F90

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ module json_file_module
6464

6565
private
6666

67-
type(json_core) :: core !! The instance of the [[json_core(type)]] factory used for this file.
67+
type(json_core) :: core !! The instance of the [[json_core(type)]]
68+
!! factory used for this file.
6869
type(json_value),pointer :: p => null() !! the JSON structure read from the file
6970

7071
contains
@@ -319,10 +320,17 @@ subroutine json_file_check_for_errors(me,status_ok,error_msg)
319320
implicit none
320321

321322
class(json_file),intent(inout) :: me
322-
logical(LK),intent(out) :: status_ok !! true if there were no errors
323-
character(kind=CK,len=:),allocatable,intent(out) :: error_msg !! the error message (if there were errors)
324-
323+
logical(LK),intent(out),optional :: status_ok !! true if there were no errors
324+
character(kind=CK,len=:),allocatable,intent(out),optional :: error_msg !! the error message
325+
!! (if there were errors)
326+
327+
#if defined __GFORTRAN__
328+
character(kind=CK,len=:),allocatable :: tmp !! workaround for gfortran bugs
329+
call me%core%check_for_errors(status_ok,tmp)
330+
error_msg = tmp
331+
#else
325332
call me%core%check_for_errors(status_ok,error_msg)
333+
#endif
326334

327335
end subroutine json_file_check_for_errors
328336
!*****************************************************************************************

src/json_string_utilities.F90

Lines changed: 77 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -433,68 +433,74 @@ end subroutine escape_string
433433
! * `\t` - horizontal tab
434434
! * `\uXXXX` - 4 hexadecimal digits
435435

436-
subroutine unescape_string(str_in, str_out, error_message)
436+
subroutine unescape_string(str, error_message)
437437

438438
implicit none
439439

440-
character(kind=CK,len=*),intent(in) :: str_in !! string as stored in a [[json_value]]
441-
character(kind=CK,len=:),allocatable,intent(out) :: str_out !! decoded string
442-
character(kind=CK,len=:),allocatable,intent(out) :: error_message !! will be allocated if there was an error
440+
character(kind=CK,len=:),allocatable,intent(inout) :: str !! in: string as stored
441+
!! in a [[json_value]].
442+
!! out: decoded string.
443+
character(kind=CK,len=:),allocatable,intent(out) :: error_message !! will be allocated if
444+
!! there was an error
443445

444446
integer :: i !! counter
445-
integer :: n !! length of str_in
446-
integer :: m !! length of str_out
447+
integer :: n !! length of `str`
448+
integer :: m !! length of `str_tmp`
447449
character(kind=CK,len=1) :: c !! for scanning each character in string
450+
character(kind=CK,len=:),allocatable :: str_tmp !! temp decoded string (if the input
451+
!! string contains an escape character
452+
!! and needs to be decoded).
448453

449-
#if defined __GFORTRAN__
450-
character(kind=CK,len=:),allocatable :: tmp !! for GFortran bug workaround
451-
#endif
452-
453-
if (scan(str_in,backslash)>0) then
454+
if (scan(str,backslash)>0) then
454455

455456
!there is at least one escape character, so process this string:
456457

457-
n = len(str_in)
458-
str_out = repeat(space,n) !size the output string (will be trimmed later)
459-
m = 0 !counter in str_out
460-
i = 0 !counter in str_in
458+
n = len(str)
459+
str_tmp = repeat(space,n) !size the output string (will be trimmed later)
460+
m = 0 !counter in str_tmp
461+
i = 0 !counter in str
461462

462463
do
463464

464465
i = i + 1
465466
if (i>n) exit ! finished
466-
c = str_in(i:i) ! get next character in the string
467+
c = str(i:i) ! get next character in the string
467468

468469
if (c == backslash) then
469470

470471
if (i<n) then
471472

472473
i = i + 1
473-
c = str_in(i:i) !character after the escape
474-
475-
if (any(c == [quotation_mark,backslash,slash, &
476-
to_unicode(['b','f','n','r','t'])])) then
477-
478-
select case(c)
479-
case (quotation_mark,backslash,slash)
480-
!use d as is
481-
case (CK_'b')
482-
c = bspace
483-
case (CK_'f')
484-
c = formfeed
485-
case (CK_'n')
486-
c = newline
487-
case (CK_'r')
488-
c = carriage_return
489-
case (CK_'t')
490-
c = horizontal_tab
491-
end select
474+
c = str(i:i) !character after the escape
492475

476+
select case(c)
477+
case (quotation_mark,backslash,slash)
478+
!use d as is
479+
m = m + 1
480+
str_tmp(m:m) = c
481+
case (CK_'b')
482+
c = bspace
483+
m = m + 1
484+
str_tmp(m:m) = c
485+
case (CK_'f')
486+
c = formfeed
493487
m = m + 1
494-
str_out(m:m) = c
488+
str_tmp(m:m) = c
489+
case (CK_'n')
490+
c = newline
491+
m = m + 1
492+
str_tmp(m:m) = c
493+
case (CK_'r')
494+
c = carriage_return
495+
m = m + 1
496+
str_tmp(m:m) = c
497+
case (CK_'t')
498+
c = horizontal_tab
499+
m = m + 1
500+
str_tmp(m:m) = c
495501

496-
else if (c == 'u') then !expecting 4 hexadecimal digits after
497-
!the escape character [\uXXXX]
502+
case (CK_'u') ! expecting 4 hexadecimal digits after
503+
! the escape character [\uXXXX]
498504

499505
!for now, we are just returning them as is
500506
![not checking to see if it is a valid hex value]
@@ -504,54 +510,59 @@ subroutine unescape_string(str_in, str_out, error_message)
504510
! \uXXXX
505511

506512
if (i+4<=n) then
507-
m = m + 1
508-
str_out(m:m+5) = str_in(i-1:i+4)
509-
i = i + 4
510-
m = m + 5
513+
514+
! validate the hex string:
515+
if (valid_json_hex(str(i+1:i+4))) then
516+
m = m + 1
517+
str_tmp(m:m+5) = str(i-1:i+4)
518+
i = i + 4
519+
m = m + 5
520+
else
521+
error_message = 'Error in unescape_string:'//&
522+
' Invalid hexadecimal sequence in string "'//&
523+
trim(str)//'" ['//str(i-1:i+4)//']'
524+
if (allocated(str_tmp)) deallocate(str_tmp)
525+
return
526+
end if
511527
else
512528
error_message = 'Error in unescape_string:'//&
513-
' Invalid hexadecimal sequence'//&
514-
' in string: '//str_in(i-1:)
515-
if (allocated(str_out)) deallocate(str_out)
529+
' Invalid hexadecimal sequence in string "'//&
530+
trim(str)//'" ['//str(i-1:)//']'
531+
if (allocated(str_tmp)) deallocate(str_tmp)
516532
return
517533
end if
518534

519-
else
535+
case default
536+
520537
!unknown escape character
521538
error_message = 'Error in unescape_string:'//&
522-
' unknown escape sequence in string "'//&
523-
trim(str_in)//'" ['//backslash//c//']'
524-
if (allocated(str_out)) deallocate(str_out)
539+
' unknown escape sequence in string "'//&
540+
trim(str)//'" ['//backslash//c//']'
541+
if (allocated(str_tmp)) deallocate(str_tmp)
525542
return
526-
end if
543+
544+
end select
527545

528546
else
529-
!an escape character is the last character in
530-
! the string [this may not be valid syntax,
531-
! but just keep it]
532-
m = m + 1
533-
str_out(m:m) = c
547+
! an escape character is the last character in
548+
! the string. This is an error.
549+
error_message = 'Error in unescape_string:'//&
550+
' invalid escape character in string "'//&
551+
trim(str)//'"'
552+
if (allocated(str_tmp)) deallocate(str_tmp)
553+
return
534554
end if
535555

536556
else
537557
m = m + 1
538-
str_out(m:m) = c
558+
str_tmp(m:m) = c
539559
end if
540560

541561
end do
542562

543563
!trim trailing space:
544-
#if defined __GFORTRAN__
545-
! workaround for Gfortran 6.1.0 bug
546-
tmp = str_out(1:m)
547-
str_out = tmp
548-
#else
549-
str_out = str_out(1:m)
550-
#endif
564+
str = str_tmp(1:m)
551565

552-
else
553-
!there are no escape characters, so return as is:
554-
str_out = str_in
555566
end if
556567

557568
end subroutine unescape_string

0 commit comments

Comments
 (0)