Skip to content

Some efficiency improvements when parsing strings. #352

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jul 23, 2018
Merged
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ visual_studio/jsonfortranlib/
visual_studio/jsonfortrantest/Debug/
visual_studio/jsonfortrantest/Release/
visual_studio/jsonfortrantest/x64

visual_studio/config
My Advisor Results*
My Amplifier*
My Inspector*
x64/
Expand Down
2 changes: 1 addition & 1 deletion files/inputs/invalid.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@
"..\\path\\to\\files\\file1.txt",
"..\\path\\to\\files\\file2.txt",
"..\\path\\to\\files\\file3.txt"
}
}
4 changes: 4 additions & 0 deletions files/inputs/invalid3.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"a": "blah\",
"b": 2
}
3 changes: 2 additions & 1 deletion files/inputs/test1.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
"..\\path\\to\\files\\file3.txt",
"test \u2FA4 \uABCD \uABCD\uABCDtest",
" test \\u \" blah\\\" test test",
"..\\path\\to\\files\\"
"..\\path\\to\\files\\",
"\\"
],
"a": {
"b": 1
Expand Down
16 changes: 12 additions & 4 deletions src/json_file_module.F90
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ module json_file_module

private

type(json_core) :: core !! The instance of the [[json_core(type)]] factory used for this file.
type(json_core) :: core !! The instance of the [[json_core(type)]]
!! factory used for this file.
type(json_value),pointer :: p => null() !! the JSON structure read from the file

contains
Expand Down Expand Up @@ -319,10 +320,17 @@ subroutine json_file_check_for_errors(me,status_ok,error_msg)
implicit none

class(json_file),intent(inout) :: me
logical(LK),intent(out) :: status_ok !! true if there were no errors
character(kind=CK,len=:),allocatable,intent(out) :: error_msg !! the error message (if there were errors)

logical(LK),intent(out),optional :: status_ok !! true if there were no errors
character(kind=CK,len=:),allocatable,intent(out),optional :: error_msg !! the error message
!! (if there were errors)

#if defined __GFORTRAN__
character(kind=CK,len=:),allocatable :: tmp !! workaround for gfortran bugs
call me%core%check_for_errors(status_ok,tmp)
error_msg = tmp
#else
call me%core%check_for_errors(status_ok,error_msg)
#endif

end subroutine json_file_check_for_errors
!*****************************************************************************************
Expand Down
143 changes: 77 additions & 66 deletions src/json_string_utilities.F90
Original file line number Diff line number Diff line change
Expand Up @@ -433,68 +433,74 @@ end subroutine escape_string
! * `\t` - horizontal tab
! * `\uXXXX` - 4 hexadecimal digits

subroutine unescape_string(str_in, str_out, error_message)
subroutine unescape_string(str, error_message)

implicit none

character(kind=CK,len=*),intent(in) :: str_in !! string as stored in a [[json_value]]
character(kind=CK,len=:),allocatable,intent(out) :: str_out !! decoded string
character(kind=CK,len=:),allocatable,intent(out) :: error_message !! will be allocated if there was an error
character(kind=CK,len=:),allocatable,intent(inout) :: str !! in: string as stored
!! in a [[json_value]].
!! out: decoded string.
character(kind=CK,len=:),allocatable,intent(out) :: error_message !! will be allocated if
!! there was an error

integer :: i !! counter
integer :: n !! length of str_in
integer :: m !! length of str_out
integer :: n !! length of `str`
integer :: m !! length of `str_tmp`
character(kind=CK,len=1) :: c !! for scanning each character in string
character(kind=CK,len=:),allocatable :: str_tmp !! temp decoded string (if the input
!! string contains an escape character
!! and needs to be decoded).

#if defined __GFORTRAN__
character(kind=CK,len=:),allocatable :: tmp !! for GFortran bug workaround
#endif

if (scan(str_in,backslash)>0) then
if (scan(str,backslash)>0) then

!there is at least one escape character, so process this string:

n = len(str_in)
str_out = repeat(space,n) !size the output string (will be trimmed later)
m = 0 !counter in str_out
i = 0 !counter in str_in
n = len(str)
str_tmp = repeat(space,n) !size the output string (will be trimmed later)
m = 0 !counter in str_tmp
i = 0 !counter in str

do

i = i + 1
if (i>n) exit ! finished
c = str_in(i:i) ! get next character in the string
c = str(i:i) ! get next character in the string

if (c == backslash) then

if (i<n) then

i = i + 1
c = str_in(i:i) !character after the escape

if (any(c == [quotation_mark,backslash,slash, &
to_unicode(['b','f','n','r','t'])])) then

select case(c)
case (quotation_mark,backslash,slash)
!use d as is
case (CK_'b')
c = bspace
case (CK_'f')
c = formfeed
case (CK_'n')
c = newline
case (CK_'r')
c = carriage_return
case (CK_'t')
c = horizontal_tab
end select
c = str(i:i) !character after the escape

select case(c)
case (quotation_mark,backslash,slash)
!use c as is
m = m + 1
str_tmp(m:m) = c
case (CK_'b')
c = bspace
m = m + 1
str_tmp(m:m) = c
case (CK_'f')
c = formfeed
m = m + 1
str_out(m:m) = c
str_tmp(m:m) = c
case (CK_'n')
c = newline
m = m + 1
str_tmp(m:m) = c
case (CK_'r')
c = carriage_return
m = m + 1
str_tmp(m:m) = c
case (CK_'t')
c = horizontal_tab
m = m + 1
str_tmp(m:m) = c

else if (c == 'u') then !expecting 4 hexadecimal digits after
!the escape character [\uXXXX]
case (CK_'u') ! expecting 4 hexadecimal digits after
! the escape character [\uXXXX]

!for now, we are just returning them as is
![only checking that the 4 digits form a valid hex sequence]
Expand All @@ -504,54 +510,59 @@ subroutine unescape_string(str_in, str_out, error_message)
! \uXXXX

if (i+4<=n) then
m = m + 1
str_out(m:m+5) = str_in(i-1:i+4)
i = i + 4
m = m + 5

! validate the hex string:
if (valid_json_hex(str(i+1:i+4))) then
m = m + 1
str_tmp(m:m+5) = str(i-1:i+4)
i = i + 4
m = m + 5
else
error_message = 'Error in unescape_string:'//&
' Invalid hexadecimal sequence in string "'//&
trim(str)//'" ['//str(i-1:i+4)//']'
if (allocated(str_tmp)) deallocate(str_tmp)
return
end if
else
error_message = 'Error in unescape_string:'//&
' Invalid hexadecimal sequence'//&
' in string: '//str_in(i-1:)
if (allocated(str_out)) deallocate(str_out)
' Invalid hexadecimal sequence in string "'//&
trim(str)//'" ['//str(i-1:)//']'
if (allocated(str_tmp)) deallocate(str_tmp)
return
end if

else
case default

!unknown escape character
error_message = 'Error in unescape_string:'//&
' unknown escape sequence in string "'//&
trim(str_in)//'" ['//backslash//c//']'
if (allocated(str_out)) deallocate(str_out)
' unknown escape sequence in string "'//&
trim(str)//'" ['//backslash//c//']'
if (allocated(str_tmp)) deallocate(str_tmp)
return
end if

end select

else
!an escape character is the last character in
! the string [this may not be valid syntax,
! but just keep it]
m = m + 1
str_out(m:m) = c
! an escape character is the last character in
! the string. This is an error.
error_message = 'Error in unescape_string:'//&
' invalid escape character in string "'//&
trim(str)//'"'
if (allocated(str_tmp)) deallocate(str_tmp)
return
end if

else
m = m + 1
str_out(m:m) = c
str_tmp(m:m) = c
end if

end do

!trim trailing space:
#if defined __GFORTRAN__
! workaround for Gfortran 6.1.0 bug
tmp = str_out(1:m)
str_out = tmp
#else
str_out = str_out(1:m)
#endif
str = str_tmp(1:m)

else
!there are no escape characters, so return as is:
str_out = str_in
end if

end subroutine unescape_string
Expand Down
Loading