split Subroutine

private pure subroutine split(str, token, chunk_size, vals)

Split a character string using a token. This routine is inspired by the Python split function.

Example

   character(len=:),allocatable :: s
   type(csv_string),dimension(:),allocatable :: vals
   s = '1,2,3,4,5'
   call split(s,',',10,vals)

Warning

Does not account for tokens contained within quoted strings!

Arguments

Type IntentOptional Attributes Name
character(len=*), intent(in) :: str
character(len=*), intent(in) :: token
integer, intent(in) :: chunk_size

for expanding vectors

type(csv_string), intent(out), dimension(:), allocatable :: vals

Calls

proc~~split~~CallsGraph proc~split split proc~expand_vector expand_vector proc~split->proc~expand_vector

Called by

proc~~split~~CalledByGraph proc~split split proc~tokenize_csv_line csv_file%tokenize_csv_line proc~tokenize_csv_line->proc~split proc~read_csv_file csv_file%read_csv_file proc~read_csv_file->proc~tokenize_csv_line

Source Code

    pure subroutine split(str,token,chunk_size,vals)

    !! Split a character string into substrings using a token
    !! (inspired by the Python `split` function).
    !!
    !! `vals` is always returned with `n+1` elements, where `n` is the
    !! number of (non-overlapping) occurrences of `token` found in `str`.
    !! Elements between adjacent tokens, before a leading token, or after
    !! a trailing token are returned as empty strings.
    !!
    !! If `token` has zero length there is nothing to split on, so `str`
    !! is returned unchanged as the single element of `vals`.
    !!
    !!### Example
    !!```fortran
    !!   character(len=:),allocatable :: s
    !!   type(csv_string),dimension(:),allocatable :: vals
    !!   s = '1,2,3,4,5'
    !!   call split(s,',',10,vals)
    !!```
    !!
    !!@warning Does not account for tokens contained within quoted strings.

    implicit none

    character(len=*),intent(in)  :: str         !! the string to split
    character(len=*),intent(in)  :: token       !! the separator to split on
    integer,intent(in)           :: chunk_size  !! for expanding vectors
    type(csv_string),dimension(:),allocatable,intent(out) :: vals
                                                !! the substrings of `str`

    integer :: i          !! counter
    integer :: len_str    !! significant length of `str`
    integer :: len_token  !! length of the token
    integer :: n_tokens   !! number of tokens
    integer :: i1         !! start index of a substring
    integer :: i2         !! end index of a substring
    integer :: j          !! index to start looking for the next token
    integer,dimension(:),allocatable :: itokens !! start indices of the
                                                !! token locations in `str`

    len_token = len(token)  ! length of the token

    ! A zero-length token matches at position 1 of every substring
    ! (`index` returns 1), and the search position below would then
    ! never advance. Treat it as "no token present".
    if (len_token == 0) then
        allocate(vals(1))
        vals(1)%str = str
        return
    end if

    ! first, count the number of times the token
    ! appears in the string, and get the token indices.
    !
    ! Examples:
    !  ',         '    --> 1
    !  '1234,67,90'    --> 5,8
    !  '123,      '    --> 4

    ! length of the string
    if (token == ' ') then
        ! in this case, we can't ignore trailing space
        len_str = len(str)
    else
        ! safe to ignore trailing space when looking for tokens
        len_str = len_trim(str)
    end if

    j = 1         ! index to start looking for the next token
    n_tokens = 0  ! initialize the token counter
    do
        if (j>len_str) exit      ! end of string, finished
        i = index(str(j:),token) ! index of next token in remaining string
        if (i<=0) exit           ! no more tokens found
        call expand_vector(itokens,n_tokens,chunk_size,i+j-1)  ! save the token location
        j = j + i + (len_token - 1)  ! resume just past the token that was found
    end do
    call expand_vector(itokens,n_tokens,chunk_size,finished=.true.)  ! resize the vector

    allocate(vals(n_tokens+1))

    if (n_tokens>0) then

        ! from here on, use the full length so that any trailing
        ! blanks are retained in the last element
        len_str = len(str)

        ! everything before the first token
        i1 = 1
        i2 = itokens(1)-1
        if (i2>=i1) then
            vals(1)%str = str(i1:i2)
        else
            vals(1)%str = ''  !the first character is a token
        end if

        !      1 2 3
        !    'a,b,c,d'

        ! everything between consecutive tokens
        do i=2,n_tokens
            i1 = itokens(i-1)+len_token
            i2 = itokens(i)-1
            if (i2>=i1) then
                vals(i)%str = str(i1:i2)
            else
                vals(i)%str = ''  !empty element (e.g., 'abc,,def')
            end if
        end do

        ! everything after the last token
        i1 = itokens(n_tokens) + len_token
        i2 = len_str
        if (i1<=i2) then
            vals(n_tokens+1)%str = str(i1:i2)
        else
            vals(n_tokens+1)%str = ''  !the last character was a token
        end if

    else
        !no tokens present, so just return the original string:
        vals(1)%str = str
    end if

    end subroutine split