!***************************************************************************************** !> author: Jacob Williams ! license: BSD ! ! String sorting routines. module string_sort_module use iso_fortran_env, only: ip => INT32 ! integer precision implicit none private character(len=*),parameter :: lowercase_letters = 'abcdefghijklmnopqrstuvwxyz' character(len=*),parameter :: uppercase_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' integer,parameter :: max_size_for_insertion_sort = 20 !! max size for using insertion sort. type :: int_list !! For converting a string into a vector of integers, !! in order to perform "natural" sorting. !! !! Contiguous integer values are stored as an integer. !! Characters are stored as their ASCII value. !! !!### Example !! * 'A123b' (case insensitive) => [97,123,98] !! * 'A123b' (case sensitive) => [65,123,98] private integer :: length = 0 !! number of chunks integer(ip),dimension(:),allocatable :: chunk !! the integer values logical,dimension(:),allocatable :: chunk_is_int !! if the corresponding entry in `chunk` represents an integer !! from the string. Otherwise, it is the ASCII value for a single !! character. contains private generic,public :: operator(<) => ints_lt procedure :: ints_lt end type int_list interface swap module procedure :: swap_chars module procedure :: swap_ints end interface public :: lexical_sort_recursive public :: lexical_sort_nonrecursive public :: lexical_sort_natural_recursive public :: list_is_sorted contains !***************************************************************************************** !***************************************************************************************** !> ! Swap two integer values. pure elemental subroutine swap_ints(s1,s2) implicit none integer,intent(inout) :: s1 integer,intent(inout) :: s2 integer :: tmp tmp = s1 s1 = s2 s2 = tmp end subroutine swap_ints !***************************************************************************************** !***************************************************************************************** !> ! Converts a character string into an array of integers suitable for the ! "natural sorting" algorithm. ! !@warning If the integer is too large to fit in an integer(ip), ! then there will be problems. pure elemental subroutine string_to_int_list(str,case_sensitive,list) implicit none character(len=*),intent(in) :: str logical,intent(in) :: case_sensitive type(int_list),intent(out) :: list integer :: i !! counter integer :: n !! length of input str character(len=1) :: c !! temp character character(len=:),allocatable :: tmp !! for accumulating blocks of contiguous ints logical :: is_int !! if the current character is an integer logical :: accumulating_ints !! if a block of contiguous ints is !! being accumulated list%length = 0 ! actual length will be accumulated as we go n = len_trim(str) if (n>0) then allocate(list%chunk(n)) ! worst case: all single characters allocate(list%chunk_is_int(n)) list%chunk_is_int = .false. accumulating_ints = .false. tmp = '' do i=1,n ! loop through each character in the string c = str(i:i) is_int = character_is_integer(c) if ( is_int ) then ! is a number ! accumulate this character in the current int block accumulating_ints = .true. tmp = tmp//c else ! not a number if (accumulating_ints) then !finish off previous int block list%length = list%length + 1 list%chunk(list%length) = string_to_integer(tmp) list%chunk_is_int(list%length) = .true. accumulating_ints = .false. tmp = '' end if !accumulate ascii value for current character: list%length = list%length + 1 if (case_sensitive) then list%chunk(list%length) = ichar(c) else list%chunk(list%length) = ichar(lowercase_char(c)) end if end if end do if (accumulating_ints) then ! last int block list%length = list%length + 1 list%chunk(list%length) = string_to_integer(tmp) list%chunk_is_int(list%length) = .true. end if else !empty string, just add one element so we can sort it: allocate(list%chunk(1)) list%chunk = 0 list%length = 1 end if !resize the array: list%chunk = list%chunk(1:list%length) ! Fortran 2008 LHS auto-reallocation end subroutine string_to_int_list !***************************************************************************************** !***************************************************************************************** !> ! Returns true if the i1 < i2 for two [[int_list]] variables. ! Each integer in each list is compared starting from the beginning. ! Returns true if the first non-matching i1%chunk(:) < i2%chunk(:). ! !@note Whether or not it is a case sensitive comparison was determined ! when the strings were converted to [[int_list]] arrays. pure logical function ints_lt(i1,i2) implicit none class(int_list),intent(in) :: i1 class(int_list),intent(in) :: i2 integer :: i !! counter integer,parameter :: ascii_zero = ichar('0') ints_lt = .false. do i = 1, min(i1%length, i2%length) if ((i1%chunk_is_int(i) .and. i2%chunk_is_int(i)) .or. & (.not. i1%chunk_is_int(i) .and. .not. i2%chunk_is_int(i)) ) then !both integers or both characters if (i1%chunk(i)/=i2%chunk(i)) then ints_lt = i1%chunk(i) < i2%chunk(i) return end if else !for [integer,character] comparisons, the actual !integer value doesn't matter, so we compare to '0' if (i1%chunk_is_int(i)) then ints_lt = ascii_zero < i2%chunk(i) else ints_lt = i1%chunk(i) < ascii_zero end if return end if end do !special case where i2 begins with i1, but is longer ints_lt = (i1%length<i2%length) end function ints_lt !***************************************************************************************** !***************************************************************************************** !> ! Convert a string to an integer. ! !@note Based on similar routine from `JSON-Fortran`. ! !@warning If the integer is too large to fit in an integer(ip), ! then there will be problems. pure elemental function string_to_integer(str) result(ival) implicit none character(len=*),intent(in) :: str integer(ip) :: ival integer :: ndigits_digits,ndigits,ierr ! Compute how many digits we need to read ndigits = 2*len_trim(str) ndigits_digits = floor(log10(real(ndigits))) + 1 block character(len=ndigits_digits) :: digits_str ! large enough to hold ndigits string write(digits_str,'(I0)') ndigits read(str,'(I'//trim(digits_str)//')',iostat=ierr) ival if (ierr/=0) ival = huge(1_ip) ! for errors just return a large value end block end function string_to_integer !***************************************************************************************** !***************************************************************************************** !> ! Returns true if the character represents an integer ('0','1',...,'9'). pure elemental function character_is_integer(c) result(is_integer) implicit none character(len=1),intent(in) :: c logical :: is_integer is_integer = c>='0' .and. c<='9' end function character_is_integer !***************************************************************************************** !***************************************************************************************** !> ! Returns lowercase version of the string. pure elemental function lower(str) result(lcase) implicit none character(len=*),intent(in) :: str character(len=(len(str))) :: lcase integer :: i,n n = len_trim(str) if (n>0) then do concurrent (i=1:n) lcase(i:i) = lowercase_char(str(i:i)) end do else lcase = '' end if end function lower !***************************************************************************************** !***************************************************************************************** !> ! Returns lowercase version of the character. pure elemental function lowercase_char(c) result(lcase) implicit none character(len=1),intent(in) :: c character(len=1) :: lcase integer :: j j = index( uppercase_letters,c ) if (j>0) then lcase = lowercase_letters(j:j) else lcase = c end if end function lowercase_char !***************************************************************************************** !***************************************************************************************** !> ! Returns true if the s1 < s2 in a lexical sense (can be case sensitive). pure logical function lexical_lt(s1,s2,case_sensitive) implicit none character(len=*),intent(in) :: s1 character(len=*),intent(in) :: s2 logical,intent(in) :: case_sensitive integer :: i !! counter character(len=1) :: c1 !! character from s1 character(len=1) :: c2 !! character from s2 lexical_lt = .false. do i = 1, min(len(s1), len(s2)) if (case_sensitive) then c1 = s1(i:i) c2 = s2(i:i) else c1 = lower(s1(i:i)) c2 = lower(s2(i:i)) end if if (c1/=c2) then lexical_lt = c1 < c2 return end if end do !special case where s2 begins with s1, but is longer lexical_lt = (len(s1)<len(s2)) end function lexical_lt !***************************************************************************************** !***************************************************************************************** !> ! Returns true if the s1 > s2 in a lexical sense (can be case sensitive). pure logical function lexical_gt(s1,s2,case_sensitive) implicit none character(len=*),intent(in) :: s1 character(len=*),intent(in) :: s2 logical,intent(in) :: case_sensitive integer :: i !! counter character(len=1) :: c1 !! character from s1 character(len=1) :: c2 !! character from s2 lexical_gt = .false. do i = 1, min(len(s1), len(s2)) if (case_sensitive) then c1 = s1(i:i) c2 = s2(i:i) else c1 = lower(s1(i:i)) c2 = lower(s2(i:i)) end if if (c1/=c2) then lexical_gt = c1 > c2 return end if end do !special case where s2 begins with s1, but is longer lexical_gt = (len(s1)>len(s2)) end function lexical_gt !***************************************************************************************** !***************************************************************************************** !> ! Returns true if the s1 == s2 in a lexical sense (can be case sensitive). pure logical function lexical_eq(s1,s2,case_sensitive) implicit none character(len=*),intent(in) :: s1 character(len=*),intent(in) :: s2 logical,intent(in) :: case_sensitive if (case_sensitive) then lexical_eq = s1 == s2 else lexical_eq = lower(s1) == lower(s2) end if end function lexical_eq !***************************************************************************************** !***************************************************************************************** !> ! Returns true if the s1 <= s2 in a lexical sense (can be case sensitive). pure logical function lexical_le(s1,s2,case_sensitive) implicit none character(len=*),intent(in) :: s1 character(len=*),intent(in) :: s2 logical,intent(in) :: case_sensitive lexical_le = lexical_lt(s1,s2,case_sensitive) .or. & lexical_eq(s1,s2,case_sensitive) end function lexical_le !***************************************************************************************** !***************************************************************************************** !> ! Returns true if the s1 >= s2 in a lexical sense (can be case sensitive). pure logical function lexical_ge(s1,s2,case_sensitive) implicit none character(len=*),intent(in) :: s1 character(len=*),intent(in) :: s2 logical,intent(in) :: case_sensitive lexical_ge = lexical_gt(s1,s2,case_sensitive) .or. & lexical_eq(s1,s2,case_sensitive) end function lexical_ge !***************************************************************************************** !***************************************************************************************** !> ! Swap two character strings. pure elemental subroutine swap_chars(s1,s2) implicit none character(len=*),intent(inout) :: s1 character(len=*),intent(inout) :: s2 character(len=len(s1)) :: tmp tmp = s1 s1 = s2 s2 = tmp end subroutine swap_chars !***************************************************************************************** !***************************************************************************************** !> ! Sorts a character array `str` in increasing order. ! ! Uses a non-recursive quicksort, reverting to insertion sort on arrays of ! size \(\le 20\). Dimension of `stack` limits array size to about \(2^{32}\). ! !### License ! * [Original LAPACK license](http://www.netlib.org/lapack/LICENSE.txt) ! !### History ! * Based on the LAPACK routine [DLASRT](http://www.netlib.org/lapack/explore-html/df/ddf/dlasrt_8f.html). ! * Extensively modified by Jacob Williams,Feb. 2016. Converted to ! modern Fortran and removed the descending sort option. pure subroutine lexical_sort_nonrecursive(str,case_sensitive) implicit none character(len=*),dimension(:),intent(inout) :: str !! on entry,the array to be sorted. !! on exit,`str` has been sorted into !! increasing order (`str(1) <= ... <= str(n)`) logical,intent(in) :: case_sensitive !! if true, the sort is case sensitive integer :: endd,i,j,n,start,stkpnt character(len=len(str)) :: d1,d2,d3,dmnmx,tmp integer,dimension(2,32) :: stack ! number of elements to sort: n = size(str) if ( n>1 ) then stkpnt = 1 stack(1,1) = 1 stack(2,1) = n do start = stack(1,stkpnt) endd = stack(2,stkpnt) stkpnt = stkpnt - 1 if ( endd-start<=max_size_for_insertion_sort .and. endd>start ) then ! do insertion sort on str( start:endd ) insertion: do i = start + 1,endd do j = i,start + 1,-1 if ( lexical_lt(str(j),str(j-1),case_sensitive) ) then dmnmx = str(j) str(j) = str(j-1) str(j-1) = dmnmx else exit end if end do end do insertion elseif ( endd-start>max_size_for_insertion_sort ) then ! partition str( start:endd ) and stack parts,largest one first ! choose partition entry as median of 3 d1 = str(start) d2 = str(endd) i =(start+endd)/2 d3 = str(i) if ( lexical_lt(d1,d2,case_sensitive) ) then if ( lexical_lt(d3,d1,case_sensitive) ) then dmnmx = d1 elseif ( lexical_lt(d3,d2,case_sensitive) ) then dmnmx = d3 else dmnmx = d2 endif elseif ( lexical_lt(d3,d2,case_sensitive) ) then dmnmx = d2 elseif ( lexical_lt(d3,d1,case_sensitive) ) then dmnmx = d3 else dmnmx = d1 endif i = start - 1 j = endd + 1 do do j = j - 1 if ( lexical_le(str(j),dmnmx,case_sensitive) ) exit end do do i = i + 1 if ( lexical_ge(str(i),dmnmx,case_sensitive) ) exit end do if ( i<j ) then tmp = str(i) str(i) = str(j) str(j) = tmp else exit endif end do if ( j-start>endd-j-1 ) then stkpnt = stkpnt + 1 stack(1,stkpnt) = start stack(2,stkpnt) = j stkpnt = stkpnt + 1 stack(1,stkpnt) = j + 1 stack(2,stkpnt) = endd else stkpnt = stkpnt + 1 stack(1,stkpnt) = j + 1 stack(2,stkpnt) = endd stkpnt = stkpnt + 1 stack(1,stkpnt) = start stack(2,stkpnt) = j endif endif if ( stkpnt<=0 ) exit end do end if end subroutine lexical_sort_nonrecursive !***************************************************************************************** !***************************************************************************************** !> ! Sorts a character array `str` in increasing order. ! Uses a basic recursive quicksort ! (with insertion sort for partitions with <= 20 elements). subroutine lexical_sort_recursive(str,case_sensitive) implicit none character(len=*),dimension(:),intent(inout) :: str logical,intent(in) :: case_sensitive !! if true, the sort is case sensitive call quicksort(1,size(str)) contains !*************************************************************** !> ! Sort the array, based on the lexical string comparison. recursive subroutine quicksort(ilow,ihigh) implicit none integer,intent(in) :: ilow integer,intent(in) :: ihigh integer :: ipivot !! pivot element integer :: i !! counter integer :: j !! counter if ( ihigh-ilow<=max_size_for_insertion_sort .and. ihigh>ilow ) then ! do insertion sort: do i = ilow + 1,ihigh do j = i,ilow + 1,-1 if ( lexical_lt(str(j),str(j-1),case_sensitive) ) then call swap(str(j),str(j-1)) else exit end if end do end do elseif ( ihigh-ilow>max_size_for_insertion_sort ) then ! do the normal quicksort: call partition(ilow,ihigh,ipivot) call quicksort(ilow,ipivot - 1) call quicksort(ipivot + 1,ihigh) end if end subroutine quicksort !*************************************************************** !> ! Partition the array, based on the lexical string comparison. subroutine partition(ilow,ihigh,ipivot) implicit none integer,intent(in) :: ilow integer,intent(in) :: ihigh integer,intent(out) :: ipivot integer :: i,ip call swap(str(ilow),str((ilow+ihigh)/2)) ip = ilow do i = ilow + 1, ihigh if (lexical_lt(str(i),str(ilow),case_sensitive)) then ip = ip + 1 call swap(str(ip),str(i)) end if end do call swap(str(ilow),str(ip)) ipivot = ip end subroutine partition end subroutine lexical_sort_recursive !***************************************************************************************** !***************************************************************************************** !> ! Sorts a character array `str` in increasing order, ! using a "natural" sorting method. ! ! Uses a basic recursive quicksort ! (with insertion sort for partitions with <= 20 elements). subroutine lexical_sort_natural_recursive(str,case_sensitive) implicit none character(len=*),dimension(:),intent(inout) :: str logical,intent(in) :: case_sensitive !! if true, the sort is case sensitive type(int_list),dimension(size(str)) :: ints !! the `str` converted into arrays of integers logical,dimension(size(str)) :: case_sensitive_vec !! for the elemental routine integer,dimension(size(str)) :: idx !! index vector for sorting integer :: i !! counter !convert vector of strings to vector of int vectors: case_sensitive_vec = case_sensitive call string_to_int_list(str,case_sensitive_vec,ints) idx = [(i, i=1,size(str))] call quicksort(1,size(str)) str = str(idx) contains !*************************************************************** !> ! Sort the index array of `str`, based on int vec comparison. recursive subroutine quicksort(ilow,ihigh) implicit none integer,intent(in) :: ilow integer,intent(in) :: ihigh integer :: ipivot !! pivot element integer :: i !! counter integer :: j !! counter if ( ihigh-ilow<=max_size_for_insertion_sort .and. ihigh>ilow ) then ! do insertion sort: do i = ilow + 1,ihigh do j = i,ilow + 1,-1 if ( ints(idx(j)) < ints(idx(j-1)) ) then call swap(idx(j),idx(j-1)) else exit end if end do end do elseif ( ihigh-ilow>max_size_for_insertion_sort ) then ! do the normal quicksort: call partition(ilow,ihigh,ipivot) call quicksort(ilow,ipivot - 1) call quicksort(ipivot + 1,ihigh) end if end subroutine quicksort !*************************************************************** !> ! Partition the index array of `str`, based on int vec comparison. subroutine partition(ilow,ihigh,ipivot) implicit none integer,intent(in) :: ilow integer,intent(in) :: ihigh integer,intent(out) :: ipivot integer :: i,ip call swap(idx(ilow),idx((ilow+ihigh)/2)) ip = ilow do i = ilow + 1, ihigh if ( ints(idx(i)) < ints(idx(ilow)) ) then ip = ip + 1 call swap(idx(ip),idx(i)) end if end do call swap(idx(ilow),idx(ip)) ipivot = ip end subroutine partition end subroutine lexical_sort_natural_recursive !***************************************************************************************** !***************************************************************************************** !> ! Returns true if the list is lexically sorted in increasing order. logical function list_is_sorted(str,case_sensitive,natural) result(sorted) implicit none character(len=*),dimension(:),intent(inout) :: str logical,intent(in) :: case_sensitive !! if true, the sort is case sensitive logical,intent(in) :: natural !! if true, the sort is "natural" type(int_list),dimension(size(str)) :: ints !! the `str` converted into arrays of integers logical,dimension(size(str)) :: case_sensitive_vec !! for the elemental routine integer :: i !! counter sorted = .true. if (natural) then !convert vector of strings to vector of int vectors: case_sensitive_vec = case_sensitive call string_to_int_list(str,case_sensitive_vec,ints) do i = 1, size(str)-1 if ( ints(i+1) < ints(i) ) then sorted = .false. return end if end do else do i = 1, size(str)-1 if (lexical_lt(str(i+1),str(i),case_sensitive)) then sorted = .false. return end if end do end if end function list_is_sorted !***************************************************************************************** !***************************************************************************************** end module string_sort_module !*****************************************************************************************