C++Guns – RoboBlog

17.06.2020

Fortran: Unicode UTF8 ISO8859 Umlaute

Filed under: Allgemein — Tags: — Thomas @ 14:06

Fortran Spass mit unterschiedlichen Encodings für Umlaute in UTF8 und ISO8859

Kompletter Quellcode characterClassification.F90.zip

function isUTF8umlaut(ch1, ch2)
      ! Return .true. when the byte pair (ch1, ch2) is the two-byte UTF-8
      ! encoding of one of the German letters ä, ö, ü, Ä, Ö, Ü or ß.
      !
      !   ch1 : first byte of the candidate sequence (must be 0xC3)
      !   ch2 : second byte of the candidate sequence
      !
      ! The byte values are built with int(Z'..') because a bare BOZ literal
      ! is not allowed as an operand of a comparison in standard Fortran
      ! (recent gfortran versions reject it).
      implicit none
      character, intent(in) :: ch1, ch2
      logical :: isUTF8umlaut

      ! Lead byte shared by all seven two-byte sequences.
      integer, parameter :: utf8Lead = int(Z'C3')
      ! Second bytes, in order: ä, ü, ö, Ä, Ü, Ö, ß
      integer, parameter :: utf8Tail(7) = [ int(Z'A4'), int(Z'BC'), int(Z'B6'), &
                                            int(Z'84'), int(Z'9C'), int(Z'96'), &
                                            int(Z'9F') ]
      integer :: code1, code2

      ! ichar gives the position in the processor's character set; iachar is
      ! only defined for ASCII characters and these bytes are all >= 0x80.
      code1 = ichar(ch1)
      code2 = ichar(ch2)

      isUTF8umlaut = .false.
      if (code1 == utf8Lead) then
        isUTF8umlaut = any(utf8Tail == code2)
      end if

      ! Not handled: capital sharp s (U+1E9E), which is the three-byte
      ! sequence 0xE1 0xBA 0x9E and cannot be detected from two bytes.
    end function isUTF8umlaut
    
    function isISO8859_15_Umlaut(ch)
      ! Return .true. when the single byte ch is the ISO 8859-15 code of one
      ! of the German letters ä, ö, ü, Ä, Ö, Ü or ß.
      !
      !   ch : the byte to classify
      !
      ! The byte values are built with int(Z'..') because a bare BOZ literal
      ! is not allowed as an operand of a comparison in standard Fortran
      ! (recent gfortran versions reject it).
      implicit none
      character, intent(in) :: ch
      logical :: isISO8859_15_Umlaut

      ! Codes, in order: ä, ü, ö, Ä, Ü, Ö, ß
      integer, parameter :: latinCodes(7) = [ int(Z'E4'), int(Z'FC'), int(Z'F6'), &
                                              int(Z'C4'), int(Z'DC'), int(Z'D6'), &
                                              int(Z'DF') ]

      ! ichar gives the position in the processor's character set; iachar is
      ! only defined for ASCII characters and these bytes are all >= 0x80.
      isISO8859_15_Umlaut = any(latinCodes == ichar(ch))
    end function isISO8859_15_Umlaut

Um das Encoding eines Strings herauszufinden, einfach über jedes Zeichen iterieren:

  subroutine checkEncoding(string)
    ! Scan string byte by byte and print to standard output which umlaut
    ! encoding(s) were found: UTF-8, ISO 8859-15, or a mix of both.
    ! A byte that is neither part of a recognised umlaut nor accepted by
    ! isPrint is reported with its position and code.
    !
    !   string : the text to inspect
    !
    ! Every byte up to and including the last one is classified, so an
    ! ISO 8859-15 umlaut or an unknown byte at the end is reported too.
    implicit none
    character(len=*), intent(in) :: string

    integer :: j, n
    logical :: skipNext, detectISO8859, detectUTF8
    character :: ch1, ch2

    skipNext = .false.
    detectUTF8 = .false.
    detectISO8859 = .false.
    n = len(string)

    do j = 1, n
      ! The previous byte started a two-byte UTF-8 umlaut; this byte is its
      ! second half and has already been consumed.
      if (skipNext) then
        skipNext = .false.
        cycle
      end if

      ch1 = string(j:j)
      if (j < n) then
        ch2 = string(j+1:j+1)
      else
        ! No look-ahead byte at the end of the string: NUL never completes
        ! a UTF-8 umlaut, so the last byte is judged on its own.
        ch2 = char(0)
      end if

      if (isUTF8umlaut(ch1, ch2)) then
        detectUTF8 = .true.
        skipNext = .true.
      else if (isISO8859_15_Umlaut(ch1)) then
        detectISO8859 = .true.
      else if (.not. isPrint(ch1)) then
        ! isPrint is defined outside this block; presumably it accepts
        ! printable ASCII characters - verify against its definition.
        write(*,*) "Unknown encoding im Feld 'Kommentar', Spalte", j, &
                   " Zeichen '", ch1, "' ASCII code ", iachar(ch1)
      end if
    end do

    if (detectUTF8) then
      write(*,*) "UTF8 encoding detected"
    end if

    if (detectISO8859) then
      write(*,*) "ISO8859 encoding detected"
    end if

    if (detectUTF8 .and. detectISO8859) then
      write(*,*) "Mixed encoding detected."
    end if
  end subroutine checkEncoding

No Comments

No comments yet.

RSS feed for comments on this post.

Sorry, the comment form is closed at this time.

Powered by WordPress