Fortran Spass mit unterschiedlichen Encodings für Umlaute in UTF8 und ISO8859
Kompletter Quellcode characterClassification.F90.zip
!> Report whether the byte pair (ch1, ch2) is the two-byte UTF-8 encoding of a
!> German umlaut or sharp s: ä, ö, ü, Ä, Ö, Ü or ß.
!>
!> @param ch1  first byte of the candidate sequence (must be 0xC3 to match)
!> @param ch2  second byte of the candidate sequence
!> @return     .true. if the pair encodes one of the characters listed above
pure function isUTF8umlaut(ch1, ch2) result(found)
    implicit none
    character, intent(in) :: ch1, ch2
    logical :: found

    ! BOZ literals may not be compared with integers directly; they are only
    ! valid in a few contexts such as the argument of int(). Hence the named
    ! integer constants below.
    integer, parameter :: lead_byte   = int(z'C3')  ! common UTF-8 lead byte
    integer, parameter :: small_a_uml = int(z'A4')  ! ä
    integer, parameter :: small_o_uml = int(z'B6')  ! ö
    integer, parameter :: small_u_uml = int(z'BC')  ! ü
    integer, parameter :: cap_a_uml   = int(z'84')  ! Ä
    integer, parameter :: cap_o_uml   = int(z'96')  ! Ö
    integer, parameter :: cap_u_uml   = int(z'9C')  ! Ü
    integer, parameter :: sharp_s     = int(z'9F')  ! ß

    found = .false.

    ! ichar (not iachar) is used because the bytes of interest are above 127,
    ! where the result of iachar is processor-dependent.
    if (ichar(ch1) /= lead_byte) return

    select case (ichar(ch2))
    case (small_a_uml, small_o_uml, small_u_uml, &
          cap_a_uml, cap_o_uml, cap_u_uml, sharp_s)
        found = .true.
    end select

    ! The capital sharp s (0xE1 0xBA 0x9E) is a three-byte sequence and
    ! therefore cannot be recognised from a two-byte window.
end function isUTF8umlaut

!> Report whether the single byte ch is the ISO 8859-15 code of a German
!> umlaut or sharp s: ä, ö, ü, Ä, Ö, Ü or ß.
!>
!> @param ch  the byte to classify
!> @return    .true. if ch is one of the characters listed above
pure function isISO8859_15_Umlaut(ch) result(found)
    implicit none
    character, intent(in) :: ch
    logical :: found

    integer, parameter :: small_a_uml = int(z'E4')  ! ä
    integer, parameter :: small_o_uml = int(z'F6')  ! ö
    integer, parameter :: small_u_uml = int(z'FC')  ! ü
    integer, parameter :: cap_a_uml   = int(z'C4')  ! Ä
    integer, parameter :: cap_o_uml   = int(z'D6')  ! Ö
    integer, parameter :: cap_u_uml   = int(z'DC')  ! Ü
    integer, parameter :: sharp_s     = int(z'DF')  ! ß

    ! ichar (not iachar): all codes tested here lie outside the ASCII range.
    select case (ichar(ch))
    case (small_a_uml, small_o_uml, small_u_uml, &
          cap_a_uml, cap_o_uml, cap_u_uml, sharp_s)
        found = .true.
    case default
        found = .false.
    end select
end function isISO8859_15_Umlaut
Um das Encoding eines Strings herauszufinden, einfach über jedes Zeichen iterieren und es mit den beiden Funktionen oben klassifizieren:
!> Scan `string` character by character and report which umlaut encodings
!> occur in it.
!>
!> Each position is classified, in this order, as the start of a two-byte
!> UTF-8 umlaut (isUTF8umlaut), a single-byte ISO 8859-15 umlaut
!> (isISO8859_15_Umlaut), or a character accepted by isPrint. Characters that
!> match none of these are reported individually. After the scan one line is
!> written per encoding that was seen, plus a note if both were seen.
!>
!> NOTE(review): isPrint is defined outside this block; presumably it tests
!> for printable characters - confirm against its definition.
!>
!> @param string  the text to examine; every character up to len(string),
!>                including the last one, is inspected
subroutine checkEncoding(string)
    implicit none
    character(len=*), intent(in) :: string
    integer :: j, n
    logical :: skipNext, detectISO8859, detectUTF8
    character :: ch1, ch2

    skipNext = .false.
    detectUTF8 = .false.
    detectISO8859 = .false.
    n = len(string)

    ! Iterate up to and including the last character so that a trailing
    ! single-byte umlaut or unknown byte is not silently ignored.
    do j = 1, n
        if (skipNext) then
            ! This position is the second byte of a UTF-8 pair already handled.
            skipNext = .false.
            cycle
        end if

        ch1 = string(j:j)
        if (j < n) then
            ch2 = string(j+1:j+1)
        else
            ! No following byte exists; a blank can never complete a UTF-8 pair.
            ch2 = ' '
        end if

        if (isUTF8umlaut(ch1, ch2)) then
            detectUTF8 = .true.
            skipNext = .true.
        else if (isISO8859_15_Umlaut(ch1)) then
            detectISO8859 = .true.
        else if (.not. isPrint(ch1)) then
            ! ichar rather than iachar: the offending byte may be above 127.
            write(*,*) "Unknown encoding im Feld 'Kommentar', Spalte", j, " Zeichen '", ch1, "' ASCII code ", ichar(ch1)
        end if
    end do

    if (detectUTF8) then
        write(*,*) "UTF8 encoding detected"
    end if
    if (detectISO8859) then
        write(*,*) "ISO8859 encoding detected"
    end if
    if (detectUTF8 .and. detectISO8859) then
        write(*,*) "Mixed encoding detected."
    end if
end subroutine checkEncoding