Fortran-Spaß mit unterschiedlichen Encodings für Umlaute in UTF-8 und ISO 8859
Kompletter Quellcode characterClassification.F90.zip
!> Test whether two consecutive bytes form the UTF-8 encoding of a German
!> umlaut or the lowercase sharp s.
!>
!> The seven recognised characters (U+00E4, U+00FC, U+00F6, U+00C4, U+00DC,
!> U+00D6, U+00DF) are each encoded in UTF-8 as the lead byte 0xC3 followed by
!> exactly one continuation byte, so only that two-byte form is checked.
!> The capital sharp s (U+1E9E) is the three-byte sequence E1 BA 9E and is
!> therefore NOT detected by this function.
!>
!> @param ch1  first byte (candidate UTF-8 lead byte)
!> @param ch2  byte immediately following ch1
!> @return     .true. if (ch1, ch2) encodes one of the seven characters above
function isUTF8umlaut(ch1, ch2)
  implicit none
  character, intent(in) :: ch1, ch2
  logical :: isUTF8umlaut

  ! A BOZ literal is only standard-conforming as an argument of a conversion
  ! intrinsic such as int(), not as an operand of ==. The byte values are
  ! therefore defined once as named integer constants.
  integer, parameter :: utf8_lead_byte = int(z'C3')
  integer, parameter :: a_uml_lower    = int(z'A4')   ! U+00E4
  integer, parameter :: u_uml_lower    = int(z'BC')   ! U+00FC
  integer, parameter :: o_uml_lower    = int(z'B6')   ! U+00F6
  integer, parameter :: a_uml_upper    = int(z'84')   ! U+00C4
  integer, parameter :: u_uml_upper    = int(z'9C')   ! U+00DC
  integer, parameter :: o_uml_upper    = int(z'96')   ! U+00D6
  integer, parameter :: sharp_s        = int(z'9F')   ! U+00DF

  isUTF8umlaut = .false.

  ! Without the 0xC3 lead byte the pair cannot be one of the characters.
  if (iachar(ch1) /= utf8_lead_byte) return

  select case (iachar(ch2))
  case (a_uml_lower, u_uml_lower, o_uml_lower, &
        a_uml_upper, u_uml_upper, o_uml_upper, sharp_s)
    isUTF8umlaut = .true.
  end select
end function isUTF8umlaut
!> Test whether a single byte is a German umlaut or the sharp s in the
!> ISO 8859-15 single-byte encoding.
!>
!> Recognised byte values: E4, FC, F6 (lowercase a/u/o umlaut),
!> C4, DC, D6 (uppercase A/U/O umlaut) and DF (sharp s).
!>
!> @param ch  the byte to classify
!> @return    .true. if ch is one of the seven byte values listed above
function isISO8859_15_Umlaut(ch)
  implicit none
  character, intent(in) :: ch
  logical :: isISO8859_15_Umlaut

  ! A BOZ literal is only standard-conforming as an argument of a conversion
  ! intrinsic such as int(), not as an operand of ==. The byte values are
  ! therefore defined once as named integer constants.
  integer, parameter :: a_uml_lower = int(z'E4')
  integer, parameter :: u_uml_lower = int(z'FC')
  integer, parameter :: o_uml_lower = int(z'F6')
  integer, parameter :: a_uml_upper = int(z'C4')
  integer, parameter :: u_uml_upper = int(z'DC')
  integer, parameter :: o_uml_upper = int(z'D6')
  integer, parameter :: sharp_s     = int(z'DF')

  select case (iachar(ch))
  case (a_uml_lower, u_uml_lower, o_uml_lower, &
        a_uml_upper, u_uml_upper, o_uml_upper, sharp_s)
    isISO8859_15_Umlaut = .true.
  case default
    isISO8859_15_Umlaut = .false.
  end select
end function isISO8859_15_Umlaut
Um das Encoding eines Strings herauszufinden, einfach über jedes Zeichen iterieren
!> Scan a string byte by byte and report which umlaut encodings it contains.
!>
!> Each position is classified, in this order, as
!>   1. the start of a two-byte UTF-8 umlaut (isUTF8umlaut) - both bytes are
!>      consumed,
!>   2. a single-byte ISO 8859-15 umlaut (isISO8859_15_Umlaut),
!>   3. a printable character (isPrint) - silently accepted,
!>   4. anything else - reported immediately on standard output.
!> After the scan a summary line is written for every encoding that was seen,
!> plus a "mixed" line if both were seen.
!>
!> @param string  the text to inspect; every character up to len(string),
!>                including the last one, is examined
subroutine checkEncoding(string)
  implicit none
  character(len=*), intent(in) :: string
  integer :: pos, last
  logical :: found_utf8, found_iso8859
  character :: ch

  found_utf8 = .false.
  found_iso8859 = .false.
  last = len(string)

  ! The scan runs up to and including the last character. Stopping one short
  ! would leave a trailing ISO umlaut or unprintable byte (and every
  ! one-character string) unexamined.
  pos = 1
  scan_chars: do while (pos <= last)
    ch = string(pos:pos)

    ! A UTF-8 umlaut needs a following byte, so only look ahead when one exists.
    if (pos < last) then
      if (isUTF8umlaut(ch, string(pos+1:pos+1))) then
        found_utf8 = .true.
        pos = pos + 2          ! skip the continuation byte as well
        cycle scan_chars
      end if
    end if

    if (isISO8859_15_Umlaut(ch)) then
      found_iso8859 = .true.
    else if (.not. isPrint(ch)) then
      write(*,*) "Unknown encoding im Feld 'Kommentar', Spalte", pos, &
                 " Zeichen '", ch, "' ASCII code ", iachar(ch)
    end if
    pos = pos + 1
  end do scan_chars

  if (found_utf8) then
    write(*,*) "UTF8 encoding detected"
  end if
  if (found_iso8859) then
    ! Message corrected: the encoding family is ISO 8859, not "ISO8850".
    write(*,*) "ISO8859 encoding detected"
  end if
  if (found_utf8 .and. found_iso8859) then
    write(*,*) "Mixed encoding detected."
  end if
end subroutine checkEncoding
