Commit f7052742 authored by Glen Mével

fix `fix-mixed-utf8`, whose automaton had infinite loops

parent b5bde5c2
......@@ -86,26 +86,24 @@ let output_cp1252_as_utf8 code =
Printf.eprintf "unknown code %02X ; inserting “%s” instead\n" code substitute ;
output_string stdout substitute
(* NOTE(review): this listing looks like a rendered diff whose +/- markers and
   indentation were lost. Each changed line seems to appear twice, first in its
   older form (calling `read`) and then in its newer form (calling `recode`)
   -- confirm against the raw commit. The commit message states that the change
   fixes infinite loops in this automaton. *)
(* Recursive byte-level loop: fetches the next byte with `buf#input ()` and
   dispatches on that byte and on `state`. `state` is 0 outside a multi-byte
   sequence; it is set to 1, 2 or 3 after a byte with bit prefix 110, 1110 or
   11110 and decremented on each byte with bit prefix 10, so it presumably
   counts the UTF-8 continuation bytes still expected. *)
let rec read state =
let rec recode state =
match buf#input () with
(* Byte below 0x80 while no sequence is open: it is discarded from the buffer
   and passed to `output_utf8_byte`. *)
| b when b < 0x80 && state = 0 ->
buf#discard () ;
output_utf8_byte b ;
read 0
recode 0
(* Byte of the form 10xxxxxx while a sequence is open. When it is the last
   expected one (state = 1), the buffer is consumed through `output_utf8_byte`;
   in every case the loop continues with one fewer byte expected. *)
| b when b lsr 6 = 0b10 && state > 0 ->
if state = 1 then
buf#consume output_utf8_byte ;
read (state - 1)
(* Lead bytes seen while no sequence is open: 110xxxxx, 1110xxxx and 11110xxx
   open a sequence expecting 1, 2 and 3 further bytes respectively. Nothing is
   output here; the byte is only left to `buf`. *)
| b when b lsr 5 = 0b110 && state = 0 -> read 1
| b when b lsr 4 = 0b1110 && state = 0 -> read 2
| b when b lsr 3 = 0b11110 && state = 0 -> read 3
recode (state - 1)
| b when b lsr 5 = 0b110 && state = 0 -> recode 1
| b when b lsr 4 = 0b1110 && state = 0 -> recode 2
| b when b lsr 3 = 0b11110 && state = 0 -> recode 3
(* Any other byte/state combination: the buffer is consumed through
   `output_cp1252_as_utf8` and the loop restarts in state 0. The older form
   always passes ~keep:1; the newer form passes ~keep:0 when state = 0 and
   ~keep:1 otherwise.
   NOTE(review): presumably ~keep is the number of trailing bytes left in the
   buffer for re-reading; if so, the older form with state = 0 could leave the
   offending byte in place and match it again forever -- confirm against the
   definition of `buf`. *)
| _ ->
buf#consume ~keep:1 output_cp1252_as_utf8 ;
read 0
(* End of input. Older form: with a sequence still open (state > 0) it consumes
   with ~keep:1 and calls `read 0` again, otherwise it returns unit. *)
| exception End_of_file when state > 0 ->
buf#consume ~keep:1 output_cp1252_as_utf8 ;
read 0
| exception End_of_file -> ()
buf#consume ~keep:(if state = 0 then 0 else 1) output_cp1252_as_utf8 ;
recode 0
(* End of input, newer form: whatever is still buffered is consumed through
   `output_cp1252_as_utf8` with ~keep:0 and the loop stops (no recursive
   call). *)
| exception End_of_file ->
buf#consume ~keep:0 output_cp1252_as_utf8
(* Top-level effect: starts the recoding loop in state 0.
   NOTE(review): the two calls look like the older (`read`) and newer
   (`recode`) forms of one diff line -- confirm against the raw commit. *)
let () =
read 0
recode 0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment