diff --git a/src/kernel_internals/parsing/logic_lexer.mll b/src/kernel_internals/parsing/logic_lexer.mll index bc63b51001c4f167093838eece20330d6eeab414..ca329de4aef0f70a65206da351f27f04d235e13c 100644 --- a/src/kernel_internals/parsing/logic_lexer.mll +++ b/src/kernel_internals/parsing/logic_lexer.mll @@ -71,6 +71,37 @@ fun s -> try Hashtbl.find h s with Not_found -> IDENTIFIER s + let all_digits s = + let is_digit = + function + | '0'..'9' | 'a'..'f' | 'A'..'F' -> () + | _ -> raise Exit + in + try String.iter is_digit s; true with Exit -> false + + let is_ucn s = + if String.length s <= 2 || s.[0] <> '\\' then false else begin + match s.[1] with + | 'U' -> String.length s = 10 && all_digits (String.sub s 2 8) + | 'u' -> String.length s = 6 && all_digits (String.sub s 2 4) + | _ -> false + end + + let int_of_digit chr = + match chr with + | '0'..'9' -> (Char.code chr) - (Char.code '0') + | 'a'..'f' -> (Char.code chr) - (Char.code 'a') + 10 + | 'A'..'F' -> (Char.code chr) - (Char.code 'A') + 10 + | _ -> assert false + + (* assumes is_ucn s *) + let unicode_char s = + let code = ref 0 in + let add_digit c = code := 16 * !code + int_of_digit c in + String.iter add_digit (String.sub s 2 (String.length s - 2)); + let c = Utf8_logic.from_unichar !code in + find_utf8 c + let identifier, is_acsl_keyword = let all_kw = Hashtbl.create 37 in let c_kw = Hashtbl.create 37 in @@ -242,18 +273,11 @@ ]; fun lexbuf -> let s = lexeme lexbuf in - try Hashtbl.find h s with Not_found -> - if Logic_env.typename_status s then TYPENAME s - else - IDENTIFIER s - - - let int_of_digit chr = - match chr with - '0'..'9' -> (Char.code chr) - (Char.code '0') - | 'a'..'f' -> (Char.code chr) - (Char.code 'a') + 10 - | 'A'..'F' -> (Char.code chr) - (Char.code 'A') + 10 - | _ -> assert false + if is_ucn s then unicode_char s else begin + try Hashtbl.find h s with Not_found -> + if Logic_env.typename_status s then TYPENAME s + else IDENTIFIER s + end (* Update lexer buffer. *) let update_line_loc lexbuf line = diff --git a/src/libraries/utils/utf8_logic.mli b/src/libraries/utils/utf8_logic.mli index 2a3cd4d3d9c5e417eeb3562d8cff688add9c774e..449e20476bd3f55a53b09dd0cf143395d1ce5fad 100644 --- a/src/libraries/utils/utf8_logic.mli +++ b/src/libraries/utils/utf8_logic.mli @@ -24,6 +24,9 @@ (** UTF-8 string for logic symbols. *) +(** given an unicode code point, returns the corresponding utf-8 encoding. *) +val from_unichar: int -> string + val forall : string val exists : string val eq : string