Commit 43b5db19 authored by Loïc Correnson's avatar Loïc Correnson
Browse files

[F] added subnormals

parent f4d234e9
Pipeline #619 passed with stage
......@@ -299,12 +299,20 @@ let f_mantissa = Int64.(sub f_unit 1L)
let f_exponent = Int64.(sub (shift_left 1L 11) 1L)
let of_float u =
if u = 0.0 then zero else
let a = Int64.bits_of_float u in
let m = Z.of_int64 (Int64.(add f_unit (logand a f_mantissa))) in
let e = Int64.(sub (logand (shift_right_logical a 52) f_exponent) 1075L) in
let s = Int64.(logand a f_sign) <> 0L in
oddify (if s then Z.neg m else m) (Int64.to_int e)
(* Dispatch on the IEEE-754 class of [u]: each class is decoded differently. *)
match classify_float u with
| FP_zero -> zero
(* NaN and infinities have no finite value to convert. *)
| FP_nan | FP_infinite -> raise Undefined
| FP_normal ->
(* Normal number: the stored mantissa bits are completed with [f_unit]
   (presumably the implicit leading bit 2^52; it is defined outside this
   hunk -- TODO confirm). *)
let a = Int64.bits_of_float u in
let m = Z.of_int64 (Int64.(add f_unit (logand a f_mantissa))) in
(* Exponent field (bits 52 and up, masked to 11 bits) minus 1075, i.e. the
   binary64 bias 1023 plus the 52 mantissa bits, so that [m] is read as an
   integer. *)
let e = Int64.(sub (logand (shift_right_logical a 52) f_exponent) 1075L) in
let s = Int64.(logand a f_sign) <> 0L in
(* NOTE(review): [oddify] is defined elsewhere; presumably it normalises the
   (mantissa, exponent) pair -- confirm. *)
oddify (if s then Z.neg m else m) (Int64.to_int e)
| FP_subnormal ->
(* Subnormal number: no implicit leading bit is added and the exponent is
   the fixed value -1074 (= -1022 - 52). *)
let a = Int64.bits_of_float u in
let m = Z.of_int64 (Int64.(logand a f_mantissa)) in
let s = Int64.(logand a f_sign) <> 0L in
oddify (if s then Z.neg m else m) (-1074)
let to_float ?(mode=NE) u =
if u == zero then 0.0 else
......
......@@ -36,6 +36,8 @@ val exponent : t -> int
val equal : t -> t -> bool
val compare : t -> t -> int
exception Undefined (** for undefined operations *)
(** {3 Rounding} *)
(** Supported rounding modes *)
......@@ -70,7 +72,9 @@ val sub : t -> t -> t
val mul : t -> t -> t
val div : ?mode:mode -> ?bits:int -> t -> t -> t
(** Division rounded to [bits] digits (default is 80).
The default rounding mode is [NE]. *)
The default rounding mode is [NE].
@raise Undefined for division by zero.
*)
val shift_left : t -> int -> t (** Multiply with a positive or negative power of 2. *)
val shift_right : t -> int -> t (** Divide by a positive or negative power of 2. *)
......@@ -97,7 +101,7 @@ val to_int : t -> int (** Fractional part is truncated. *)
(** {3 Conversion with OCaml floats} *)
val of_float : float -> t (** Exact. *)
val of_float : float -> t (** Exact. @raise Undefined for NaN and infinities. *)
val to_float : ?mode:mode -> t -> float (** Rounded with default mode [NE]. *)
(** {3 Formatting}
......
(** [eps n] is the float [2^n], computed as [ldexp 1.0 n].
    Exponents beyond the double-precision range give infinity (overflow)
    or zero (underflow).

    The unqualified [ldexp] is used instead of [Pervasives.ldexp]: the
    [Pervasives] module was removed in OCaml 5.0, while the unqualified
    name resolves on every compiler version. *)
let eps n = ldexp 1.0 n
(** [pp_class fmt u] prints on [fmt] a short name for the floating-point
    class of [u], as reported by [classify_float]. *)
let pp_class fmt u =
  let class_name =
    match classify_float u with
    | FP_nan -> "nan"
    | FP_infinite -> "infinity"
    | FP_subnormal -> "sub-normal"
    | FP_normal -> "normal"
    | FP_zero -> "zero"
  in
  Format.pp_print_string fmt class_name
(** [test_of_float n] converts the float [eps n] with [F.of_float] and
    prints the converted value together with the float class of the input.
    When [F.Undefined] is raised, "undefined" is printed instead of the
    value. *)
let test_of_float n =
  let x = eps n in
  try
    (* All arguments, including the conversion, are evaluated before
       anything is printed, so a failing conversion emits no partial line. *)
    Format.printf "of-float 1p%d = %a (%a)@." n F.pp (F.of_float x) pp_class x
  with F.Undefined ->
    Format.printf "of-float 1p%d = undefined (%a)@." n pp_class x
(** [test_to_float n] converts [F.power2 n] to a float with [F.to_float]
    (default rounding mode) and prints the result with its float class.
    It then prints the [frexp] decomposition of the reference float [eps n]
    ("expected") and of the converted float ("obtained"), so the two can be
    compared even where [%f] shows only zeros.

    The unqualified [frexp] is used instead of [Pervasives.frexp]: the
    [Pervasives] module was removed in OCaml 5.0, while the unqualified
    name resolves on every compiler version. *)
let test_to_float n =
  let u = eps n in
  let f = F.power2 n in
  let v = F.to_float f in
  Format.printf "to-float %a = %f (%a)@." F.pp f v pp_class v ;
  (* Normalised fraction and binary exponent of the reference and of the
     converted value. *)
  let fu, eu = frexp u in
  let fv, ev = frexp v in
  Format.printf " expected = %fp%d@\n" fu eu ;
  Format.printf " obtained = %fp%d@." fv ev
(* Binary exponents probing the edges of the double-precision range, per the
   recorded output: 1023 (normal), 1024 (overflows to infinity),
   -1022 (normal), -1023, -1048 and -1074 (sub-normal), -1075 (zero). *)
let limits = [ 1023;1024;-1022;-1023;-1048;-1074;-1075 ]
(* Entry point: run every float -> F conversion check first, then every
   F -> float check, over the same list of boundary exponents. *)
let () =
  List.iter test_of_float limits ;
  List.iter test_to_float limits
of-float 1p1023 = +1p1023 (normal)
of-float 1p1024 = undefined (infinity)
of-float 1p-1022 = +1p-1022 (normal)
of-float 1p-1023 = +1p-1023 (sub-normal)
of-float 1p-1048 = +1p-1048 (sub-normal)
of-float 1p-1074 = +1p-1074 (sub-normal)
of-float 1p-1075 = 0 (zero)
to-float +1p1023 = 89884656743115795386465259539451236680898848947115328636715040578866337902750481566354238661203768010560056939935696678829394884407208311246423715319737062188883946712432742638151109800623047059726541476042502884419075341171231440736956555270413618581675255342293149119973622969239858152417678164812112068608.000000 (normal)
expected = 0.500000p1024
obtained = 0.500000p1024
to-float +1p1024 = inf (infinity)
expected = infp0
obtained = infp0
to-float +1p-1022 = 0.000000 (normal)
expected = 0.500000p-1021
obtained = 0.500000p-1021
to-float +1p-1023 = 0.000000 (sub-normal)
expected = 0.500000p-1022
obtained = 0.500000p-1022
to-float +1p-1048 = 0.000000 (sub-normal)
expected = 0.500000p-1047
obtained = 0.500000p-1047
to-float +1p-1074 = 0.000000 (sub-normal)
expected = 0.500000p-1073
obtained = 0.500000p-1073
to-float +1p-1075 = 0.000000 (zero)
expected = 0.000000p0
obtained = 0.000000p0
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment