Commit 3170e09f authored by François Bobot's avatar François Bobot
Browse files

Merge branch 'feature/rounding' into 'master'

Feature/rounding

Et voici de beaux rounding modes pour F.
Un peu galère pour trouver des tests où les différents modes d'arrondis sur les _tie breaks_ sont visibles, mais je crois que j'ai trouvé 😅

See merge request !8
parents 8044392a 43b5db19
Pipeline #624 passed with stage
......@@ -24,6 +24,9 @@
type t = Z.t * int
let mantissa = fst
let exponent = snd
let zero = Z.zero,0
let one = Z.one,0
let two = Z.one,1
......@@ -32,24 +35,26 @@ let m8 = Z.of_int 255 (* mask for 8 1-bits 0b11111111 *)
let m4 = Z.of_int 15 (* mask for 4 1-bits 0b00001111 *)
let m2 = Z.of_int 3 (* mask for 2 1-bits 0b00000011 *)
(* 0 is uniquely représented by [zero] *)
(* 0 is uniquely represented by [zero] *)
let is_0mask a mask = Z.equal (Z.logand a mask) Z.zero
let oddify a p =
if Z.equal a Z.zero then zero else
let reduce_1 a p = (* 1 zero-bit or less *)
if Z.logand a Z.one = Z.zero
if is_0mask a Z.one
then (Z.shift_right_trunc a 1 , p+1 )
else (a,p) in
let reduce_2 a p = (* 3 zero-bits or less *)
if Z.logand a m2 = Z.zero
if is_0mask a m2
then reduce_1 (Z.shift_right_trunc a 2) (p+2)
else reduce_1 a p in
let reduce_4 a p = (* 7 zero-bits or less *)
if Z.logand a m4 = Z.zero
if is_0mask a m4
then reduce_2 (Z.shift_right_trunc a 4) (p+4)
else reduce_2 a p in
let rec reduce_8 a p = (* any number of zero-bits *)
if Z.logand a m8 = Z.zero
if is_0mask a m8
then reduce_8 (Z.shift_right_trunc a 8) (p+8)
else reduce_4 a p
in reduce_8 a p
......@@ -66,6 +71,8 @@ let compare (a,p) (b,q) =
if p < q then Z.compare a (Z.shift_left b (q-p)) else
Z.compare a b
let make = oddify
(* -------------------------------------------------------------------------- *)
(* --- Ring Operations --- *)
(* -------------------------------------------------------------------------- *)
......@@ -101,45 +108,109 @@ let mul u v =
(* multiplying two odds returns an odd *)
Z.mul a b , p+q
let (lsl) u n = if u == zero then zero else (fst u,snd u+n)
let (lsr) u n = if u == zero then zero else (fst u,snd u-n)
let shift_left u n = if u == zero then zero else (fst u,snd u+n)
let shift_right u n = if u == zero then zero else (fst u,snd u-n)
let power2 n = (Z.one,n)
let rec dicho a p q =
let cache = 128
let powers = Array.init cache (Z.shift_left Z.one)
let zp2 k = if k < cache then powers.(k) else Z.shift_left Z.one k
let b32 = 24
let b64 = 53
let m32 = powers.(b32)
let m64 = powers.(b64)
let rec log2_dicho a p q =
let r = (p + q) / 2 in
if r = p then q else
let b = Z.shift_right_trunc a r in
if b = Z.zero
then dicho a p r
else dicho a r q
if Z.lt a (zp2 r)
then log2_dicho a p r
else log2_dicho a r q
let rec shift a p =
let q = if p = 0 then 1 else 2*p in
let b = Z.shift_right_trunc a q in
if b = Z.zero then dicho a p q else shift a q
let log2_mantissa a =
let n = Z.size a in
let p = (n-1) lsl 6 in (* x64 *)
let q = n lsl 6 in
log2_dicho a p q
let log2 (a,p) =
if a = Z.zero then 0 else
if a = Z.one then p else
p + shift (Z.abs a) 0
if Z.equal a Z.zero then 0 else
if Z.equal a Z.one then p else
p + log2_mantissa (Z.abs a)
(* -------------------------------------------------------------------------- *)
(* --- Rounding --- *)
(* --- Rounding Mode --- *)
(* -------------------------------------------------------------------------- *)
let rec truncate m a p =
if Z.lt (Z.abs a) m
then oddify a p
else truncate m (Z.shift_right_trunc a 1) (succ p)
let m32 = Z.shift_left Z.one 24
let m64 = Z.shift_left Z.one 53
let round_f32 (a,p) = truncate m32 a p
let round_f64 (a,p) = truncate m64 a p
let is_even z = is_0mask z Z.one
type mode =
| NE (* Nearest to even *)
| ZR (* Toward zero *)
| DN (* Toward minus infinity *)
| UP (* Toward plus infinity *)
| NA (* Nearest away from zero *)
type tie = Floor | Tie | Ceil (* positive tie-break *)
[@@ deriving show]
let sign s a = if s then a else Z.neg a
(* s:sign, a: absolute truncated *)
let choose mode tie s a =
match mode , tie with
| ZR , _ -> sign s a
| DN , _ -> if s then a else Z.neg (Z.succ a)
| UP , _ -> if s then Z.succ a else Z.neg a
| NA , Tie -> sign s (Z.succ a)
| NE , Tie -> sign s (if is_even a then a else Z.succ a)
| (NE|NA) , Floor -> sign s a
| (NE|NA) , Ceil -> sign s (Z.succ a)
(* requires 2^n <= a *)
let truncate n a p tie =
let d = log2_mantissa a - n in
let mask = Z.pred (zp2 d) in
let half = zp2 (d-1) in
let r = Z.logand a mask in
let tie =
if Z.lt r half then Floor else
if Z.lt half r then Ceil else tie in
let a0 = Z.shift_right_trunc a d in
a0 , p + d , tie
(* requires m = 2^n *)
let rounding mode n m ((a,p) as u) =
let a0 = Z.abs a in
(* a is odd, m is even *)
if Z.lt a0 m then u (* EXACT *)
else
let a0,p,tie = truncate n a0 p Tie in
let a = choose mode tie (0 < Z.sign a) a0 in
oddify a p
let round ?(mode=NE) ?(bits=80) u =
rounding mode bits (zp2 bits) u
let round_f32 ?(mode=NE) u = rounding mode b32 m32 u
let round_f64 ?(mode=NE) u = rounding mode b64 m64 u
let seizing n m ((a,p) as u) =
let a0 = Z.abs a in
(* a is odd, m is even *)
if Z.lt a0 m then u,u (* EXACT *)
else
let a0,p,_ = truncate n a0 p Tie in
if 0 < Z.sign a then
oddify a0 p , oddify (Z.succ a0) p
else
let a0 = Z.neg a0 in
oddify (Z.pred a0) p , oddify a0 p
let round ?(bits=80) (a,p) = truncate (Z.shift_left Z.one bits) a p
let seize ?(bits=80) u = seizing bits (zp2 bits) u
let seize_f32 = seizing b32 m32
let seize_f64 = seizing b64 m64
(* -------------------------------------------------------------------------- *)
(* --- Z Conversion --- *)
......@@ -171,49 +242,82 @@ let rec magnitude q n =
if Q.geq q q_two then magnitude (Q.div_2exp q 1) (succ n) else
(q,n)
(* invariant m + q^n && 0 <= q < 1 *)
(* invariant m + q.2^n && 0 <= q < 1 *)
let rec decompose bits m q n =
if Q.equal q Q.zero || bits=0 then m else
if Q.equal q Q.zero || bits=0 then m,q else
let q = Q.mul_2exp q 1 in
let n = pred n in
let bits = pred bits in
if Q.lt q Q.one
then
decompose (pred bits) m q n
decompose bits m q n
else
let m = add m (power2 n) in
let q = Q.sub q Q.one in
decompose (pred bits) m q n
decompose bits m q n
exception Undefined
let of_qint ?(bits=80) r =
let half = Q.make Z.one (Z.of_int 2)
let of_qexp ~mode ~bits r n =
(* having one more bit for further rounding *)
let q,n = magnitude (Q.abs r) n in
(* have r = q.2^n /\ 1 <= q < 2 *)
let ((a,p) as m),e = decompose bits (power2 n) (Q.sub q Q.one) n in
(* returns (m,e) such that m + e = 2^n + (q-1).2^n = r, 0<=e<1 *)
let sign = 0 < Q.sign r in
let tie =
if Q.lt e half then Floor else
if Q.gt e half then Ceil else Tie in
let m = zp2 bits in
let a,p,tie =
if Z.lt a m then a,p,tie
else truncate bits a p tie in
oddify (choose mode tie sign a) p
let of_qint ?(mode=NE) ?(bits=80) r =
match Q.classify r with
| Q.ZERO -> zero
| Q.INF | Q.MINF | Q.UNDEF -> raise Undefined
| Q.NZERO ->
let s = Q.sign r in
let q,n = magnitude (Q.abs r) 0 in
(* r = q.2^n /\ 1 <= q < 2 *)
let m = decompose bits (power2 n) (Q.sub q Q.one) n in
(* m + o(e) = 2^n + (q-1).2^n = r *)
(if s < 0 then neg m else m)
| Q.NZERO -> of_qexp ~mode ~bits r 0
let div ?bits a b = of_qint ?bits (Q.div (to_qint a) (to_qint b))
let div ?(mode=NE) ?(bits=80) a b =
if a == zero then zero else
if b == zero then raise Undefined else
if b = one then a else
let p,n = a in let q,m = b in
of_qexp ~mode ~bits (Q.make p q) (n-m)
(* -------------------------------------------------------------------------- *)
(* --- Float Conversion --- *)
(* -------------------------------------------------------------------------- *)
let of_float f = of_qint (Q.of_float f)
let rec p2f f n =
if n > 0 then p2f (f *. 2.0) (pred n) else
if n < 0 then p2f (f /. 2.0) (succ n) else
f
let to_float (a,n) =
let (a,n) = truncate m64 a n in
p2f (Z.to_float a) n
let f_sign = Int64.shift_left 1L 63
let f_unit = Int64.shift_left 1L 52
let f_mantissa = Int64.(sub f_unit 1L)
let f_exponent = Int64.(sub (shift_left 1L 11) 1L)
let of_float u =
match classify_float u with
| FP_zero -> zero
| FP_nan | FP_infinite -> raise Undefined
| FP_normal ->
let a = Int64.bits_of_float u in
let m = Z.of_int64 (Int64.(add f_unit (logand a f_mantissa))) in
let e = Int64.(sub (logand (shift_right_logical a 52) f_exponent) 1075L) in
let s = Int64.(logand a f_sign) <> 0L in
oddify (if s then Z.neg m else m) (Int64.to_int e)
| FP_subnormal ->
let a = Int64.bits_of_float u in
let m = Z.of_int64 (Int64.(logand a f_mantissa)) in
let s = Int64.(logand a f_sign) <> 0L in
oddify (if s then Z.neg m else m) (-1074)
let to_float ?(mode=NE) u =
if u == zero then 0.0 else
let (a,n) = rounding NE b64 m64 u in
Pervasives.ldexp (Z.to_float a) n
(* -------------------------------------------------------------------------- *)
(* --- String Conversion --- *)
......
......@@ -27,29 +27,61 @@ val zero : t
val one : t
val two : t
val make : Z.t -> int -> t
(** The number [p.2^n]. *)
val mantissa : t -> Z.t
val exponent : t -> int
val equal : t -> t -> bool
val compare : t -> t -> int
exception Undefined (** for undefined operations *)
(** {3 Rounding} *)
(** Supported rounding modes *)
type mode =
| NE (** Nearest to even *)
| ZR (** Toward zero *)
| DN (** Toward minus infinity *)
| UP (** Toward plus infinity *)
| NA (** Nearest away from zero *)
val b32 : int (** mantissa of IEEE-32 [~bits:b32=24] *)
val b64 : int (** mantissa of IEEE-64 [~bits:b64=53] *)
val round : ?mode:mode -> ?bits:int -> t -> t
(** Rounded to [bits] binary significant digits, twoard zero (defaults to 80).
Default rounding mode is [NE]. *)
val round_f32 : ?mode:mode -> t -> t
(** Round to an IEEE float. Same as [round ~bits:28]. *)
val round_f64 : ?mode:mode -> t -> t
(** Round to an IEEE double. Same as [round ~bits:54]. *)
val seize : ?bits:int -> t -> t*t
(** Returns the two consecutive floats with [bits] precision around the given float. *)
(** {3 Arithmetics} *)
val neg : t -> t
val add : t -> t -> t
val sub : t -> t -> t
val mul : t -> t -> t
val div : ?bits:int -> t -> t -> t
val div : ?mode:mode -> ?bits:int -> t -> t -> t
(** Division rounded to [bits] digits (default is 80).
The rounding mode is toward zero. *)
The default rounding mode is [NE].
@raise Undefined for division by zero.
*)
val (lsl) : t -> int -> t (** Multiply with a positive or negative power of 2. *)
val (lsr) : t -> int -> t (** Divide by a positive or negative power of 2. *)
val shift_left : t -> int -> t (** Multiply with a positive or negative power of 2. *)
val shift_right : t -> int -> t (** Divide by a positive or negative power of 2. *)
val power2 : int -> t (** Return the positive or negative power of 2. *)
val log2 : t -> int (** Return the smallest upper bound in power of 2. *)
val round : ?bits:int -> t -> t
(** Rounded to [bits] binary significant digits, twoard zero (defaults to 80). *)
val round_f32 : t -> t (** Round to an IEEE float. Same as [round ~bits:28]. *)
val round_f64 : t -> t (** Round to an IEEE double. Same as [round ~bits:54]. *)
(** {3 Conversion with Z} *)
val of_zint : Z.t -> t (** Exact. *)
......@@ -57,7 +89,7 @@ val to_zint : t -> Z.t (** Rounded towards 0. *)
(** {3 Conversion with Q} *)
val of_qint : ?bits:int -> Q.t -> t
val of_qint : ?mode:mode -> ?bits:int -> Q.t -> t
(** Rounded to [bits] binary digits (default 80). *)
val to_qint : t -> Q.t (** Exact. *)
......@@ -65,12 +97,12 @@ val to_qint : t -> Q.t (** Exact. *)
(** {3 Conversion with OCaml integers} *)
val of_int : int -> t (** Exact. *)
val to_int : t -> int (** When fits to OCaml integers. Fractional part is truncated. *)
val to_int : t -> int (** Fractional part is truncated. *)
(** {3 Conversion with OCaml floats} *)
val of_float : float -> t (** Exact. *)
val to_float : t -> float (** Rounded. *)
val of_float : float -> t (** Exact. @raise Undefined for NaN and infinites. *)
val to_float : ?mode:mode -> t -> float (** Rounded with default mode [NE]. *)
(** {3 Formatting}
......
......@@ -57,6 +57,9 @@ localinstall: all .local
# --- Tests
# --------------------------------------------------------------------------
%.meld:
make -C tests $@
tests: localinstall
@echo "Running Tests"
@make -C tests all
......
......@@ -10,9 +10,11 @@ let bench_add x y =
let fy = B64.of_float y in
let qfx = F.of_float x in
let qfy = F.of_float y in
let add = F.add qfx qfy in
(** use x' and y' otherwise ocaml do constant propagation *)
let x' = B64.to_float fx in
let y' = B64.to_float fy in
let u = x' +. y' in
assert (x = x' && y = y');
Bench.Test.create_group ~name:(Printf.sprintf "%.2e+.%.2e" x y)
[
......@@ -22,13 +24,25 @@ let bench_add x y =
ignore (Farith.D.roundq ~mw:52 ~ew:11 Farith.NE (Q.add qx qy)));
Bench.Test.create ~name:"Farith.B64.add" (fun () ->
ignore (B64.add Farith.NE fx fy));
Bench.Test.create ~name:"Farith.B64.round" (fun () ->
ignore (B64.of_float u));
Bench.Test.create ~name:"F.add+F.round" (fun () ->
ignore (F.round_f64 (F.add qfx qfy)));
Bench.Test.create ~name:"F.add" (fun () ->
ignore (F.add qfx qfy));
Bench.Test.create ~name:"F.log2" (fun () ->
ignore (F.log2 add));
Bench.Test.create ~name:"F.round" (fun () ->
ignore (F.round_f64 add));
Bench.Test.create ~name:"F.of_float" (fun () ->
ignore (F.of_float u));
Bench.Test.create ~name:"Q.of_float" (fun () ->
ignore (Q.of_float u));
]
let main () =
Command.run (Bench.make_command [
(bench_add 1.0 1.0);
(*(bench_add 1.0 1.0);*)
(bench_add 1.0 0.00000001)
])
......
......@@ -141,7 +141,7 @@ module B64 = struct include Farith_F_aux.B64 include Common
(** unfortunately of_bits and to_bits wants positive argument (unsigned int64)
and Z.of_int64/Z.to_int64 wants signed argument (signed int64)
*)
*)
let mask_one = Z.pred (Z.shift_left Z.one 64)
let of_float f =
let fb = (Big_int_Z.big_int_of_int64 (Int64.bits_of_float f)) in
......
module G = Farith.B64
let gpp mode fmt q = G.pp fmt (G.of_q mode q)
let fpp mode fmt q = F.pp fmt (F.of_qint ~mode ~bits:F.b64 q)
let () =
begin
Format.printf "[G] 3.1 = %a@." G.pp (G.of_float 3.1) ;
Format.printf "[F] 3.1 = %a@." F.pp (F.of_float 3.1) ;
let q = Q.make (Z.of_int 31) (Z.of_int 10) in
Format.printf "[Fp] 3.1 = %a@." F.pp (F.round ~mode:F.ZR ~bits:25 (F.of_float 3.1)) ;
Format.printf "[Fq] 3.1 = %a@." F.pp (F.of_qint ~mode:F.ZR ~bits:25 q) ;
Format.printf "-----------------------@." ;
Format.printf " Simple Roundings@." ;
let job m m1 m2 q =
begin
Format.printf "-----------------------@." ;
Format.printf "Q=%a@." Q.pp_print q ;
Format.printf "[G-%s] %a@." m (gpp m1) q ;
Format.printf "[F-%s] %a@." m (fpp m2) q ;
Format.printf "[G-%s] %a@." m (gpp m1) (Q.neg q) ;
Format.printf "[F-%s] %a@." m (fpp m2) (Q.neg q) ;
end in
job "NE" Farith.NE F.NE q ;
job "NA" Farith.NA F.NA q ;
job "ZR" Farith.ZR F.ZR q ;
job "UP" Farith.UP F.UP q ;
job "DN" Farith.DN F.DN q ;
Format.printf "-----------------------@." ;
Format.printf " Tie Breaks (NE)@." ;
let eps = Z.shift_left Z.one 51 in
let e_ex = Q.make (Z.of_int 0b100) eps in
let e_lo = Q.make (Z.of_int 0b101) eps in
let e_ti = Q.make (Z.of_int 0b110) eps in
let e_up = Q.make (Z.of_int 0b111) eps in
job "NE-ex" Farith.NE F.NE (Q.add Q.one e_ex) ;
job "NE-lo" Farith.NE F.NE (Q.add Q.one e_lo) ;
job "NE-ti" Farith.NE F.NE (Q.add Q.one e_ti) ;
job "NE-up" Farith.NE F.NE (Q.add Q.one e_up) ;
Format.printf "-----------------------@." ;
Format.printf " Tie Breaks (NA)@." ;
job "NA-ex" Farith.NA F.NA (Q.add Q.one e_ex) ;
job "NA-lo" Farith.NA F.NA (Q.add Q.one e_lo) ;
job "NA-ti" Farith.NA F.NA (Q.add Q.one e_ti) ;
job "NA-up" Farith.NA F.NA (Q.add Q.one e_up) ;
end
[G] 3.1 = +6980579422424269p-51
[F] 3.1 = +6980579422424269p-51
[Fp] 3.1 = +6501171p-21
[Fq] 3.1 = +6501171p-21
-----------------------
Simple Roundings
-----------------------
Q=31/10
[G-NE] +6980579422424269p-51
[F-NE] +6980579422424269p-51
[G-NE] -6980579422424269p-51
[F-NE] -6980579422424269p-51
-----------------------
Q=31/10
[G-NA] +6980579422424269p-51
[F-NA] +6980579422424269p-51
[G-NA] -6980579422424269p-51
[F-NA] -6980579422424269p-51
-----------------------
Q=31/10
[G-ZR] +1745144855606067p-49
[F-ZR] +1745144855606067p-49
[G-ZR] -1745144855606067p-49
[F-ZR] -1745144855606067p-49
-----------------------
Q=31/10
[G-UP] +6980579422424269p-51
[F-UP] +6980579422424269p-51
[G-UP] -1745144855606067p-49
[F-UP] -1745144855606067p-49
-----------------------
Q=31/10
[G-DN] +1745144855606067p-49
[F-DN] +1745144855606067p-49
[G-DN] -6980579422424269p-51
[F-DN] -6980579422424269p-51
-----------------------
Tie Breaks (NE)
-----------------------
Q=562949953421313/562949953421312
[G-NE-ex] +562949953421313p-49
[F-NE-ex] +562949953421313p-49
[G-NE-ex] -562949953421313p-49
[F-NE-ex] -562949953421313p-49
-----------------------
Q=2251799813685253/2251799813685248
[G-NE-lo] +2251799813685253p-51
[F-NE-lo] +2251799813685253p-51
[G-NE-lo] -2251799813685253p-51
[F-NE-lo] -2251799813685253p-51
-----------------------
Q=1125899906842627/1125899906842624
[G-NE-ti] +1125899906842627p-50
[F-NE-ti] +1125899906842627p-50
[G-NE-ti] -1125899906842627p-50
[F-NE-ti] -1125899906842627p-50
-----------------------
Q=2251799813685255/2251799813685248
[G-NE-up] +2251799813685255p-51
[F-NE-up] +2251799813685255p-51
[G-NE-up] -2251799813685255p-51
[F-NE-up] -2251799813685255p-51
-----------------------
Tie Breaks (NA)
-----------------------
Q=562949953421313/562949953421312
[G-NA-ex] +562949953421313p-49
[F-NA-ex] +562949953421313p-49
[G-NA-ex] -562949953421313p-49
[F-NA-ex] -562949953421313p-49
-----------------------
Q=2251799813685253/2251799813685248
[G-NA-lo] +2251799813685253p-51
[F-NA-lo] +2251799813685253p-51
[G-NA-lo] -2251799813685253p-51
[F-NA-lo] -2251799813685253p-51
-----------------------
Q=1125899906842627/1125899906842624
[G-NA-ti] +1125899906842627p-50
[F-NA-ti] +1125899906842627p-50
[G-NA-ti] -1125899906842627p-50
[F-NA-ti] -1125899906842627p-50
-----------------------
Q=2251799813685255/2251799813685248
[G-NA-up] +2251799813685255p-51
[F-NA-up] +2251799813685255p-51
[G-NA-up] -2251799813685255p-51
[F-NA-up] -2251799813685255p-51
let eps n = Pervasives.ldexp 1.0 n
let pp_class fmt u =
Format.pp_print_string fmt
begin
match classify_float u with
| FP_zero -> "zero"
| FP_normal -> "normal"
| FP_subnormal -> "sub-normal"
| FP_infinite -> "infinity"
| FP_nan -> "nan"
end
let test_of_float n =
let u = eps n in
try
let f = F.of_float u in
Format.printf "of-float 1p%d = %a (%a)@." n F.pp f pp_class u
with F.Undefined ->
Format.printf "of-float 1p%d = undefined (%a)@." n pp_class u
let test_to_float n =
begin
let u = eps n in
let f = F.power2 n in
let v = F.to_float f in