From c78b2fdaeaebb99eb120734343621bff342f8510 Mon Sep 17 00:00:00 2001 From: Alban Grastien <alban.grastien@cea.fr> Date: Thu, 11 Jul 2024 17:41:42 +0200 Subject: [PATCH] Starting to move methods to Nir. --- lib/nir/gentensor.ml | 1 + lib/nir/gentensor.mli | 3 +++ lib/nir/node.ml | 50 +++++++++++++++++++++++++++++++++++++++++++ lib/nir/node.mli | 26 ++++++++++++++++++++++ lib/nir/tensor.ml | 3 +++ lib/nir/tensor.mli | 4 ++++ lib/ovo/ovo.ml | 36 ++++--------------------------- 7 files changed, 91 insertions(+), 32 deletions(-) diff --git a/lib/nir/gentensor.ml b/lib/nir/gentensor.ml index 8499662..b8adcbe 100644 --- a/lib/nir/gentensor.ml +++ b/lib/nir/gentensor.ml @@ -27,6 +27,7 @@ type t = let create_1_float f = Float (Tensor.create_1_float f) let create_1_int64 i = Int64 (Tensor.create_1_int64 i) +let create_const_float shape f = Float(Tensor.create_const_float shape f) let of_int64_array ?shape t = let sh = diff --git a/lib/nir/gentensor.mli b/lib/nir/gentensor.mli index 1b61793..f84fc04 100644 --- a/lib/nir/gentensor.mli +++ b/lib/nir/gentensor.mli @@ -26,6 +26,9 @@ type t = val create_1_float : float -> t val create_1_int64 : int64 -> t +val create_const_float : Shape.t -> float -> t +(** [create_const_float shape v] returns a tensor of shape [shape] +where each value is initialized to [v]. *) val of_float_array : ?shape:Shape.t -> float array -> t (** [of_float_array a shape] returns a Tensor with data contained in [l] and diff --git a/lib/nir/node.ml b/lib/nir/node.ml index 0fd4964..f09d211 100644 --- a/lib/nir/node.ml +++ b/lib/nir/node.ml @@ -498,3 +498,53 @@ let iter_rec f node = f n) in aux node + +let sum_list ?(shp=Shape.of_array [|1|]) ns = + match ns with + | [] -> create @@ (Constant { data = Gentensor.create_const_float shp 0.0}) + | hd::tl -> + List.fold tl + ~init:hd + ~f:( + ) + +let partial_dot_product ?shp arr1 arr2 first last = + let ioob str = failwith @@ "Index out of bound for arr" ^ str in + if last > Array.length arr1 then ioob "1" + else if last > Array.length arr2 then ioob "2" + else if last < first then + let rec aux index acc = + if index = last then acc else + let acc = acc + (arr1.(index) * arr2.(index)) + in + aux Int.(index+1) acc + in + aux Int.(first+1) (arr1.(first) * arr2.(first)) + else + let actual_shape = + if Array.length arr1 <> 0 then + compute_shape arr1.(0) + else if Array.length arr2 <> 0 then + compute_shape arr2.(0) + else match shp with + | Some s -> s + | None -> failwith "Cannot determine shape of tensor" + in + create @@ (Constant { data = Gentensor.create_const_float actual_shape 0.0}) + (* + if Array.length arr1 < last && Array.length arr2 < last then + failwith "Index out of bound" + else + let zero_node = + create @@ Constant { data = Gentensor.create_1_float 0.0 } + in + let rec aux index acc = + if index = last + then acc + else + let prod = arr1.(index) * arr2.(index) in + let new_acc = acc + prod in + aux Int.(index + 1) new_acc + in + aux first zero_node +*) + \ No newline at end of file diff --git a/lib/nir/node.mli b/lib/nir/node.mli index 39371f9..68918fe 100644 --- a/lib/nir/node.mli +++ b/lib/nir/node.mli @@ -175,3 +175,29 @@ val mul_float : t -> float -> t val div_float : ?encode:bool -> t -> float -> t val concat_0 : t list -> t val reshape : Shape.t -> t -> t + +val sum_list : ?shp:Shape.t -> t list -> t +(** [sum_list shp ns] is a node corresponding to the sum of the nodes in [ns]. +If [ns] is empty, this returns a tensor of shape [shp] filled with 0s. +By default, [shp] is a single float. *) + +val partial_dot_product : ?shp:Shape.t -> t array -> t array -> int -> int -> t +(** [partial_dot_product arr1 arr2 first last] +where [arr1 = [|n11,n12,...,n1k1|]] and [arr2 = [|n21,n22,...,n2k2|]] +is a node corresponding to +[(n1first * n2first) + (n1first+1 * n2first+1) + ... + (n1last-1 * n2last-1)] +if this exists. +It is assumed that [arr1] and [arr2] contain tensors of similar shape. +Edge cases include: + +{ul + {- if [last > length n1] or [last > length n2], then fails} + + {- if [last >= first], then returns a tensor where all values are initialized to 0. + The shape of this tensor is determined using the following order: + {ol + {- if [length arr1 <> 0] then use the shape of [arr1.(0)]} + {- if [length arr2 <> 0] then use the shape of [arr2.(0)]} + {- if [shp <> None], then use [shp]} + {- otherwise, fails}}}} +*) \ No newline at end of file diff --git a/lib/nir/tensor.ml b/lib/nir/tensor.ml index c326c57..2ba3ab1 100644 --- a/lib/nir/tensor.ml +++ b/lib/nir/tensor.ml @@ -38,6 +38,9 @@ let create_1_float v = Bigarray.Genarray.set t [| 0 |] v; t +let create_const_float shape v = + Bigarray.Genarray.init Bigarray.float64 Bigarray.c_layout (Shape.to_array shape) (fun _ -> v) + let create_1_int64 v = let t = Bigarray.Genarray.(create Bigarray.int64 Bigarray.c_layout [| 1 |]) in Bigarray.Genarray.set t [| 0 |] v; diff --git a/lib/nir/tensor.mli b/lib/nir/tensor.mli index f4487d8..f3c8e90 100644 --- a/lib/nir/tensor.mli +++ b/lib/nir/tensor.mli @@ -46,6 +46,10 @@ val create_1_int64 : int64 -> (int64, Bigarray.int64_elt) t (** [create_1_int64 i] returns an unidimentional tensor with one int64 value [i]. *) +val create_const_float : Shape.t -> float -> (float, Bigarray.float64_elt) t +(** [create_const_float shape v] returns a tensor of shape [shape] +where each value is initialized to [v]. *) + val shape : ('a, 'b) t -> Shape.t val flatten : ('a, 'b) t -> 'a list diff --git a/lib/ovo/ovo.ml b/lib/ovo/ovo.ml index f34c015..1835468 100644 --- a/lib/ovo/ovo.ml +++ b/lib/ovo/ovo.ml @@ -349,34 +349,6 @@ let compute_start_end ovo = module IR = Nir -let sum_list list = - List.fold list - ~init: - (IR.Node.create - @@ IR.Node.Constant { data = IR.Gentensor.create_1_float 0.0 }) - ~f:(fun acc el -> IR.Node.(acc + el)) - -let partial_dot_product arr1 arr2 first last = - (* Performs the dot product between arr1 and arr2 but only for the indices - between first and last *) - let zero_node = - IR.Node.create - @@ IR.Node.Constant { data = IR.Gentensor.create_1_float 0.0 } - in - let rec aux index acc = - if index = last - then acc - else - let prod = IR.Node.(arr1.(index) * arr2.(index)) in - let new_acc = IR.Node.(acc + prod) in - aux (index + 1) new_acc - in - aux first zero_node - -let dot_product arr1 arr2 = - (* Assumes arr1 and arr2 have the same arity. *) - partial_dot_product arr1 arr2 0 (Array.length arr1) - let float_array_constant arr = Array.init (Array.length arr) ~f:(fun index -> IR.Node.create @@ -411,7 +383,7 @@ let build_kernel ovo input_node = (* TODO: use Array.map *) Array.init (Array.length svs) ~f:(fun sv_number -> let (sv : IR.Node.t array) = svs.(sv_number) in - dot_product sv copy_input) + Nir.Node.partial_dot_product sv copy_input 0 (Array.length sv)) in result | Rbf _ -> raise Not_implemented_yet @@ -423,10 +395,10 @@ let one_v_one_scores ovo kernel dual_coefs intercept = let start_arr, end_arr = compute_start_end ovo in let specific_score c1 c2 intercept_index = let score1 = - partial_dot_product dual_coefs.(c1) kernel start_arr.(c2) end_arr.(c2) + IR.Node.partial_dot_product dual_coefs.(c1) kernel start_arr.(c2) end_arr.(c2) in let score2 = - partial_dot_product dual_coefs.(c2 - 1) kernel start_arr.(c1) end_arr.(c1) + IR.Node.partial_dot_product dual_coefs.(c2 - 1) kernel start_arr.(c1) end_arr.(c1) in let result = IR.Node.(score1 + score2 + intercept.(intercept_index)) in (c1, c2, result) @@ -486,7 +458,7 @@ let compute_points ovo scores = Array.init ovo.nb_classes ~f:(fun cl -> let pscores = scores_of_class cl in let points = turn_prec_into_points pscores in - sum_list points) + Nir.Node.sum_list points) let to_nn ovo = let input_node = -- GitLab