From c78b2fdaeaebb99eb120734343621bff342f8510 Mon Sep 17 00:00:00 2001
From: Alban Grastien <alban.grastien@cea.fr>
Date: Thu, 11 Jul 2024 17:41:42 +0200
Subject: [PATCH] Starting to move methods to Nir.

---
 lib/nir/gentensor.ml  |  1 +
 lib/nir/gentensor.mli |  3 +++
 lib/nir/node.ml       | 50 +++++++++++++++++++++++++++++++++++++++++++
 lib/nir/node.mli      | 26 ++++++++++++++++++++++
 lib/nir/tensor.ml     |  3 +++
 lib/nir/tensor.mli    |  4 ++++
 lib/ovo/ovo.ml        | 36 ++++---------------------------
 7 files changed, 91 insertions(+), 32 deletions(-)

diff --git a/lib/nir/gentensor.ml b/lib/nir/gentensor.ml
index 8499662..b8adcbe 100644
--- a/lib/nir/gentensor.ml
+++ b/lib/nir/gentensor.ml
@@ -27,6 +27,7 @@ type t =
 
 let create_1_float f = Float (Tensor.create_1_float f)
 let create_1_int64 i = Int64 (Tensor.create_1_int64 i)
+let create_const_float shape f = Float(Tensor.create_const_float shape f)
 
 let of_int64_array ?shape t =
   let sh =
diff --git a/lib/nir/gentensor.mli b/lib/nir/gentensor.mli
index 1b61793..f84fc04 100644
--- a/lib/nir/gentensor.mli
+++ b/lib/nir/gentensor.mli
@@ -26,6 +26,9 @@ type t =
 
 val create_1_float : float -> t
 val create_1_int64 : int64 -> t
+val create_const_float : Shape.t -> float -> t
+(** [create_const_float shape v] returns a tensor of shape [shape]
+where each value is initialized to [v]. *)
 
 val of_float_array : ?shape:Shape.t -> float array -> t
 (** [of_float_array a shape] returns a Tensor with data contained in [l] and
diff --git a/lib/nir/node.ml b/lib/nir/node.ml
index 0fd4964..f09d211 100644
--- a/lib/nir/node.ml
+++ b/lib/nir/node.ml
@@ -498,3 +498,53 @@ let iter_rec f node =
       f n)
   in
   aux node
+
+let sum_list ?(shp=Shape.of_array [|1|]) ns =
+  match ns with 
+  | [] -> create @@ (Constant { data = Gentensor.create_const_float shp 0.0})
+  | hd::tl -> 
+    List.fold tl 
+    ~init:hd 
+    ~f:( + )
+
+let partial_dot_product ?shp arr1 arr2 first last =
+  let ioob str = failwith @@ "Index out of bound for arr" ^ str in
+  if last > Array.length arr1 then ioob "1"
+  else if last > Array.length arr2 then ioob "2"
+  else if last < first then 
+    let rec aux index acc = 
+      if index = last then acc else 
+      let acc = acc + (arr1.(index) * arr2.(index))
+      in
+      aux Int.(index+1) acc
+    in
+    aux Int.(first+1) (arr1.(first) * arr2.(first))
+  else 
+    let actual_shape = 
+      if Array.length arr1 <> 0 then 
+        compute_shape arr1.(0)
+      else if Array.length arr2 <> 0 then
+        compute_shape arr2.(0)
+      else match shp with 
+      | Some s -> s
+      | None -> failwith "Cannot determine shape of tensor"
+    in 
+    create @@ (Constant { data = Gentensor.create_const_float actual_shape 0.0})
+    (*
+  if Array.length arr1 < last && Array.length arr2 < last then 
+    failwith "Index out of bound"
+  else
+  let zero_node =
+    create @@ Constant { data = Gentensor.create_1_float 0.0 }
+  in
+  let rec aux index acc =
+    if index = last
+    then acc
+    else
+      let prod = arr1.(index) * arr2.(index) in
+      let new_acc = acc + prod in
+      aux Int.(index + 1) new_acc
+  in
+  aux first zero_node
+*)
+ 
\ No newline at end of file
diff --git a/lib/nir/node.mli b/lib/nir/node.mli
index 39371f9..68918fe 100644
--- a/lib/nir/node.mli
+++ b/lib/nir/node.mli
@@ -175,3 +175,29 @@ val mul_float : t -> float -> t
 val div_float : ?encode:bool -> t -> float -> t
 val concat_0 : t list -> t
 val reshape : Shape.t -> t -> t
+
+val sum_list : ?shp:Shape.t -> t list -> t
+(** [sum_list shp ns] is a node corresponding to the sum of the nodes in [ns]. 
+If [ns] is empty, this returns a tensor of shape [shp] filled with 0s. 
+By default, [shp] is a single float. *)
+
+val partial_dot_product : ?shp:Shape.t -> t array -> t array -> int -> int -> t
+(** [partial_dot_product arr1 arr2 first last] 
+where [arr1 = [|n11,n12,...,n1k1|]] and [arr2 = [|n21,n22,...,n2k2|]]
+is a node corresponding to 
+[(n1first * n2first) + (n1first+1 * n2first+1) + ... + (n1last-1 * n2last-1)]
+if this exists.
+It is assumed that [arr1] and [arr2] contain tensors of similar shape.
+Edge cases include: 
+
+{ul 
+  {- if [last > length n1] or [last > length n2], then fails}
+
+  {- if [last >= first], then returns a tensor where all values are initialized to 0.
+    The shape of this tensor is determined using the following order:
+    {ol
+      {- if [length arr1 <> 0] then use the shape of [arr1.(0)]}
+      {- if [length arr2 <> 0] then use the shape of [arr2.(0)]}
+      {- if [shp <> None], then use [shp]}
+      {- otherwise, fails}}}}
+*)
\ No newline at end of file
diff --git a/lib/nir/tensor.ml b/lib/nir/tensor.ml
index c326c57..2ba3ab1 100644
--- a/lib/nir/tensor.ml
+++ b/lib/nir/tensor.ml
@@ -38,6 +38,9 @@ let create_1_float v =
   Bigarray.Genarray.set t [| 0 |] v;
   t
 
+let create_const_float shape v = 
+  Bigarray.Genarray.init Bigarray.float64 Bigarray.c_layout (Shape.to_array shape) (fun _ -> v)
+
 let create_1_int64 v =
   let t = Bigarray.Genarray.(create Bigarray.int64 Bigarray.c_layout [| 1 |]) in
   Bigarray.Genarray.set t [| 0 |] v;
diff --git a/lib/nir/tensor.mli b/lib/nir/tensor.mli
index f4487d8..f3c8e90 100644
--- a/lib/nir/tensor.mli
+++ b/lib/nir/tensor.mli
@@ -46,6 +46,10 @@ val create_1_int64 : int64 -> (int64, Bigarray.int64_elt) t
 (** [create_1_int64 i] returns an unidimentional tensor with one int64 value
     [i]. *)
 
+val create_const_float : Shape.t -> float -> (float, Bigarray.float64_elt) t
+(** [create_const_float shape v] returns a tensor of shape [shape]
+where each value is initialized to [v]. *)
+
 val shape : ('a, 'b) t -> Shape.t
 
 val flatten : ('a, 'b) t -> 'a list
diff --git a/lib/ovo/ovo.ml b/lib/ovo/ovo.ml
index f34c015..1835468 100644
--- a/lib/ovo/ovo.ml
+++ b/lib/ovo/ovo.ml
@@ -349,34 +349,6 @@ let compute_start_end ovo =
 
 module IR = Nir
 
-let sum_list list =
-  List.fold list
-    ~init:
-      (IR.Node.create
-      @@ IR.Node.Constant { data = IR.Gentensor.create_1_float 0.0 })
-    ~f:(fun acc el -> IR.Node.(acc + el))
-
-let partial_dot_product arr1 arr2 first last =
-  (* Performs the dot product between arr1 and arr2 but only for the indices
-     between first and last *)
-  let zero_node =
-    IR.Node.create
-    @@ IR.Node.Constant { data = IR.Gentensor.create_1_float 0.0 }
-  in
-  let rec aux index acc =
-    if index = last
-    then acc
-    else
-      let prod = IR.Node.(arr1.(index) * arr2.(index)) in
-      let new_acc = IR.Node.(acc + prod) in
-      aux (index + 1) new_acc
-  in
-  aux first zero_node
-
-let dot_product arr1 arr2 =
-  (* Assumes arr1 and arr2 have the same arity. *)
-  partial_dot_product arr1 arr2 0 (Array.length arr1)
-
 let float_array_constant arr =
   Array.init (Array.length arr) ~f:(fun index ->
     IR.Node.create
@@ -411,7 +383,7 @@ let build_kernel ovo input_node =
       (* TODO: use Array.map *)
       Array.init (Array.length svs) ~f:(fun sv_number ->
         let (sv : IR.Node.t array) = svs.(sv_number) in
-        dot_product sv copy_input)
+        Nir.Node.partial_dot_product sv copy_input 0 (Array.length sv))
     in
     result
   | Rbf _ -> raise Not_implemented_yet
@@ -423,10 +395,10 @@ let one_v_one_scores ovo kernel dual_coefs intercept =
   let start_arr, end_arr = compute_start_end ovo in
   let specific_score c1 c2 intercept_index =
     let score1 =
-      partial_dot_product dual_coefs.(c1) kernel start_arr.(c2) end_arr.(c2)
+      IR.Node.partial_dot_product dual_coefs.(c1) kernel start_arr.(c2) end_arr.(c2)
     in
     let score2 =
-      partial_dot_product dual_coefs.(c2 - 1) kernel start_arr.(c1) end_arr.(c1)
+      IR.Node.partial_dot_product dual_coefs.(c2 - 1) kernel start_arr.(c1) end_arr.(c1)
     in
     let result = IR.Node.(score1 + score2 + intercept.(intercept_index)) in
     (c1, c2, result)
@@ -486,7 +458,7 @@ let compute_points ovo scores =
   Array.init ovo.nb_classes ~f:(fun cl ->
     let pscores = scores_of_class cl in
     let points = turn_prec_into_points pscores in
-    sum_list points)
+    Nir.Node.sum_list points)
 
 let to_nn ovo =
   let input_node =
-- 
GitLab