diff --git a/Makefile b/Makefile index de002d3287462d7ba94b0a04550b67137987647a..bb056159a9c48afe25209861e42249d533732c19 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ all: - dune build --root=. @install caisar.opam caisar-nnet.opam caisar-onnx.opam caisar-ovo.opam + dune build --root=. @install caisar.opam caisar-nnet.opam caisar-onnx.opam caisar-ovo.opam caisar-ir.opam install: dune install diff --git a/caisar-ir.opam b/caisar-ir.opam new file mode 100644 index 0000000000000000000000000000000000000000..6841ff21dcc2a4a9b8763fa0ac51e1992ff66a87 --- /dev/null +++ b/caisar-ir.opam @@ -0,0 +1,39 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +version: "0.1" +synopsis: "CAISAR's intermediate representation" +maintainer: [ + "LAISER team, Software Safety and Security Laboratory, CEA-List" +] +authors: ["LAISER team, Software Safety and Security Laboratory, CEA-List"] +license: "LGPL-2.1-only" +homepage: "https://git.frama-c.com/pub/caisar" +doc: "https://git.frama-c.com/pub/caisar" +bug-reports: "https://git.frama-c.com/pub/caisar/issues" +depends: [ + "ocaml" {>= "4.13"} + "dune" {>= "2.9" & >= "2.9.3"} + "base" {>= "v0.14.0"} + "ocaml-protoc-plugin" {= "4.2.0"} + "ocamlgraph" {>= "1.8.8"} + "ppx_inline_test" {>= "0.12.0"} + "ppx_deriving" {>= "4.4.1"} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "--promote-install-files=false" + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] + ["dune" "install" "-p" name "--create-install-files" name] +] +dev-repo: "git+https://git.frama-c.com/pub/caisar.git" diff --git a/caisar.opam b/caisar.opam index 6bf5ad2fd63d1614b91fcdf1549690b2da6eb413..60e13a6be66cd8713e6761e7332afea6635a3806 100644 --- a/caisar.opam +++ b/caisar.opam @@ -34,6 +34,7 @@ depends: [ "caisar-nnet" {= version} "caisar-ovo" {= version} "caisar-onnx" {= version} + "caisar-ir" {= version} "odoc" {with-doc} ] build: [ diff --git a/dune-project b/dune-project index e2b7cc99e4891e854084ef877e19bc32d6fc8dd9..8d1d7d4e4b5e30813740dba7765442460b82774c 100644 --- a/dune-project +++ b/dune-project @@ -48,6 +48,20 @@ ) ) +(package + (name caisar-ir) + (synopsis "CAISAR's intermediate representation") + (depends + (ocaml (>= 4.13)) + (dune (>= 2.9.3)) + (base (>= v0.14.0)) + (ocaml-protoc-plugin (= 4.2.0)) + (ocamlgraph (>= 1.8.8)) + (ppx_inline_test (>= 0.12.0)) + (ppx_deriving (>= 4.4.1)) + ) +) + (package (name caisar) (synopsis "A platform for characterizing the safety and robustness of artificial intelligence based software") @@ -73,6 +87,7 @@ (caisar-nnet (= :version)) (caisar-ovo (= :version)) (caisar-onnx (= :version)) + (caisar-ir (= :version)) ) (sites (share stdlib) diff --git a/lib/ir/dune b/lib/ir/dune new file mode 100644 index 0000000000000000000000000000000000000000..e7d5f582a639f094be25b03fa2d47e57ba1628c0 --- /dev/null +++ b/lib/ir/dune @@ -0,0 +1,17 @@ +(library + (name ir) + (public_name caisar-ir) + (preprocess + (pps + ppx_inline_test + ppx_deriving.map + ppx_deriving.show + ppx_deriving.iter + ppx_deriving.fold)) + (inline_tests) + (libraries base ocplib-endian piqirun.pb zarith ocamlgraph stdio)) + +(env + (dev + (flags (:standard -warn-error -A)) + )) diff --git a/lib/ir/nier_cfg.ml b/lib/ir/nier_cfg.ml new file mode 100644 index 0000000000000000000000000000000000000000..9c9008e6c3b3ad05a638ffc3eff99bacdeaca58a --- /dev/null +++ b/lib/ir/nier_cfg.ml @@ -0,0 +1,371 @@ +open Base +open Stdio +open Bigarray + +module Tensor = struct + type ('a, 'b) t = ('a, 'b, c_layout) Genarray.t + type shape = int array [@@deriving show] + + type ('a, 'b) t_kind = + | K_int : (int64, int64_elt) t_kind + | K_float : (float, float64_elt) t_kind + + let create : type a b. shape -> (a, b) t_kind -> (a, b) t = + fun shape -> function + | K_float -> Genarray.create float64 c_layout shape + | K_int -> Genarray.create int64 c_layout shape + + let unsqueeze ~sh1 ~sh2 = + let sh1, sh2 = (Array.to_list sh1, Array.to_list sh2) in + let longest, shortest = + match List.length sh1 > List.length sh2 with + | true -> (sh1, sh2) + | false -> (sh2, sh1) + in + (*find the index of the potential additional dimension*) + let where_zero = + match List.nth_exn longest 0 with + | 0 -> Some 0 + | _ -> ( + match List.last_exn longest with + | 0 -> Some (List.length longest - 1) + | _ -> None) + in + match where_zero with + | Some idx -> ( + match List.sub longest ~pos:idx ~len:(List.length shortest) with + | [] -> None + | _ -> Some (Array.of_list longest)) + | None -> None + + let get t idx = Genarray.get t idx + let set t idx v = Genarray.set t idx v + + let all_coords sh = + let sh = Array.to_list sh in + let rec ranges acc shape = + match shape with + | x :: y -> ranges (List.init x ~f:(fun i -> i) :: acc) y + | [] -> acc + (* a list containing a list of all possible indexes, for each dimension *) + in + let xxs = ranges [] sh in + (* add to each element of the list of all possible coordinates all*) + (* * possible indexes ... *) + let aux acc xs = + List.concat + @@ List.map xs ~f:(fun x -> List.map ~f:(fun lt -> x :: lt) acc) + (* ... for each dimension, starting from an empty list of*) + (* * possible coordinates *) + in + List.fold xxs ~init:[ [] ] ~f:aux + + let flatten t = + let shape = Genarray.dims t in + let all_idxs = all_coords shape in + List.init (List.length all_idxs) ~f:(fun i -> + get t (Array.of_list @@ List.nth_exn all_idxs i)) + + let get_shape t = Genarray.dims t + + let equal f t1 t2 = + let t1_sh = get_shape t1 and t2_sh = get_shape t2 in + if Array.equal ( = ) t1_sh t2_sh + then + let all_idxs = all_coords (get_shape t1) in + List.fold + ~f:(fun acc x -> + if acc + then f (get t1 (Array.of_list x)) (get t2 (Array.of_list x)) + else false) + all_idxs ~init:true + else false + + let num_neurons sh = Array.fold ~init:1 ~f:(fun x y -> x * y) sh + + let get_flatnd_idx ~idx ~sh flt = + let sh = Array.to_list sh in + let pop_sh = List.tl_exn sh @ [ 1 ] in + let rec get_factors_from_sh sh_f l = + match sh_f with + | [] -> List.rev l + | _ -> + get_factors_from_sh (List.tl_exn sh_f) + (List.fold ~f:(fun x y -> x * y) ~init:1 sh_f :: l) + in + let factors = get_factors_from_sh pop_sh [] in + let coord_in_data = + match + List.fold2 + ~f:(fun x y z -> x + (y * z)) + ~init:0 (Array.to_list idx) factors + with + | List.Or_unequal_lengths.Ok i -> i + | List.Or_unequal_lengths.Unequal_lengths -> failwith "Unequal lengths" + in + List.nth_exn flt coord_in_data + + let transpose_2d _t = assert false +end + +module Node = struct + type shape = int array + + let show_shape sh = + let sh = Array.to_list sh in + "[" + ^ Int.to_string (List.hd_exn sh) + ^ List.fold_left + ~f:(fun acc e -> acc ^ ", " ^ Int.to_string e) + ~init:"" (List.tl_exn sh) + ^ "]" + + type operator = + | Add + | Mul + | Matmul + | LogSoftmax + | ReLu + | Transpose + | Squeeze + | MaxPool + | Conv + | Identity + | NO_OP + | RW_Linearized_ReLu + + let str_op o = + match o with + | Add -> "Add" + | Mul -> "Mul" + | Matmul -> "Matmul" + | LogSoftmax -> "LogSoftmax" + | ReLu -> "ReLu" + | Transpose -> "Transpose" + | Squeeze -> "Squeeze" + | MaxPool -> "MaxPool" + | Conv -> "Conv" + | Identity -> "Identity" + | NO_OP -> "NO_OP" + | RW_Linearized_ReLu -> "RW_Linearized_ReLu" + + let str_shape sh = + match sh with + | [] -> "[]" + | _ -> + "[" + ^ Int.to_string (List.hd_exn sh) + ^ List.fold_left + ~f:(fun acc e -> acc ^ ", " ^ Int.to_string e) + ~init:"" (List.tl_exn sh) + ^ "]" + + type ksize = Ksize of shape + type stride = Stride of shape + type pads = Pads of shape + type dilations = Dilations of shape + + type operator_parameters = + | Pool_params of (ksize * stride option * pads option * dilations option) + | Conv_params of (ksize * stride option * pads option * dilations option) + | Transpose_params of shape + | RW_Linearized_ReLu_params of + (bool list list * ((string, float) Base.Hashtbl.t list * int)) + + let str_op_params p = + match p with + | Transpose_params s -> + let str_sh = show_shape s in + "Transpose params: " ^ str_sh + | Pool_params (Ksize k, s, p, d) | Conv_params (Ksize k, s, p, d) -> + let str_k = show_shape k + and str_s = match s with None -> "" | Some (Stride ss) -> show_shape ss + and str_p = match p with None -> "" | Some (Pads pp) -> show_shape pp + and str_d = + match d with None -> "" | Some (Dilations dd) -> show_shape dd + in + "Pool params: KSIZE: " ^ str_k ^ ", Pads: " ^ str_p ^ ", Stride: " ^ str_s + ^ ", Dilations: " ^ str_d + | RW_Linearized_ReLu_params l -> + (* Only displays the activation scheme on the ReLU node *) + let activations = fst l in + let act' = + List.map + ~f:(fun l1 -> + List.map + ~f:(fun b -> match b with true -> "true" | false -> "false") + l1) + activations + in + let act'' = + List.map ~f:(fun l -> "[" ^ String.concat ~sep:";" l ^ "]") act' + in + let act''' = "[" ^ String.concat ~sep:";" act'' ^ "]" in + "RW_Linearized_ReLu_params: " ^ act''' + + type ('a, 'b) t = { + id : int; + name : string option; + mutable shape : shape; + operator : operator; + operator_parameters : operator_parameters option; + pred : string list; + succ : string list; + tensor : ('a, 'b) Tensor.t option; + } + + let compare v1 v2 = Stdlib.compare v1.id v2.id + let hash (v : ('a, 'b) t) = v.id + let equal v1 v2 = v1.id = v2.id + + let create ~id ~name ~sh ~op ~op_p ~pred ~succ ~tensor = + { + id; + name; + shape = sh; + operator = op; + operator_parameters = op_p; + pred; + succ; + tensor; + } + + let get_shape t = t.shape + let get_op t = t.operator + let get_tensor t = t.tensor + let is_data_node t = match get_tensor t with None -> false | Some _ -> true + + let num_neurons t = + match get_shape t with + | [||] -> 0 + | l -> Array.fold ~init:1 ~f:(fun x acc -> x * acc) l +end + +module type VInput = sig + type l + type r +end + +module MakeVertex (I : VInput) = struct + type t = (I.l, I.r) Node.t + + let compare = Node.compare + let hash = Node.hash + let equal = Node.equal + + type label = string + + let label (n : t) = match n.Node.name with Some n -> n | None -> "" + let create _name = assert false +end + +module Edge = struct + type t = string + + let compare = Stdlib.compare + let equal = phys_equal + let default = "" +end + +module NierCFG (I : VInput) = struct + module Vertex = MakeVertex (I) + include Graph.Imperative.Digraph.ConcreteBidirectionalLabeled (Vertex) (Edge) + + let vertex_list g = fold_vertex (fun x l -> x :: l) g [] + + let input_nodes g = + let input_criterion (v : ('a, 'b) Node.t) acc = + match v.id with 0 -> Some v | _ -> acc + in + match fold_vertex (fun v acc -> input_criterion v acc) g None with + | Some r -> [ r ] + | None -> failwith "Something strange, no node for describing inputs found" + + let preds g v = pred g v + + let preds_names g v = + let preds_list = pred_e g v in + List.fold ~init:[] ~f:(fun acc (_, n, _) -> n :: acc) preds_list + + let succs_names g v = + let succs_list = succ_e g v in + List.fold ~init:[] ~f:(fun acc (_, n, _) -> n :: acc) succs_list + + let succs g v = pred g v + let init_cfg = create () + + let find_vertices g f = + fold_vertex (fun x l -> if f x then x :: l else l) g [] +end + +module NierCFGInt = NierCFG (struct + type l = int64 + type r = int64_elt +end) + +module NierCFGFloat = NierCFG (struct + type l = float + type r = float64_elt +end) + +module NierCFGDot = Graph.Graphviz.Dot (struct + include NierCFGFloat (* use the graph module from above *) + + let node_label (v : vertex) = + let id = Int.to_string v.id in + let name = match v.name with Some n -> n | None -> "C_NODE" + and operator = Node.str_op v.operator + and operator_parameters = + match v.operator_parameters with + | Some p -> Node.str_op_params p + | None -> "no parameters" + and shape = Node.show_shape v.shape + and prevs = List.fold_left ~f:(fun x y -> x ^ "," ^ y) ~init:"" v.pred + and nexts = List.fold_left ~f:(fun x y -> x ^ "," ^ y) ~init:"" v.succ + and tensor = + match v.tensor with + (*limit of size for tensor strings, complying with + * dot string size limit of 16Ko *) + | Some t -> + let display_indices = + let all_indices = Tensor.all_coords (Tensor.get_shape t) in + if List.length all_indices > 10 + then + let rec firstk k xs = + match xs with + | [] -> failwith "firstk" + | x :: xs -> if k = 1 then [ x ] else x :: firstk (k - 1) xs + in + firstk 10 all_indices + else all_indices + in + let t_value_string = + List.fold_left + ~f:(fun acc l -> + acc ^ Node.str_shape l ^ ": " + ^ Float.to_string (Tensor.get t (Array.of_list l)) + ^ "\n") + ~init:"" display_indices + in + "Tensor value\n: " ^ t_value_string ^ "\nShape: " + ^ Node.str_shape (Array.to_list @@ Tensor.get_shape t) + | None -> "No tensor in node" + in + "ID :" ^ id ^ "\nNAME: " ^ name ^ "\nOP: " ^ operator ^ "\nOP PARAMS:" + ^ operator_parameters ^ "\nSHAPE: " ^ shape ^ "\nPREVS: " ^ prevs + ^ "\nNEXTS: " ^ nexts ^ "\nTENSORS INFOS:" ^ tensor + + let edge_attributes (_, e, _) = [ `Label e; `Color 4711 ] + let default_edge_attributes _ = [] + let get_subgraph _ = None + let vertex_attributes v = [ `Shape `Box; `Label (node_label v) ] + let vertex_name (v : vertex) = Int.to_string v.id + let default_vertex_attributes _ = [] + let graph_attributes _ = [] +end) + +let print_cfg_graph g = NierCFGDot.fprint_graph Caml.Format.std_formatter g + +let out_cfg_graph g = + let file = Out_channel.create "cfg.dot" in + NierCFGDot.output_graph file g diff --git a/lib/ir/nier_cfg.mli b/lib/ir/nier_cfg.mli new file mode 100644 index 0000000000000000000000000000000000000000..2c5896ef49653c1eefe5443675895d93522fe1ba --- /dev/null +++ b/lib/ir/nier_cfg.mli @@ -0,0 +1,221 @@ +(** This module defines the structure and interfaces for a Neural IntermediatE + Representation (NIER). + + It is primarly designed as an intermediate state into producing verifiable + terms from an ONNX model. *) + +open Base +open Bigarray + +(** {1 Tensor module} *) + +(** Tensors are multidimensional arrays used to represent numerical such as a + neural network weight *) + +module Tensor : sig + type ('a, 'b) t = ('a, 'b, c_layout) Genarray.t + type shape = int array [@@deriving show] + + val all_coords : shape -> int list list + + (** [create sh] initialize a tensor with the given shape [sh] with a default + value, depending of the type of the tensor*) + + type ('a, 'b) t_kind = + | K_int : (int64, int64_elt) t_kind + | K_float : (float, float64_elt) t_kind + + val create : shape -> ('a, 'b) t_kind -> ('a, 'b) t + + (** [get_idx t idx] returns the value in tensor [t] stored at coordinates + [idx]. Throw an error if the coordinate is invalid.*) + + val get : ('a, 'b) t -> shape -> 'a + + (** [set_idx t idx v] sets value [v] for tensor [t] at [idx]. Throw an error + if the coordinate is invalid.*) + + val set : ('a, 'b) t -> shape -> 'a -> unit + + (** [equal f t1 t2] applies [f] to all values of [t1] and [t2], and returns + true if all applications of f returned true. *) + + val equal : ('a -> 'a -> bool) -> ('a, 'b) t -> ('a, 'b) t -> bool + + (** [get_shape t] returns the shape of [t]. *) + + val get_shape : ('a, 'b) t -> shape + + (** [flatten t] returns a flattened version of [t]. *) + + val flatten : ('a, 'b) t -> 'a list + + (** [num_neurons sh] returns the total number of neurons given a shape *) + + val num_neurons : shape -> int + + (** [get flatnd_idx idx sh flt] returns the value that would be stored at + index [idx] under a tensor of shape [sh], given the flattened version of + this tensor [flt].*) + + val get_flatnd_idx : idx:shape -> sh:shape -> 'a list -> 'a + + (** [transpose_2d t] returns a copy of the tensor [t] with its two last + dimension exchanged.*) + + val transpose_2d : ('a, 'b) t -> ('a, 'b) t + + (** [unsqueeze sh1 sh2] returns the lowest common shape between [sh1] and + [sh2], and None if there is no common shape. A common shape is when a + shape of higher dimension has only 1 coordinates on non-shared dimensions + with the other. *) + + val unsqueeze : sh1:shape -> sh2:shape -> shape option +end + +(** {1 Modules for graph generation} *) + +module Node : sig + type shape = int array + + type operator = + | Add + | Mul + | Matmul + | LogSoftmax + | ReLu + | Transpose + | Squeeze + | MaxPool + | Conv + | Identity + | NO_OP + | RW_Linearized_ReLu + + (** Type describing the different operations handled. Those operations are + inspired by those defined in the ONNX documentation. + + @see <https://github.com/onnx/onnx/blob/master/docs/Operators.md> + for more informations. They are to be coupled with the relevant + operators parameters. *) + + val str_op : operator -> string + val show_shape : shape -> string + + type ksize = Ksize of shape + type stride = Stride of shape + type pads = Pads of shape + type dilations = Dilations of shape + + type operator_parameters = + | Pool_params of (ksize * stride option * pads option * dilations option) + | Conv_params of (ksize * stride option * pads option * dilations option) + | Transpose_params of shape + | RW_Linearized_ReLu_params of + (bool list list * ((string, float) Base.Hashtbl.t list * int)) + + val str_op_params : operator_parameters -> string + + type ('a, 'b) t = { + id : int; + name : string option; + mutable shape : shape; + operator : operator; + operator_parameters : operator_parameters option; + pred : string list; + succ : string list; + tensor : ('a, 'b) Tensor.t option; + } + (** Type encapsulating parameters for operations. For Convolutions and + Pooling, kernel size, padding, strides For Transpose, shape *) + + val compare : ('a, 'b) t -> ('a, 'b) t -> int + val hash : ('a, 'b) t -> int + val equal : ('a, 'b) t -> ('a, 'b) t -> bool + + val create : + id:int -> + name:string option -> + sh:shape -> + op:operator -> + op_p:operator_parameters option -> + pred:string list -> + succ:string list -> + tensor:('a, 'b) Tensor.t option -> + ('a, 'b) t + + val get_shape : ('a, 'b) t -> shape + val get_op : ('a, 'b) t -> operator + val get_tensor : ('a, 'b) t -> ('a, 'b) Tensor.t option + val is_data_node : ('a, 'b) t -> bool + val num_neurons : ('a, 'b) t -> int +end + +module type VInput = sig + type l + type r +end + +module MakeVertex (I : VInput) : sig + include Graph.Sig.VERTEX with type t = (I.l, I.r) Node.t +end + +module Edge : sig + type t = string + + val compare : 'a -> 'a -> int + val equal : 'a -> 'a -> bool + val default : t +end + +(** NIER is a graph {b (V,E)} where {b V} is the set of vertices (nodes) and + {b E} is the set of edges (connections between nodes). Nodes contains the + following informations: + + - unique id + - name coming from the original model, if it exists + - shape of the tensor resulting from the application of the node operation, + if it exist + - operation performed + - parameters of the operation + - an optional tensor storing the data + + Note that tensor have their own shape; they must be equal to the NIER's node + shape however. *) + +module NierCFG (I : VInput) : sig + include + Graph.Sig.I + with type V.t = MakeVertex(I).t + and type V.label = MakeVertex(I).t + and type E.t = MakeVertex(I).t * Edge.t * MakeVertex(I).t + and type E.label = Edge.t + + val init_cfg : t + val vertex_list : t -> vertex list + val preds : t -> vertex -> vertex list + + (** [preds_names g v] returns a list of names of predecessors nodes *) + + val preds_names : t -> vertex -> string list + val succs : t -> vertex -> vertex list + + (** [succs_names g v] returns a list of names of predecessors nodes *) + + val succs_names : t -> vertex -> string list + + (** [input_node g] returns the nodes considered as describing the inputs of + the neural network. *) + + val input_nodes : t -> vertex list + val find_vertices : t -> (vertex -> bool) -> vertex list +end + +module NierCFGFloat : sig + type t +end + +(** {1 Pretty printers} *) + +val print_cfg_graph : NierCFGFloat.t -> unit +val out_cfg_graph : NierCFGFloat.t -> unit diff --git a/lib/onnx/dune b/lib/onnx/dune index 1008baf43c6b2f5790fbed38985ca6e9e364c3ca..aff7ac603d9995096fa6fdbf265cab8eec9f6475 100644 --- a/lib/onnx/dune +++ b/lib/onnx/dune @@ -1,7 +1,7 @@ (library (name onnx) (public_name caisar-onnx) - (libraries base stdio ocaml-protoc-plugin) + (libraries base stdio ocaml-protoc-plugin caisar-ir) (synopsis "ONNX parser for CAISAR")) (rule diff --git a/lib/onnx/onnx.ml b/lib/onnx/onnx.ml index 2ee9221d2a1eba14f78c7e8629034192ff234879..a499684ad7aa33e6b8baa1637b60d72db5f8096b 100644 --- a/lib/onnx/onnx.ml +++ b/lib/onnx/onnx.ml @@ -25,6 +25,14 @@ module Format = Caml.Format module Fun = Caml.Fun module Oproto = Onnx_protoc (* Autogenerated during compilation *) module Oprotom = Oproto.Onnx.ModelProto +module NCFG = Ir.Nier_cfg + +module N = NCFG.NierCFG (struct + type l = float + type r = Bigarray.float64_elt +end) + +exception ParseError of string type t = { n_inputs : int; (* Number of inputs. *) @@ -32,6 +40,30 @@ type t = { } (* ONNX format handling. *) +type op_attribute = Oproto.Onnx.AttributeProto.t +type tensordata = Raw of bytes | Float of float list + +let (no_attr : op_attribute) = + { + name = None; + ref_attr_name = None; + doc_string = None; + type' = None; + f = None; + i = None; + s = None; + t = None; + g = None; + floats = []; + ints = []; + strings = []; + tensors = []; + graphs = []; + sparse_tensor = None; + tp = None; + sparse_tensors = []; + type_protos = []; + } let get_nested_dims (s : Oproto.Onnx.ValueInfoProto.t list) = match List.nth s 0 with @@ -64,6 +96,413 @@ let get_input_output_dim (model : Oprotom.t) = let output_flat_dim = flattened_dim output_shape in (input_flat_dim, output_flat_dim) +let produce_cfg (g : Oproto.Onnx.GraphProto.t) = + let open Oproto.Onnx in + let nodes = g.node + and inputs = g.input + and outputs = g.output + and initi = g.initializer' in + let fold_vip_names acc n = + match n.ValueInfoProto.name with + | Some str -> Some str :: acc + | None -> None :: acc + in + let i_nodes, o_nodes = + ( List.fold inputs ~init:[] ~f:fold_vip_names, + List.fold outputs ~init:[] ~f:fold_vip_names ) + and c_nodes = List.init (List.length nodes) ~f:(fun _ -> None) in + let fold_nodes_ops_cfg ns = + let get_node_operator_cfg x = + match x.NodeProto.op_type with + | None -> NCFG.Node.NO_OP + | Some o -> ( + match o with + | "Add" -> NCFG.Node.Add + | "Mul" -> NCFG.Node.Mul + | "Relu" -> NCFG.Node.ReLu + | "MatMul" -> NCFG.Node.Matmul + | "LogSoftmax" -> NCFG.Node.LogSoftmax + | "Transpose" -> NCFG.Node.Transpose + | "Squeeze" -> NCFG.Node.Squeeze + | "MaxPool" -> NCFG.Node.MaxPool + | "Conv" -> NCFG.Node.Conv + | "Identity" -> NCFG.Node.Identity + | _ -> + raise (ParseError ("Unsupported ONNX Operator in\n Parser: " ^ o))) + in + List.fold ~f:(fun acc n -> get_node_operator_cfg n :: acc) ~init:[] ns + in + let c_ops = fold_nodes_ops_cfg nodes + and i_ops, o_ops = + ( List.init ~f:(fun _ -> NCFG.Node.NO_OP) (List.length i_nodes), + List.init ~f:(fun _ -> NCFG.Node.NO_OP) (List.length o_nodes) ) + in + let fold_nodes_attr ns = + let get_node_attr n = n.NodeProto.attribute in + List.fold ~f:(fun acc n -> get_node_attr n :: acc) ~init:[] ns + in + + let c_attr = fold_nodes_attr nodes + and i_attr, o_attr = + ( List.init ~f:(fun _ -> [ no_attr ]) (List.length i_nodes), + List.init ~f:(fun _ -> [ no_attr ]) (List.length o_nodes) ) + in + let c_nodes_inputs, c_nodes_outputs = + List.unzip + @@ List.fold + ~f:(fun acc n -> (n.NodeProto.input, n.NodeProto.output) :: acc) + ~init:[] nodes + and i_nodes_inputs, i_nodes_outputs, o_nodes_inputs, o_nodes_outputs = + ( List.init ~f:(fun _ -> [ "NO_INPUT" ]) (List.length i_nodes), + List.init ~f:(fun _ -> [ "NO_OUTPUT" ]) (List.length i_nodes), + List.init ~f:(fun _ -> [ "NO_INPUT" ]) (List.length o_nodes), + List.init ~f:(fun _ -> [ "NO_OUTPUT" ]) (List.length o_nodes) ) + in + let data_dict = + let dict_tensors_cfg ts = + let get_float_from_index index data sh = + let index = Array.to_list index and sh = Array.to_list sh in + let pop_sh = List.tl_exn sh @ [ 1 ] in + (* Returns the factors by which multiply each coordinate *) + let rec get_factors_from_sh sh_f l = + match sh_f with + | [] -> List.rev l + | _ -> + get_factors_from_sh (List.tl_exn sh_f) + (List.fold ~f:(fun x y -> x * y) ~init:1 sh_f :: l) + in + let factors = get_factors_from_sh pop_sh [] in + let coord_in_data = + List.fold2_exn ~f:(fun x y z -> x + (y * z)) ~init:0 index factors + in + match data with + | Raw raw -> + let offset = 4 * coord_in_data in + (* Each float is coded on 4 bytes*) + let res = EndianBytes.LittleEndian.get_float raw offset in + res + | Float f -> List.nth_exn f coord_in_data + in + let build_tensor_from_data sh data = + let open NCFG.Tensor in + let sh = Array.of_list @@ sh in + let tensor = create sh K_float in + let coords = all_coords (get_shape tensor) in + let rec init_tensor t idx r = + match idx with + | x :: y -> + let value = + get_float_from_index (Array.of_list x) r (get_shape t) + in + set t (Array.of_list x) value; + init_tensor t y r + | [] -> t + in + init_tensor tensor coords data + in + let t_name x = + match x.TensorProto.name with Some n -> n | None -> "C_NODE" + in + let t_dim x = x.TensorProto.dims in + let t_data x = + match x.TensorProto.raw_data with + | Some rd -> Some (build_tensor_from_data (t_dim x) (Raw rd)) + | None -> ( + match x.TensorProto.float_data with + | [] -> None + | f -> Some (build_tensor_from_data (t_dim x) (Float f))) + in + List.fold + ~f:(fun m x -> Map.add_exn m ~key:(t_name x) ~data:(t_data x)) + ~init:(Map.empty (module String)) + ts + in + dict_tensors_cfg initi + in + let unpack v = + match v with + | Some v -> v + | None -> failwith "error, unpack found an unexpected None" + in + let tensor_list = + List.init + ~f:(fun i -> + match Map.find data_dict (unpack (List.nth_exn i_nodes i)) with + | Some v -> v + | None -> None) + (List.length i_nodes) + in + let tensor_list_full = Map.to_alist data_dict in + let tensor_list_rev = List.rev tensor_list in + let vip_dims v = + let val_t = + match v.ValueInfoProto.type' with + | Some t -> t + | None -> failwith "No type in value info" + in + let tns_t = + match val_t.TypeProto.value with + | `Tensor_type t -> t + | `not_set -> + failwith "No tensor type in value info" + (* TODO: support more tensor types *) + | _ -> raise (ParseError "Unknown tensor type.") + in + let tns_s = + match tns_t.shape with + | Some s -> s + | None -> failwith "No tensor shape in value info" + in + List.fold tns_s ~init:[] ~f:(fun acc d -> + match d.value with `Dim_value d -> d :: acc | `not_set | _ -> 0 :: acc) + in + + let c_tensordim_list = List.init (List.length nodes) ~f:(fun _ -> []) + and c_tensorraw_list = List.init (List.length nodes) ~f:(fun _ -> None) + and o_tensordim_list = + List.fold ~f:(fun acc n -> vip_dims n :: acc) ~init:[] outputs + and o_tensorraw_list = List.init (List.length o_nodes) ~f:(fun _ -> None) + and i_tensordim_list = + List.fold ~f:(fun acc n -> vip_dims n :: acc) ~init:[] inputs + and i_tensorraw_list = tensor_list_rev in + let nodes_names = i_nodes @ c_nodes @ o_nodes in + let ops = i_ops @ c_ops @ o_ops in + let attrs = i_attr @ c_attr @ o_attr in + let prevs_list = i_nodes_inputs @ c_nodes_inputs @ o_nodes_inputs in + let nexts_list = i_nodes_outputs @ c_nodes_outputs @ o_nodes_outputs in + let tensor_dims = i_tensordim_list @ c_tensordim_list @ o_tensordim_list in + let tensors = i_tensorraw_list @ c_tensorraw_list @ o_tensorraw_list in + let operator_parameters (attr : AttributeProto.t list) op = + match op with + | NCFG.Node.Transpose -> + let ints_params = Array.of_list (List.nth_exn attr 0).ints in + Some (NCFG.Node.Transpose_params ints_params) + (*TODO: maxpool and conv operators: match attr.name in attributes to + * create the correct value for each attribute*) + (* | NCFG.Vertex.MaxPool -> *) + (* | NCFG.Vertex.Conv -> *) + | _ -> None + in + let rec build_op_param_list attrs ops l = + match (attrs, ops) with + | a :: b, c :: d -> build_op_param_list b d (operator_parameters a c :: l) + | [], [] -> + List.rev l + (*All other list constructions are folding right, so we need to put a + final revert *) + | _ -> + raise + (ParseError + "Error, operators and attributes list have not\n the same size") + in + let op_params_cfg = build_op_param_list attrs ops [] in + let cfg = N.init_cfg in + (* adding inputs, outputs and cnodes to the cfg *) + let unkerasize l = List.map ~f:(fun x -> if x = 0 then 1 else x) l in + for i = 0 to List.length nodes_names - 1 do + let (v : N.V.t) = + NCFG.Node.create ~id:i + ~name:(List.nth_exn nodes_names i) + ~sh:(Array.of_list @@ unkerasize (List.nth_exn tensor_dims i)) + ~op:(List.nth_exn ops i) + ~op_p:(List.nth_exn op_params_cfg i) + ~pred:(List.nth_exn prevs_list i) + ~succ:(List.nth_exn nexts_list i) + ~tensor:(List.nth_exn tensors i) + in + N.add_vertex cfg v + done; + (* Adding edges between vertices *) + (* For each unnamed vertex (= a calculation node) in the cfg ... *) + (* return true if l1 has at least one common element wih l2 *) + let rec shared_elm l1 l2 = + match l1 with + | x :: y -> List.mem l2 x ~equal:String.equal || shared_elm y l2 + | [] -> false + in + List.iter + ~f:(fun (v : N.V.t) -> + match v.name with + | None -> + let pred = v.pred and succ = v.succ in + let prev_v = + (* ... get all vertices in cfg that have the current vertex preds + * in their succ (at least one of their succ is inside our preds )*) + N.find_vertices cfg (fun (x : N.V.t) -> + if shared_elm pred x.succ then true else false) + (* ... get all vertices in cfg that have the current vertex preds + * in their name (they are named the same as one of our preds )*) + and named_pred = + N.find_vertices cfg (fun (x : N.V.t) -> + match x.name with + | Some name -> if shared_elm pred [ name ] then true else false + | None -> false) + (* ... get all vertices in cfg that have the current vertex succ + * in their name (they are named the same as one of our succs )*) + and named_succ = + N.find_vertices cfg (fun (x : N.V.t) -> + match x.name with + | Some name -> if shared_elm succ [ name ] then true else false + | None -> false) + (* get all vertices in cfg that have the current vertex succs + * in their preds (at least one of their preds is inside our succ )*) + and next_v = + N.find_vertices cfg (fun (x : N.V.t) -> + if shared_elm succ x.pred then true else false) + in + (* add edges between current vertex and identified preds and succs *) + let v_predecessors = prev_v @ named_pred + and v_successors = next_v @ named_succ in + let unpack_tname (x : N.V.t) = + match x.NCFG.Node.name with Some n -> n | None -> "" + in + List.iter + ~f:(fun (x : N.V.t) -> + let label = + match List.nth x.succ 0 with + | Some "NO_OUTPUT" -> + let pred_name = unpack_tname x in + if List.mem ~equal:String.equal v.NCFG.Node.pred pred_name + then pred_name + else "" + | Some l -> l + | None -> "" + in + N.add_edge_e cfg (x, label, v)) + v_predecessors; + (* add successors edges after filtering those + * that are already an edge*) + List.iter + ~f:(fun (x : N.V.t) -> + let all_preds = N.preds cfg x and all_succs = N.succs cfg x in + if List.mem ~equal:(NCFG.Node.equal) all_preds v || List.mem + ~equal:(NCFG.Node.equal) all_succs v + then () + else + let label = + match List.nth_exn x.pred 0 with + | "NO_INPUT" -> + let succ_name = unpack_tname x in + if List.mem ~equal:String.equal v.NCFG.Node.succ succ_name + then succ_name + else "" + | l -> l + in + N.add_edge_e cfg (v, label, x)) + v_successors + | _ -> ()) + (N.vertex_list cfg); + + (*rationale of the following: + * PyTorch stores network nodes in the field "inputs" of + * the ONNX graph g, and network parameters as a list of tensors + * in the ONNX initializer_. + * To make the two correspond, elements of g.inputs and g.initializer_ + * share the same value in the field "name". + * In Keras, elements of g.initializer_ have a name, but they do not + * correspond to any name in g.inputs. + * What we did before was then to create the actual nier cfg following the + * PyTorch way. + * Below, we complete the cfg with keras data by doing the following: + * * create a node for NIER for each tensor in onnx initializer_ + * * for each NIER node, check if there is a node sharing the same name + * pred + * * if yes, remove the one with highest ID (those are initi nodes, but since + * there is already a node in CFG with this name we do not + * need those) + * * if not, for each NIER node, chck if there is a node + * which name is contained in prevs. add it to the prev + * *) + + (* adding initi vertices to the cfg *) + for i = 0 to List.length tensor_list_full - 1 do + let shape = + match snd (List.nth_exn tensor_list_full i) with + | Some t -> unkerasize (Array.to_list @@ NCFG.Tensor.get_shape t) + | None -> [] + in + let (v : N.V.t) = + NCFG.Node.create + ~id:(i + List.length nodes_names) + ~name:(Some (fst (List.nth_exn tensor_list_full i))) + ~sh:(Array.of_list @@ unkerasize shape) + ~op:NO_OP ~op_p:None ~pred:[] ~succ:[] + ~tensor:(snd (List.nth_exn tensor_list_full i)) + in + N.add_vertex cfg v + done; + (* build a list of nodes + * sharing name but with different ids *) + let same_name_diff_ids = + let aux (x : N.V.t) = + N.fold_vertex + (fun y acc -> + match (x.name, y.name) with + | Some xa, Some ya -> + if (not (y.id = x.id)) && String.equal xa ya + then (x, y) :: acc + else acc + | _ -> acc) + cfg [] + in + N.fold_vertex (fun x l -> aux x :: l) cfg [] + in + let highest_ids = + List.fold + ~f:(fun acc x -> + match x with + | a :: _ -> + let maxval = max (fst a).NCFG.Node.id (snd a).NCFG.Node.id in + maxval :: acc + | [] -> acc) + ~init:[] same_name_diff_ids + in + (* (* removing nodes with highest id, those are the*) (* * ones we just added + *)*) + List.iter + ~f:(fun x -> + match x with + | l :: _ -> + let v1 = fst l in + if List.mem ~equal:( = ) highest_ids v1.NCFG.Node.id + then + (* Printf.printf "Removing id %d \n%!" *) + (* v1.NCFG.Vertex.id; *) + N.remove_vertex cfg v1 + else () + | [] -> ()) + same_name_diff_ids; + (* Now it is Keras time. + * Look for nodes sharing name and preds, + * then create edge *) + let shared_name_preds = + let aux (x : N.V.t) = + match x.name with + (* look in other vertices if name is among + * predecessors *) + | Some n -> N.find_vertices cfg (fun x -> shared_elm [ n ] x.pred) + | None -> [] + in + N.fold_vertex (fun x l -> (x, aux x) :: l) cfg [] + in + List.iter + ~f:(fun x -> + let orgn = fst x and to_edge = snd x in + List.iter + ~f:(fun t -> + if not (N.mem_edge cfg orgn t) + then N.add_edge_e cfg (orgn, unpack orgn.NCFG.Node.name, t) + else ()) + to_edge) + shared_name_preds; + (* else (); *) + cfg + +let nier_of_onnx_protoc (model : Oprotom.t) = + match model.graph with + | Some g -> produce_cfg g + | None -> raise (ParseError "No graph in ONNX input file!") + let parse_in_channel in_channel = let open Result in try @@ -72,7 +511,8 @@ let parse_in_channel in_channel = match Oprotom.from_proto reader with | Ok r -> let n_inputs, n_outputs = get_input_output_dim r in - Ok { n_inputs; n_outputs } + let nier = nier_of_onnx_protoc r in + Ok ({ n_inputs; n_outputs }, nier) | _ -> Error "Error parsing protobuf" with | Sys_error s -> Error s diff --git a/lib/onnx/onnx.mli b/lib/onnx/onnx.mli index 776e3124dfe291e614ce0d09fa127cd195f111bd..e31b0af0da9841c6fa28e900d0760fe218770912 100644 --- a/lib/onnx/onnx.mli +++ b/lib/onnx/onnx.mli @@ -20,11 +20,17 @@ (* *) (**************************************************************************) +module N : sig + type t +end + type t = private { n_inputs : int; (** Number of inputs. *) n_outputs : int; (** Number of outputs. *) } (** ONNX model metadata. *) -val parse : string -> (t, string) Result.t -(** Parse an ONNX file. *) +(** Parse an ONNX file to get metadata for CAISAR as well as its inner + intermediate representation for the network. *) + +val parse : string -> (t * N.t, string) Result.t diff --git a/src/language.ml b/src/language.ml index bbca3a69daad7882cc0352d155f8f66629ce7abe..ca792eeb5c05368e3556d0c5a6155009482fa750 100644 --- a/src/language.ml +++ b/src/language.ml @@ -92,7 +92,7 @@ let onnx_parser env _ filename _ = let model = Onnx.parse filename in match model with | Error s -> Loc.errorm "%s" s - | Ok model -> register_nn_as_tuple model.n_inputs model.n_outputs filename env + | Ok (model,_nier) -> register_nn_as_tuple model.n_inputs model.n_outputs filename env let ovo_parser env _ filename _ = let model = Ovo.parse filename in