Commit ede4c07b authored by Michele Alberti

[stdlib] Some rework and renaming for dataset classification theories.

parent f4d97fd0
@@ -68,7 +68,7 @@ The CAISAR standard library `caisar.mlw
 <https://git.frama-c.com/pub/caisar/-/blob/master/stdlib/caisar.mlw>`_ provides
 some utilities for dealing with verification problems about classification
 datasets. Of particular interest for us is the ``robust`` predicate, defined in
-the theory ``DataSetProps`` as follows:
+the theory ``DatasetClassificationProps`` as follows:
 
 .. code-block:: whyml
@@ -79,7 +79,7 @@ Note that the predicate is defined over a ``model``, a ``dataset`` and a
 floating-point value ``eps``. The latter determines the perturbation
 :math:`\epsilon`. The other two are custom WhyML types that respectively
 formalize the notions of classifier and dataset in CAISAR. These types are
-respectively defined in the ``Model`` and ``DataSetClassification`` theories.
+both defined in the ``DatasetClassification`` theory.
 Moreover, it is defined in terms of the predicate ``robust_at`` that formalizes
 the local robustness property:
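The WhyML text of ``robust`` and ``robust_at`` lies outside the context shown in this hunk. As a purely illustrative sketch, not the library's actual definition, local robustness can be read as follows, reusing the ``linfty_distance`` and ``predict`` symbols that appear in the stdlib hunks further down: a classifier ``m`` is robust at an input ``x`` for a perturbation ``eps`` when every input within l-infinity distance ``eps`` of ``x`` receives the same predicted label.

  predicate robust_at (m: model) (x: features) (eps: t) =
    forall x': features.
      linfty_distance x x' eps -> predict m x' = predict m x

``robust`` then quantifies ``robust_at`` over every element of the ``dataset``.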
@@ -104,7 +104,7 @@ values of types ``model`` and ``dataset``. For the former, CAISAR makes
 available the constant ``model`` upon interpreting the ``AsArray`` sub-theory
 that is built by the extension of the Why3 standard library for recognizing
 neural network ONNX and NNet formats. For the latter, the CAISAR standard
-library provides the constant ``dataset`` in ``DataSetClassification`` that will
+library provides the constant ``dataset`` in ``DatasetClassification`` that will
 be interpreted as the actual dataset the user needs to provide via the
 command-line interface when launching a CAISAR verification.
@@ -117,8 +117,8 @@ dataset, with each element's feature pertubed by :math:`1 \%`, looks like this:
   theory MNIST
     use MNIST_256_2.AsArray
     use ieee_float.Float64
-    use caisar.DataSetClassification
-    use caisar.DataSetProps
+    use caisar.DatasetClassification
+    use caisar.DatasetClassificationProps
 
     goal robustness:
       let normalized_dataset = min_max_scale true (0.0:t) (1.0:t) dataset in
@@ -126,7 +126,7 @@ dataset, with each element's feature pertubed by :math:`1 \%`, looks like this:
   end
 
 Note the presence of the ``min_max_scale`` function defined in
-``DataSetClassification`` for normalizing all feature values in :math:`[0,1]`.
+``DatasetClassification`` for normalizing all feature values in :math:`[0,1]`.
 Besides classic *Min-Max scaling*, CAISAR also provides ``z_norm`` function for
 *Z-normalization*.
...
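For reference, ``min_max_scale`` and ``z_norm`` correspond to the usual normalization formulas; this is the standard reading of their signatures, not something spelled out in this diff, and the exact meaning of the ``clip`` flag (presumably clipping values into [min, max] first) is an assumption:

  min-max scaling:   x' = (x - min) / (max - min)
  z-normalization:   x' = (x - mean) / std_dev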
 theory MNIST
   use MNIST_256_2.AsArray
   use ieee_float.Float64
-  use caisar.DataSetClassification
-  use caisar.DataSetProps
+  use caisar.DatasetClassification
+  use caisar.DatasetClassificationProps
 
   goal robustness:
     let normalized_dataset = min_max_scale true (0.0:t) (1.0:t) dataset in
...
@@ -141,7 +141,7 @@ let interpret_normalization =
   []
 
 let term_of_normalization env normalization =
-  let th = Pmodule.read_module env [ "caisar" ] "DataSetClassification" in
+  let th = Pmodule.read_module env [ "caisar" ] "DatasetClassification" in
   let t_dataset =
     let ls_dataset = Theory.ns_find_ls th.mod_theory.th_export [ "dataset" ] in
     Term.t_app_infer ls_dataset []
@@ -193,7 +193,7 @@ let string_of_predicate_kind = function
   | CondRobust _ -> "condrobust"
 
 let find_predicate_ls env p =
-  let th = Pmodule.read_module env [ "caisar" ] "DataSetProps" in
+  let th = Pmodule.read_module env [ "caisar" ] "DatasetClassificationProps" in
   Theory.ns_find_ls th.mod_theory.th_export [ p ]
 
 let failwith_unsupported_term t =
...
@@ -72,7 +72,7 @@ let register_nn_as_tuple env nb_inputs nb_outputs filename ?nier mstr =
 let register_nn_as_array env nb_inputs nb_outputs filename ?nier mstr =
   let name = "AsArray" in
   let th_uc = Pmodule.create_module env (Ident.id_fresh name) in
-  let nn = Pmodule.read_module env [ "caisar" ] "Model" in
+  let nn = Pmodule.read_module env [ "caisar" ] "DatasetClassification" in
   let th_uc = Pmodule.use_export th_uc nn in
   let ty_data =
     Ty.ty_app Theory.(ns_find_ts nn.mod_theory.th_export [ "model" ]) []
@@ -89,7 +89,7 @@ let register_nn_as_array env nb_inputs nb_outputs filename ?nier mstr =
 let register_svm_as_array env nb_inputs nb_classes filename mstr =
   let name = "AsArray" in
   let th_uc = Pmodule.create_module env (Ident.id_fresh name) in
-  let svm = Pmodule.read_module env [ "caisar" ] "Model" in
+  let svm = Pmodule.read_module env [ "caisar" ] "DatasetClassification" in
   let th_uc = Pmodule.use_export th_uc svm in
   let svm_type =
     Ty.ty_app Theory.(ns_find_ts svm.mod_theory.th_export [ "model" ]) []
...
@@ -90,20 +90,19 @@ module JSON = struct
       (Wstdlib.Mstr.find "AsArray" pmod).mod_theory
     in
     let th_float64 = Env.read_theory env [ "ieee_float" ] "Float64" in
-    let th_ds_classification =
-      Pmodule.read_module env [ "caisar" ] "DataSetClassification"
+    let th_dsc = Pmodule.read_module env [ "caisar" ] "DatasetClassification" in
+    let th_dsc_props =
+      Pmodule.read_module env [ "caisar" ] "DatasetClassificationProps"
     in
-    let th_ds_props = Pmodule.read_module env [ "caisar" ] "DataSetProps" in
     let name = "JSON" in
     let th_uc = Theory.create_theory (Ident.id_fresh name) in
     let th_uc = Theory.use_export th_uc th_as_array in
     let th_uc = Theory.use_export th_uc th_float64 in
-    let th_uc = Theory.use_export th_uc th_ds_classification.mod_theory in
-    let th_uc = Theory.use_export th_uc th_ds_props.mod_theory in
+    let th_uc = Theory.use_export th_uc th_dsc.mod_theory in
+    let th_uc = Theory.use_export th_uc th_dsc_props.mod_theory in
     let t_dataset =
       let ls_dataset =
-        Theory.ns_find_ls th_ds_classification.mod_theory.th_export
-          [ "dataset" ]
+        Theory.ns_find_ls th_dsc.mod_theory.th_export [ "dataset" ]
       in
       Term.t_app_infer ls_dataset []
     in
@@ -120,8 +119,7 @@ module JSON = struct
     let ls_dataset_norm =
       let ty =
         Ty.ty_app
-          (Theory.ns_find_ts th_ds_classification.mod_theory.th_export
-             [ "dataset" ])
+          (Theory.ns_find_ts th_dsc.mod_theory.th_export [ "dataset" ])
           []
       in
       Term.create_fsymbol (Ident.id_fresh "dataset'") [] ty
@@ -138,12 +136,12 @@ module JSON = struct
       match t.property.kind with
       | Dataset.Correct ->
         let ls_correct =
-          Theory.ns_find_ls th_ds_props.mod_theory.th_export [ "correct" ]
+          Theory.ns_find_ls th_dsc_props.mod_theory.th_export [ "correct" ]
         in
         Term.t_app_infer ls_correct [ t_model; t_dataset ]
       | Robust eps ->
         let ls_robust =
-          Theory.ns_find_ls th_ds_props.mod_theory.th_export [ "robust" ]
+          Theory.ns_find_ls th_dsc_props.mod_theory.th_export [ "robust" ]
        in
         let t_eps = Dataset.term_of_eps env eps in
         Term.t_app_infer ls_robust [ t_model; t_dataset; t_eps ]
...
@@ -20,7 +20,7 @@
 (*                                                                        *)
 (**************************************************************************)
 
-theory DataSetClassification
+theory DatasetClassification
   use ieee_float.Float64
   use int.Int
   use array.Array
@@ -32,7 +32,7 @@ theory DataSetClassification
   type dataset = {
     nb_features: int;
-    nb_label_s: int;
+    nb_labels: int;
     data: array datum
   }
@@ -40,11 +40,6 @@ theory DataSetClassification
   function min_max_scale (clip: bool) (min: t) (max: t) (d: dataset): dataset
   function z_norm (mean: t) (std_dev: t) (d: dataset): dataset
-end
-
-theory Model
-  use int.Int
-  use DataSetClassification
 
   type model = {
     nb_inputs: int;
@@ -54,12 +49,11 @@ theory Model
   function predict: model -> features -> label_
 end
 
-theory DataSetProps
+theory DatasetClassificationProps
   use ieee_float.Float64
   use int.Int
   use array.Array
-  use Model
-  use DataSetClassification
+  use DatasetClassification
 
   predicate linfty_distance (a: features) (b: features) (eps: t) =
     a.length = b.length /\
@@ -91,7 +85,6 @@ end
 theory NN
   (** Module defining commonly-met operations in neural networks. *)
   use ieee_float.Float64
-  use Model
 
   type input_type = t
...
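Read together, the ``stdlib/caisar.mlw`` hunks above amount to merging the former ``Model`` theory into ``DatasetClassification`` and renaming ``DataSetProps``. The following sketch of the resulting layout is assembled only from the lines visible in this diff; everything the diff does not show (the ``datum``, ``features`` and ``label_`` definitions, the remaining ``model`` fields, and the bodies of the properties theory) is elided:

  theory DatasetClassification
    use ieee_float.Float64
    use int.Int
    use array.Array

    (* features, label_ and datum definitions not shown in this diff *)

    type dataset = {
      nb_features: int;
      nb_labels: int;
      data: array datum
    }

    function min_max_scale (clip: bool) (min: t) (max: t) (d: dataset): dataset
    function z_norm (mean: t) (std_dev: t) (d: dataset): dataset

    type model = {
      nb_inputs: int
      (* remaining fields not shown in this diff *)
    }

    function predict: model -> features -> label_
  end

  theory DatasetClassificationProps
    use ieee_float.Float64
    use int.Int
    use array.Array
    use DatasetClassification

    (* linfty_distance, correct, robust, robust_at, ... unchanged in substance *)
  end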
@@ -18,8 +18,8 @@ Test verify
   > theory T
   >   use TestNetwork.AsArray as TN
   >   use ieee_float.Float64
-  >   use caisar.DataSetClassification
-  >   use caisar.DataSetProps
+  >   use caisar.DatasetClassification
+  >   use caisar.DatasetClassificationProps
   >
   >   goal G: correct TN.model dataset
   >
@@ -35,8 +35,8 @@ Test verify
   > theory T
   >   use TestNetwork.AsArray as TN
   >   use ieee_float.Float64
-  >   use caisar.DataSetClassification
-  >   use caisar.DataSetProps
+  >   use caisar.DatasetClassification
+  >   use caisar.DatasetClassificationProps
   >
   >   goal G: correct TN.model dataset
   >
...
@@ -15,8 +15,8 @@ Test verify
   > theory T
   >   use TestSVM.AsArray
   >   use ieee_float.Float64
-  >   use caisar.DataSetClassification
-  >   use caisar.DataSetProps
+  >   use caisar.DatasetClassification
+  >   use caisar.DatasetClassificationProps
   >
   >   goal G: robust model dataset (8.0:t)
   >   goal H: correct model dataset
...