Commit b585671d authored by Michele Alberti

[dataset] Remove normalization feature.

Some design rework needs to be done first.
parent 45c587c0
@@ -35,7 +35,6 @@ type input = {
and property = {
dataset : string;
normalization : Dataset.normalization option;
kind : Dataset.property;
}
[@@deriving yojson, show]
@@ -106,26 +105,6 @@ let theory_of_input env input =
let ls_model = Theory.ns_find_ls th_as_array.th_export [ "model" ] in
Term.t_app_infer ls_model []
in
(* Add dataset normalization. *)
let t_dataset, th_uc =
match input.property.normalization with
| None -> (t_dataset, th_uc)
| Some n ->
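(* Introduce a fresh constant [dataset'], defined by the normalization term,
   and use it in place of the raw dataset from here on. *)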
let t_normalization = Dataset.term_of_normalization env n in
let ls_dataset_norm =
let ty =
Ty.ty_app
(Theory.ns_find_ts th_dsc.mod_theory.th_export [ "dataset" ])
[]
in
Term.create_fsymbol (Ident.id_fresh "dataset'") [] ty
in
let ld_dataset_norm =
Decl.make_ls_defn ls_dataset_norm [] t_normalization
in
( Term.t_app_infer ls_dataset_norm [],
Theory.add_logic_decl th_uc [ ld_dataset_norm ] )
in
(* Create goal formula about specified property. *)
let th_uc =
let t_property =
@@ -34,7 +34,6 @@ type input = private {
and property = private {
dataset : string;
normalization : Dataset.normalization option;
kind : Dataset.property;
}
[@@deriving yojson { strict = false }, show]
@@ -40,131 +40,6 @@ let real_constant_of_float value =
let float_of_real_constant rc =
Float.of_string (Fmt.str "%a" Number.(print_real_constant full_support) rc)
(* min-max scaler *)
type minmax = {
clip : bool; (* whether needs clipping *)
min : float;
max : float;
}
[@@deriving yojson, show]
(* z-normalization *)
type znorm = {
mean : float; (* mean *)
std_dev : float; (* standard deviation *)
}
[@@deriving yojson, show]
type normalization =
| MinMax of minmax
| Znorm of znorm
[@@deriving yojson, show]
let do_clip ~min ~max x =
if Float.( <= ) x min then min else if Float.( >= ) x max then max else x
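(* [clip n] is the clipping function induced by normalization [n]: clamp to
   [min; max] for min-max scaling with clipping enabled, identity otherwise. *)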
let clip = function
| MinMax { clip; min; max } ->
fun x -> if clip then do_clip ~min ~max x else x
| Znorm _ -> Fn.id
let apply_normalization dataset normalization =
let normalize =
match normalization with
| MinMax { clip; min; max } ->
let min_dataset, max_dataset =
let minimum = List.min_elt ~compare:Float.compare in
let maximum = List.max_elt ~compare:Float.compare in
List.fold_left dataset ~init:(Float.max_value, Float.min_value)
~f:(fun (min, max) record ->
match record with
| [] -> (min, max)
| _ ->
let features = List.(map (tl_exn record) ~f:Float.of_string) in
let min_record = Option.value ~default:min (minimum features) in
let max_record = Option.value ~default:max (maximum features) in
(Float.min min min_record, Float.max max max_record))
in
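(* Rescale [x] from the dataset range [min_dataset; max_dataset] to the
   target range [min; max]:
   x_scaled = ((x - min_dataset) / (max_dataset - min_dataset)) * (max - min) + min,
   clipped to [min; max] when requested. *)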
fun x ->
let x_std = (x -. min_dataset) /. (max_dataset -. min_dataset) in
let x_scaled = (x_std *. (max -. min)) +. min in
if clip then do_clip ~min ~max x_scaled else x_scaled
| Znorm { mean; std_dev } -> fun e -> (e -. mean) /. std_dev
in
List.map dataset ~f:(fun record ->
let label, features = (List.hd_exn record, List.tl_exn record) in
let features =
List.map features ~f:(fun s ->
let t = normalize (Float.of_string s) in
Float.to_string t)
in
label :: features)
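(* Fold over the task's logic declarations and collect, for each defined
   logic symbol, the normalization it denotes: an application of
   [min_max_scale] with literal clip/min/max arguments (kept only when
   min <= max), or of [z_norm] with literal mean/std_dev arguments. *)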
let interpret_normalization =
Trans.fold_decl
(fun decl acc ->
match decl.d_node with
| Dlogic ldecl ->
List.fold ldecl ~init:acc ~f:(fun acc (ls, ls_defn) ->
let _, term = Decl.open_ls_defn ls_defn in
let normalization =
match term.t_node with
| Term.Tapp
( { ls_name = { id_string = "min_max_scale"; _ }; _ },
[
{ t_node = Tapp (clip, []); _ };
{ t_node = Tconst (ConstReal rc_min); _ };
{ t_node = Tconst (ConstReal rc_max); _ };
{ t_node = Tapp (_dataset, []); _ };
] ) ->
let clip = Term.(ls_equal fs_bool_true clip) in
let min = float_of_real_constant rc_min in
let max = float_of_real_constant rc_max in
if Float.( <= ) min max
then Some (MinMax { clip; min; max })
else None
| Term.Tapp
( { ls_name = { id_string = "z_norm"; _ }; _ },
[
{ t_node = Tconst (ConstReal mean); _ };
{ t_node = Tconst (ConstReal std_dev); _ };
{ t_node = Tapp (_dataset, []); _ };
] ) ->
let mean = float_of_real_constant mean in
let std_dev = float_of_real_constant std_dev in
Some (Znorm { mean; std_dev })
| _ -> None
in
Option.value_map normalization ~default:acc ~f:(fun n ->
(ls, n) :: acc))
| _ -> acc)
[]
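(* Build the WhyML term denoting a normalization applied to the [dataset]
   constant of the [DatasetClassification] module: [min_max_scale clip min max
   dataset] or [z_norm mean std_dev dataset], with numeric constants of type
   [Float64.t]. *)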
let term_of_normalization env normalization =
let th = Pmodule.read_module env [ "caisar" ] "DatasetClassification" in
let t_dataset =
let ls_dataset = Theory.ns_find_ls th.mod_theory.th_export [ "dataset" ] in
Term.t_app_infer ls_dataset []
in
let ty =
let th = Env.read_theory env [ "ieee_float" ] "Float64" in
Ty.ty_app (Theory.ns_find_ts th.th_export [ "t" ]) []
in
match normalization with
| MinMax { clip; min; max } ->
let t_clip = if clip then Term.t_bool_true else Term.t_bool_false in
let t_min = Term.t_const (real_constant_of_float min) ty in
let t_max = Term.t_const (real_constant_of_float max) ty in
let ls_min_max_scale =
Theory.ns_find_ls th.mod_theory.th_export [ "min_max_scale" ]
in
Term.t_app_infer ls_min_max_scale [ t_clip; t_min; t_max; t_dataset ]
| Znorm { mean; std_dev } ->
let t_mean = Term.t_const (real_constant_of_float mean) ty in
let t_std_dev = Term.t_const (real_constant_of_float std_dev) ty in
let ls_z_norm = Theory.ns_find_ls th.mod_theory.th_export [ "z_norm" ] in
Term.t_app_infer ls_z_norm [ t_mean; t_std_dev; t_dataset ]
type eps = float [@@deriving yojson, show]
let string_of_eps eps = Float.to_string eps
@@ -183,7 +58,6 @@ type property =
type ('a, 'b) predicate = {
model : 'a;
dataset : 'b;
normalization : normalization option;
property : property;
}
@@ -204,7 +78,6 @@ let failwith_unsupported_ls ls =
let interpret_predicate env ~on_model ~on_dataset task =
let task = Trans.apply Introduction.introduce_premises task in
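(* [ls_with_normalization] associates each logic symbol defined by a
   normalization term with the corresponding [normalization] value. *)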
let ls_with_normalization = Trans.apply interpret_normalization task in
let term = Task.task_goal_fmla task in
match term.t_node with
| Term.Tapp
@@ -230,18 +103,16 @@ let interpret_predicate env ~on_model ~on_dataset task =
else failwith_unsupported_ls ls
| _ -> failwith_unsupported_term term
in
let normalization =
List.find_map ls_with_normalization ~f:(fun (ls, normalization) ->
if Term.ls_equal ls dataset then Some normalization else None)
in
let dataset = on_dataset dataset in
let model = on_model model in
{ model; dataset; normalization; property }
{ model; dataset; property }
| _ ->
(* No other term node is supported. *)
failwith_unsupported_term term
let add_axioms ?eps ~clip th task input value =
let clip = Float.(clamp_exn ~min:zero ~max:one)
let add_axioms ?eps th task input value =
let eps = Float.(abs (of_string (Option.value eps ~default:"0"))) in
let value = Float.of_string value in
(* Add axiom [input >= value]. *)
@@ -271,7 +142,7 @@ let add_axioms ?eps ~clip th task input value =
in
Task.add_prop_decl task Decl.Paxiom pr_le t_le
let add_input_decls ~clip predicate_kind ~inputs ~record th task =
let add_input_decls predicate_kind ~inputs ~record th task =
(* Add input constraints as axioms. *)
let features = List.tl_exn record in
let eps =
@@ -280,7 +151,7 @@ let add_input_decls ~clip predicate_kind ~inputs ~record th task =
| Robust eps -> Some (string_of_eps eps)
| CondRobust _ -> failwith "Unsupported conditional robustness predicate"
in
List.fold2_exn inputs features ~init:task ~f:(add_axioms ~clip ?eps th)
List.fold2_exn inputs features ~init:task ~f:(add_axioms ?eps th)
let add_output_decl ~id predicate_kind ~outputs ~record th task =
(* Add goal formula. *)
@@ -330,10 +201,6 @@ let add_decls ~kind task =
let tasks_of_nn_csv_predicate env
(predicate : (Language.nn_shape, Csv.t) predicate) =
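(* Normalize the raw CSV dataset up front when a normalization is attached
   to the predicate. *)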
let dataset =
Option.value_map predicate.normalization ~default:predicate.dataset
~f:(apply_normalization predicate.dataset)
in
let th_real = Env.read_theory env [ "real" ] "Real" in
let task = Task.use_export None th_real in
(* Add input declarations. *)
@@ -345,7 +212,7 @@ let tasks_of_nn_csv_predicate env
add_decls ~kind:(`Out predicate.model.Language.nb_outputs) task
in
let tasks =
List.foldi dataset ~init:[] ~f:(fun id tasks record ->
List.foldi predicate.dataset ~init:[] ~f:(fun id tasks record ->
let nb_features = List.length record - 1 in
if nb_features <> predicate.model.Language.nb_inputs
then (
@@ -356,11 +223,8 @@
predicate.model.Language.nb_inputs nb_features id);
tasks)
else
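(* Derive the input clipping function from the normalization attached to
   the predicate, if any. *)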
let clip =
Option.value_map predicate.normalization ~default:Fn.id ~f:clip
in
let task =
add_input_decls ~clip predicate.property ~inputs ~record th_real task
add_input_decls predicate.property ~inputs ~record th_real task
|> add_output_decl ~id predicate.property ~outputs ~record th_real
in
task :: tasks)
@@ -33,14 +33,9 @@ type property = private
| CondRobust of eps
[@@deriving yojson, show]
type normalization [@@deriving yojson, show]
val term_of_normalization : Env.env -> normalization -> Term.term
type ('model, 'dataset) predicate = private {
model : 'model;
dataset : 'dataset;
normalization : normalization option;
property : property;
}
@@ -32,9 +32,6 @@ theory DatasetClassification
constant dataset: dataset
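(* Dataset normalization operators: min-max scaling and z-score normalization. *)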
function min_max_scale (clip: bool) (min: t) (max: t) (d: dataset): dataset
function z_norm (mean: t) (std_dev: t) (d: dataset): dataset
type model
function predict: model -> features -> label_
Test verify
Test verify on dataset
$ cat - > test_data.csv << EOF
> 1,0,255,200,5,198
> 0,240,0,0,3,10
> 1,0.0,1.0,0.784313725,0.019607843,0.776470588
> 0,0.941176471,0,0,0.011764706,0.039215686
> EOF
$ chmod u+x bin/pyrat.py bin/Marabou
@@ -24,7 +24,6 @@ Test verify
> goal G: correct TN.model dataset
>
> goal H:
> let dataset = min_max_scale true (0.0:t) (1.0:t) dataset in
> robust TN.model dataset (0.0100000000000000002081668171172168513294309377670288085937500000:t)
> end
> EOF
@@ -41,7 +40,6 @@ Test verify
> goal G: correct TN.model dataset
>
> goal H:
> let dataset = min_max_scale true (0.0:t) (1.0:t) dataset in
> robust TN.model dataset (0.0100000000000000002081668171172168513294309377670288085937500000:t)
> end
> EOF
Test verify-json
$ cat - > test_data.csv << EOF
> 1,0,255,200,5,198
> 1,0.0,1.0,0.784313725,0.019607843,0.776470588
> EOF
$ cat - > config.json << EOF
> {"id":"abcd","prover":["PyRAT"],"model":"TestNetwork.nnet","property":{"dataset":"test_data.csv","normalization":["MinMax",{"clip":true,"min":0.0,"max":1.0}],"kind":["Robust",0.01]},"time_limit":null,"memory_limit":null,"output_file":null}
> {"id":"foo","prover":["PyRAT"],"model":"TestNetwork.nnet","property":{"dataset":"test_data.csv","kind":["Robust",0.3]},"time_limit":null,"memory_limit":null,"output_file":null}
> EOF
$ chmod u+x bin/pyrat.py