Commit c40f5194 authored by Michele Alberti

[dataset] Rework minmax scaling.

- Parameters min and max of min_max_scale now denote the intended final range
  (see the sketch below)
- Add a clip parameter to the min_max_scale stdlib function (if enabled,
  clipping is also applied after the added perturbation)
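
For illustration, the following is a minimal, self-contained sketch of the reworked scaling on a plain list of floats. It is not the committed implementation (which operates on Csv.t records and Base lists), and this helper and its labelled arguments are only illustrative: values are first rescaled from the dataset's own minimum and maximum to the requested [min; max] range and, when clip is set, clipped back into that same range.

  (* Sketch only: plain OCaml stdlib, mirroring the new min_max_scale semantics. *)
  let min_max_scale ~clip ~min ~max data =
    (* Minimum and maximum actually present in the data. *)
    let data_min = List.fold_left Float.min Float.max_float data in
    let data_max = List.fold_left Float.max (-. Float.max_float) data in
    let scale x =
      (* Rescale from [data_min; data_max] to the intended [min; max]. *)
      let x_std = (x -. data_min) /. (data_max -. data_min) in
      let x_scaled = (x_std *. (max -. min)) +. min in
      if clip then Float.min max (Float.max min x_scaled) else x_scaled
    in
    List.map scale data

  (* For example, min_max_scale ~clip:true ~min:0. ~max:1. [0.; 127.5; 255.]
     evaluates to [0.; 0.5; 1.]. *)

In the committed code, the same clipping function is also applied by add_axioms after the epsilon perturbation, which is what the second bullet point above refers to.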
parent c7c948bf
@@ -40,27 +40,65 @@ let real_constant_of_float value =
 let float_of_real_constant rc =
   Float.of_string (Fmt.str "%a" Number.(print_real_constant full_support) rc)
 
+(* min-max scaler *)
+type minmax = {
+  clip : bool; (* whether needs clipping *)
+  min : float;
+  max : float;
+}
+[@@deriving yojson, show]
+
+(* z-normalization *)
+type znorm = {
+  mean : float; (* mean *)
+  std_dev : float; (* standard deviation *)
+}
+[@@deriving yojson, show]
+
 type normalization =
-  | MinMax of float * float (* min-max scaling *)
-  | Znorm of float * float (* z-normalization (mean, standard deviation) *)
+  | MinMax of minmax
+  | Znorm of znorm
 [@@deriving yojson, show]
 
-let require_clipping = function MinMax _ -> true | Znorm _ -> false
+let do_clip ~min ~max x =
+  if Float.( <= ) x min then min else if Float.( >= ) x max then max else x
+
+let clip = function
+  | MinMax { clip; min; max } ->
+    fun x -> if clip then do_clip ~min ~max x else x
+  | Znorm _ -> Fn.id
 
-let apply_normalization csv ds =
-  let transform =
-    match ds with
-    | MinMax (min, max) -> fun e -> (e -. min) /. (max -. min)
-    | Znorm (mean, std_dev) -> fun e -> (e -. mean) /. std_dev
+let apply_normalization (dataset : Csv.t) normalization =
+  let normalize =
+    match normalization with
+    | MinMax { clip; min; max } ->
+      let min_dataset, max_dataset =
+        let minimum = List.min_elt ~compare:Float.compare in
+        let maximum = List.max_elt ~compare:Float.compare in
+        List.fold_left dataset ~init:(Float.max_value, Float.min_value)
+          ~f:(fun (min, max) record ->
+            match record with
+            | [] -> (min, max)
+            | _ ->
+              let features = List.(map (tl_exn record) ~f:Float.of_string) in
+              let min_record = Option.value ~default:min (minimum features) in
+              let max_record = Option.value ~default:max (maximum features) in
+              (Float.min min min_record, Float.max max max_record))
+      in
+      fun x ->
+        let x_std = (x -. min_dataset) /. (max_dataset -. min_dataset) in
+        let x_scaled = (x_std *. (max -. min)) +. min in
+        if clip then do_clip ~min ~max x_scaled else x_scaled
+    | Znorm { mean; std_dev } -> fun e -> (e -. mean) /. std_dev
   in
-  List.map csv ~f:(fun record ->
-    let class_, features = (List.hd_exn record, List.tl_exn record) in
+  List.map dataset ~f:(fun record ->
+    let label, features = (List.hd_exn record, List.tl_exn record) in
     let features =
       List.map features ~f:(fun s ->
-        let t = transform (Float.of_string s) in
+        let t = normalize (Float.of_string s) in
        Float.to_string t)
     in
-    class_ :: features)
+    label :: features)
 
 let interpret_normalization =
   Trans.fold_decl
@@ -74,13 +112,17 @@ let interpret_normalization =
        | Term.Tapp
            ( { ls_name = { id_string = "min_max_scale"; _ }; _ },
              [
+               { t_node = Tapp (clip, []); _ };
                { t_node = Tconst (ConstReal rc_min); _ };
                { t_node = Tconst (ConstReal rc_max); _ };
                { t_node = Tapp (_dataset, []); _ };
              ] ) ->
-          let rc_min = float_of_real_constant rc_min in
-          let rc_max = float_of_real_constant rc_max in
-          Some (MinMax (rc_min, rc_max))
+          let clip = Term.(ls_equal fs_bool_true clip) in
+          let min = float_of_real_constant rc_min in
+          let max = float_of_real_constant rc_max in
+          if Float.( <= ) min max
+          then Some (MinMax { clip; min; max })
+          else None
        | Term.Tapp
            ( { ls_name = { id_string = "z_norm"; _ }; _ },
              [
@@ -90,7 +132,7 @@ let interpret_normalization =
              ] ) ->
          let mean = float_of_real_constant mean in
          let std_dev = float_of_real_constant std_dev in
-          Some (Znorm (mean, std_dev))
+          Some (Znorm { mean; std_dev })
        | _ -> None
      in
      Option.value_map normalization ~default:acc ~f:(fun n ->
@@ -109,14 +151,15 @@ let term_of_normalization env normalization =
     Ty.ty_app (Theory.ns_find_ts th.th_export [ "t" ]) []
   in
   match normalization with
-  | MinMax (min, max) ->
+  | MinMax { clip; min; max } ->
+    let t_clip = if clip then Term.t_bool_true else Term.t_bool_false in
     let t_min = Term.t_const (real_constant_of_float min) ty in
     let t_max = Term.t_const (real_constant_of_float max) ty in
     let ls_min_max_scale =
       Theory.ns_find_ls th.mod_theory.th_export [ "min_max_scale" ]
     in
-    Term.t_app_infer ls_min_max_scale [ t_min; t_max; t_dataset ]
-  | Znorm (mean, std_dev) ->
+    Term.t_app_infer ls_min_max_scale [ t_clip; t_min; t_max; t_dataset ]
+  | Znorm { mean; std_dev } ->
     let t_mean = Term.t_const (real_constant_of_float mean) ty in
     let t_std_dev = Term.t_const (real_constant_of_float std_dev) ty in
     let ls_z_norm = Theory.ns_find_ls th.mod_theory.th_export [ "z_norm" ] in
@@ -140,7 +183,7 @@ type property =
 type ('a, 'b) predicate = {
   model : 'a;
   dataset : 'b;
-  normalization : normalization list;
+  normalization : normalization option;
   property : property;
 }
 
@@ -188,7 +231,7 @@ let interpret_predicate env ~on_model ~on_dataset task =
        | _ -> failwith_unsupported_term term
      in
      let normalization =
-        List.filter_map ls_with_normalization ~f:(fun (ls, normalization) ->
+        List.find_map ls_with_normalization ~f:(fun (ls, normalization) ->
          if Term.ls_equal ls dataset then Some normalization else None)
      in
      let dataset = on_dataset dataset in
@@ -198,11 +241,6 @@ let interpret_predicate env ~on_model ~on_dataset task =
      (* No other term node is supported. *)
      failwith_unsupported_term term
 
-let clip_value value =
-  (* Clip to [0, 1]. *)
-  let value = if Float.is_negative value then Float.zero else value in
-  if Float.(is_negative (one -. value)) then Float.one else value
-
 let add_axioms ?eps ~clip th task input value =
   let eps = Float.(abs (of_string (Option.value eps ~default:"0"))) in
   let value = Float.of_string value in
@@ -214,7 +252,7 @@ let add_axioms ?eps ~clip th task input value =
   in
   let t_ge =
     let value = value -. eps in
-    let value = if clip then clip_value value else value in
+    let value = clip value in
     let cst = real_constant_of_float value in
     Term.(ps_app le [ t_const cst Ty.ty_real; fs_app input [] Ty.ty_real ])
   in
@@ -226,7 +264,7 @@ let add_axioms ?eps ~clip th task input value =
   in
   let t_le =
     let value = value +. eps in
-    let value = if clip then clip_value value else value in
+    let value = clip value in
     let cst = real_constant_of_float value in
     Term.(ps_app le [ fs_app input [] Ty.ty_real; t_const cst Ty.ty_real ])
   in
@@ -245,18 +283,18 @@ let add_input_decls ~clip predicate_kind ~inputs ~record th task =
 
 let add_output_decl ~id predicate_kind ~outputs ~record th task =
   (* Add goal formula. *)
-  let class_ = List.hd_exn record in
+  let label = List.hd_exn record in
   let goal_fmla =
     let ls_gt = Theory.(ns_find_ls th.th_export [ Ident.op_infix ">" ]) in
-    let ls_class = List.nth_exn outputs (Int.of_string class_) in
+    let ls_label = List.nth_exn outputs (Int.of_string label) in
     List.fold outputs ~init:Term.t_true ~f:(fun fmla output ->
-      if Term.ls_equal ls_class output
+      if Term.ls_equal ls_label output
      then fmla
      else
        let gt_fmla =
          Term.(
            ps_app ls_gt
-              [ fs_app ls_class [] Ty.ty_real; fs_app output [] Ty.ty_real ])
+              [ fs_app ls_label [] Ty.ty_real; fs_app output [] Ty.ty_real ])
        in
        Term.t_and fmla gt_fmla)
   in
@@ -265,7 +303,7 @@ let add_output_decl ~id predicate_kind ~outputs ~record th task =
      Ident.id_fresh
        (Fmt.str "%s_record_%d_Y_%s"
           (string_of_predicate_kind predicate_kind)
-           id class_)
+           id label)
    in
    Decl.create_prsymbol id
   in
@@ -280,10 +318,9 @@ let add_decls ~id task n =
 
 let tasks_of_nn_csv_predicate env
   (predicate : (Language.nn_shape, Csv.t) predicate) =
-  let clip, dataset =
-    List.fold predicate.normalization ~init:(false, predicate.dataset)
-      ~f:(fun (rc, dataset) fs ->
-        (rc || require_clipping fs, apply_normalization dataset fs))
+  let dataset =
+    Option.value_map predicate.normalization ~default:predicate.dataset
+      ~f:(apply_normalization predicate.dataset)
   in
   let th_real = Env.read_theory env [ "real" ] "Real" in
   let task = Task.use_export None th_real in
@@ -307,6 +344,9 @@ let tasks_of_nn_csv_predicate env
            predicate.model.Language.nb_inputs nb_features id);
        tasks)
      else
+        let clip =
+          Option.value_map predicate.normalization ~default:Fn.id ~f:clip
+        in
        let task =
          add_input_decls ~clip predicate.property ~inputs ~record th_real task
          |> add_output_decl ~id predicate.property ~outputs ~record th_real

@@ -40,7 +40,7 @@ val term_of_normalization : Env.env -> normalization -> Term.term
 type ('model, 'dataset) predicate = private {
   model : 'model;
   dataset : 'dataset;
-  normalization : normalization list;
+  normalization : normalization option;
   property : property;
 }

@@ -38,7 +38,7 @@ theory DataSetClassification
   constant dataset: dataset
 
-  function min_max_scale (min: t) (max: t) (d: dataset): dataset
+  function min_max_scale (clip: bool) (min: t) (max: t) (d: dataset): dataset
   function z_norm (mean: t) (std_dev: t) (d: dataset): dataset
 end

@@ -4,7 +4,7 @@ Test verify-json
   > EOF
 
   $ cat - > config.json << EOF
-  > {"prover":["Pyrat"],"model":"TestNetwork.nnet","property":{"dataset":"test_data.csv","normalization":["MinMax",0.0,255.0],"kind":["Robust",0.01]}}
+  > {"prover":["Pyrat"],"model":"TestNetwork.nnet","property":{"dataset":"test_data.csv","normalization":["MinMax",{"clip":true,"min":0.0,"max":1.0}],"kind":["Robust",0.01]}}
   > EOF
 
   $ chmod u+x bin/pyrat.py

 Test verify
 
   $ cat - > test_data.csv << EOF
   > 1,0,255,200,5,198
+  > 0,240,0,0,3,10
   > EOF
 
   $ chmod u+x bin/pyrat.py
@@ -20,7 +21,7 @@ Test verify
   >   goal G: correct TN.model dataset
   >
   >   goal H:
-  >     let dataset = min_max_scale (0.0:t) (255.0:t) dataset in
+  >     let dataset = min_max_scale true (0.0:t) (1.0:t) dataset in
   >     robust TN.model dataset (0.0100000000000000002081668171172168513294309377670288085937500000:t)
   >   end
   > EOF