diff --git a/src/JSON.ml b/src/JSON.ml index 012332f4f53181aa67bc64b7c0227660862deff9..26b9a7c9c546c8290324fb61dc57e9b73f046303 100644 --- a/src/JSON.ml +++ b/src/JSON.ml @@ -35,7 +35,6 @@ type input = { and property = { dataset : string; - normalization : Dataset.normalization option; kind : Dataset.property; } [@@deriving yojson, show] @@ -106,26 +105,6 @@ let theory_of_input env input = let ls_model = Theory.ns_find_ls th_as_array.th_export [ "model" ] in Term.t_app_infer ls_model [] in - (* Add dataset normalization. *) - let t_dataset, th_uc = - match input.property.normalization with - | None -> (t_dataset, th_uc) - | Some n -> - let t_normalization = Dataset.term_of_normalization env n in - let ls_dataset_norm = - let ty = - Ty.ty_app - (Theory.ns_find_ts th_dsc.mod_theory.th_export [ "dataset" ]) - [] - in - Term.create_fsymbol (Ident.id_fresh "dataset'") [] ty - in - let ld_dataset_norm = - Decl.make_ls_defn ls_dataset_norm [] t_normalization - in - ( Term.t_app_infer ls_dataset_norm [], - Theory.add_logic_decl th_uc [ ld_dataset_norm ] ) - in (* Create goal formula about specified property. *) let th_uc = let t_property = diff --git a/src/JSON.mli b/src/JSON.mli index 2c02254f4a92a554c19c9818fe354a28c33ccd82..96a7b6cb67879d747a0e4e0c7b8cc807cb56d7a7 100644 --- a/src/JSON.mli +++ b/src/JSON.mli @@ -34,7 +34,6 @@ type input = private { and property = private { dataset : string; - normalization : Dataset.normalization option; kind : Dataset.property; } [@@deriving yojson { strict = false }, show] diff --git a/src/dataset.ml b/src/dataset.ml index cf7586483aaf3955c5ad32bead31b61b797b6204..9193295c267765c511a25a865bdad3ff22aa2c58 100644 --- a/src/dataset.ml +++ b/src/dataset.ml @@ -40,131 +40,6 @@ let real_constant_of_float value = let float_of_real_constant rc = Float.of_string (Fmt.str "%a" Number.(print_real_constant full_support) rc) -(* min-max scaler *) -type minmax = { - clip : bool; (* whether needs clipping *) - min : float; - max : float; -} -[@@deriving yojson, show] - -(* z-normalization *) -type znorm = { - mean : float; (* mean *) - std_dev : float; (* standard deviation *) -} -[@@deriving yojson, show] - -type normalization = - | MinMax of minmax - | Znorm of znorm -[@@deriving yojson, show] - -let do_clip ~min ~max x = - if Float.( <= ) x min then min else if Float.( >= ) x max then max else x - -let clip = function - | MinMax { clip; min; max } -> - fun x -> if clip then do_clip ~min ~max x else x - | Znorm _ -> Fn.id - -let apply_normalization dataset normalization = - let normalize = - match normalization with - | MinMax { clip; min; max } -> - let min_dataset, max_dataset = - let minimum = List.min_elt ~compare:Float.compare in - let maximum = List.max_elt ~compare:Float.compare in - List.fold_left dataset ~init:(Float.max_value, Float.min_value) - ~f:(fun (min, max) record -> - match record with - | [] -> (min, max) - | _ -> - let features = List.(map (tl_exn record) ~f:Float.of_string) in - let min_record = Option.value ~default:min (minimum features) in - let max_record = Option.value ~default:max (maximum features) in - (Float.min min min_record, Float.max max max_record)) - in - fun x -> - let x_std = (x -. min_dataset) /. (max_dataset -. min_dataset) in - let x_scaled = (x_std *. (max -. min)) +. min in - if clip then do_clip ~min ~max x_scaled else x_scaled - | Znorm { mean; std_dev } -> fun e -> (e -. mean) /. std_dev - in - List.map dataset ~f:(fun record -> - let label, features = (List.hd_exn record, List.tl_exn record) in - let features = - List.map features ~f:(fun s -> - let t = normalize (Float.of_string s) in - Float.to_string t) - in - label :: features) - -let interpret_normalization = - Trans.fold_decl - (fun decl acc -> - match decl.d_node with - | Dlogic ldecl -> - List.fold ldecl ~init:acc ~f:(fun acc (ls, ls_defn) -> - let _, term = Decl.open_ls_defn ls_defn in - let normalization = - match term.t_node with - | Term.Tapp - ( { ls_name = { id_string = "min_max_scale"; _ }; _ }, - [ - { t_node = Tapp (clip, []); _ }; - { t_node = Tconst (ConstReal rc_min); _ }; - { t_node = Tconst (ConstReal rc_max); _ }; - { t_node = Tapp (_dataset, []); _ }; - ] ) -> - let clip = Term.(ls_equal fs_bool_true clip) in - let min = float_of_real_constant rc_min in - let max = float_of_real_constant rc_max in - if Float.( <= ) min max - then Some (MinMax { clip; min; max }) - else None - | Term.Tapp - ( { ls_name = { id_string = "z_norm"; _ }; _ }, - [ - { t_node = Tconst (ConstReal mean); _ }; - { t_node = Tconst (ConstReal std_dev); _ }; - { t_node = Tapp (_dataset, []); _ }; - ] ) -> - let mean = float_of_real_constant mean in - let std_dev = float_of_real_constant std_dev in - Some (Znorm { mean; std_dev }) - | _ -> None - in - Option.value_map normalization ~default:acc ~f:(fun n -> - (ls, n) :: acc)) - | _ -> acc) - [] - -let term_of_normalization env normalization = - let th = Pmodule.read_module env [ "caisar" ] "DatasetClassification" in - let t_dataset = - let ls_dataset = Theory.ns_find_ls th.mod_theory.th_export [ "dataset" ] in - Term.t_app_infer ls_dataset [] - in - let ty = - let th = Env.read_theory env [ "ieee_float" ] "Float64" in - Ty.ty_app (Theory.ns_find_ts th.th_export [ "t" ]) [] - in - match normalization with - | MinMax { clip; min; max } -> - let t_clip = if clip then Term.t_bool_true else Term.t_bool_false in - let t_min = Term.t_const (real_constant_of_float min) ty in - let t_max = Term.t_const (real_constant_of_float max) ty in - let ls_min_max_scale = - Theory.ns_find_ls th.mod_theory.th_export [ "min_max_scale" ] - in - Term.t_app_infer ls_min_max_scale [ t_clip; t_min; t_max; t_dataset ] - | Znorm { mean; std_dev } -> - let t_mean = Term.t_const (real_constant_of_float mean) ty in - let t_std_dev = Term.t_const (real_constant_of_float std_dev) ty in - let ls_z_norm = Theory.ns_find_ls th.mod_theory.th_export [ "z_norm" ] in - Term.t_app_infer ls_z_norm [ t_mean; t_std_dev; t_dataset ] - type eps = float [@@deriving yojson, show] let string_of_eps eps = Float.to_string eps @@ -183,7 +58,6 @@ type property = type ('a, 'b) predicate = { model : 'a; dataset : 'b; - normalization : normalization option; property : property; } @@ -204,7 +78,6 @@ let failwith_unsupported_ls ls = let interpret_predicate env ~on_model ~on_dataset task = let task = Trans.apply Introduction.introduce_premises task in - let ls_with_normalization = Trans.apply interpret_normalization task in let term = Task.task_goal_fmla task in match term.t_node with | Term.Tapp @@ -230,18 +103,16 @@ let interpret_predicate env ~on_model ~on_dataset task = else failwith_unsupported_ls ls | _ -> failwith_unsupported_term term in - let normalization = - List.find_map ls_with_normalization ~f:(fun (ls, normalization) -> - if Term.ls_equal ls dataset then Some normalization else None) - in let dataset = on_dataset dataset in let model = on_model model in - { model; dataset; normalization; property } + { model; dataset; property } | _ -> (* No other term node is supported. *) failwith_unsupported_term term -let add_axioms ?eps ~clip th task input value = +let clip = Float.(clamp_exn ~min:zero ~max:one) + +let add_axioms ?eps th task input value = let eps = Float.(abs (of_string (Option.value eps ~default:"0"))) in let value = Float.of_string value in (* Add axiom [input >= value]. *) @@ -271,7 +142,7 @@ let add_axioms ?eps ~clip th task input value = in Task.add_prop_decl task Decl.Paxiom pr_le t_le -let add_input_decls ~clip predicate_kind ~inputs ~record th task = +let add_input_decls predicate_kind ~inputs ~record th task = (* Add input constraints as axioms. *) let features = List.tl_exn record in let eps = @@ -280,7 +151,7 @@ let add_input_decls ~clip predicate_kind ~inputs ~record th task = | Robust eps -> Some (string_of_eps eps) | CondRobust _ -> failwith "Unsupported conditional robustness predicate" in - List.fold2_exn inputs features ~init:task ~f:(add_axioms ~clip ?eps th) + List.fold2_exn inputs features ~init:task ~f:(add_axioms ?eps th) let add_output_decl ~id predicate_kind ~outputs ~record th task = (* Add goal formula. *) @@ -330,10 +201,6 @@ let add_decls ~kind task = let tasks_of_nn_csv_predicate env (predicate : (Language.nn_shape, Csv.t) predicate) = - let dataset = - Option.value_map predicate.normalization ~default:predicate.dataset - ~f:(apply_normalization predicate.dataset) - in let th_real = Env.read_theory env [ "real" ] "Real" in let task = Task.use_export None th_real in (* Add input declarations. *) @@ -345,7 +212,7 @@ let tasks_of_nn_csv_predicate env add_decls ~kind:(`Out predicate.model.Language.nb_outputs) task in let tasks = - List.foldi dataset ~init:[] ~f:(fun id tasks record -> + List.foldi predicate.dataset ~init:[] ~f:(fun id tasks record -> let nb_features = List.length record - 1 in if nb_features <> predicate.model.Language.nb_inputs then ( @@ -356,11 +223,8 @@ let tasks_of_nn_csv_predicate env predicate.model.Language.nb_inputs nb_features id); tasks) else - let clip = - Option.value_map predicate.normalization ~default:Fn.id ~f:clip - in let task = - add_input_decls ~clip predicate.property ~inputs ~record th_real task + add_input_decls predicate.property ~inputs ~record th_real task |> add_output_decl ~id predicate.property ~outputs ~record th_real in task :: tasks) diff --git a/src/dataset.mli b/src/dataset.mli index c79f0e51f774886bbb7c83f87f9b906175395443..ef90cafb58675813ff0af71eda92b7b1c79d928c 100644 --- a/src/dataset.mli +++ b/src/dataset.mli @@ -33,14 +33,9 @@ type property = private | CondRobust of eps [@@deriving yojson, show] -type normalization [@@deriving yojson, show] - -val term_of_normalization : Env.env -> normalization -> Term.term - type ('model, 'dataset) predicate = private { model : 'model; dataset : 'dataset; - normalization : normalization option; property : property; } diff --git a/stdlib/caisar.mlw b/stdlib/caisar.mlw index fd0eacf17c2c6837767a315e6c2c1d2ab21f0cf8..88422668ce03cd48622b9fd95c481eecc0dc009c 100644 --- a/stdlib/caisar.mlw +++ b/stdlib/caisar.mlw @@ -32,9 +32,6 @@ theory DatasetClassification constant dataset: dataset - function min_max_scale (clip: bool) (min: t) (max: t) (d: dataset): dataset - function z_norm (mean: t) (std_dev: t) (d: dataset): dataset - type model function predict: model -> features -> label_ diff --git a/tests/dataset.t b/tests/dataset.t index 4c14126dd05a4c451e1c826c3ee323367c9ab0f1..33e886ed577adfb7608d5b9dbdf8277ac72c1e18 100644 --- a/tests/dataset.t +++ b/tests/dataset.t @@ -1,7 +1,7 @@ -Test verify +Test verify on dataset $ cat - > test_data.csv << EOF - > 1,0,255,200,5,198 - > 0,240,0,0,3,10 + > 1,0.0,1.0,0.784313725,0.019607843,0.776470588 + > 0,0.941176471,0,0,0.011764706,0.039215686 > EOF $ chmod u+x bin/pyrat.py bin/Marabou @@ -24,7 +24,6 @@ Test verify > goal G: correct TN.model dataset > > goal H: - > let dataset = min_max_scale true (0.0:t) (1.0:t) dataset in > robust TN.model dataset (0.0100000000000000002081668171172168513294309377670288085937500000:t) > end > EOF @@ -41,7 +40,6 @@ Test verify > goal G: correct TN.model dataset > > goal H: - > let dataset = min_max_scale true (0.0:t) (1.0:t) dataset in > robust TN.model dataset (0.0100000000000000002081668171172168513294309377670288085937500000:t) > end > EOF diff --git a/tests/pyrat_verify_json.t b/tests/pyrat_verify_json.t index 208a41aa80b27465d487417ccbcd2ecffa9f3d92..b625f989323acebc668bc912b62e1080cbce1655 100644 --- a/tests/pyrat_verify_json.t +++ b/tests/pyrat_verify_json.t @@ -1,10 +1,10 @@ Test verify-json $ cat - > test_data.csv << EOF - > 1,0,255,200,5,198 + > 1,0.0,1.0,0.784313725,0.019607843,0.776470588 > EOF $ cat - > config.json << EOF - > {"id":"abcd","prover":["PyRAT"],"model":"TestNetwork.nnet","property":{"dataset":"test_data.csv","normalization":["MinMax",{"clip":true,"min":0.0,"max":1.0}],"kind":["Robust",0.01]},"time_limit":null,"memory_limit":null,"output_file":null} + > {"id":"foo","prover":["PyRAT"],"model":"TestNetwork.nnet","property":{"dataset":"test_data.csv","kind":["Robust",0.3]},"time_limit":null,"memory_limit":null,"output_file":null} > EOF $ chmod u+x bin/pyrat.py