From 10d1b830fd6bdd58e684ed791f0f191161615afc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Bobot?= <francois@bobot.eu>
Date: Tue, 19 Feb 2013 18:06:35 +0100
Subject: [PATCH] [Dimacs] better parser using a real lexer

---
 Makefile                                  |   6 +-
 benchs/pigeonhole/.gitignore              |   2 +
 src/cmd/popop_dimacs.ml                   |  10 +
 src/inputlang/altergo/popop_of_altergo.ml |  12 +-
 src/inputlang/dimacs_cnf/dimacs.mli       |  27 ++
 src/inputlang/dimacs_cnf/dimacs.mll       | 125 +++++
 src/util/loc.ml                           |   8 +
 src/util/loc.mli                          |   2 +
 tests/dimacs/sat/par8-1-c.cnf             | 530 ++++++++++++++++++++++
 9 files changed, 710 insertions(+), 12 deletions(-)
 create mode 100644 benchs/pigeonhole/.gitignore
 create mode 100644 src/cmd/popop_dimacs.ml
 create mode 100644 src/inputlang/dimacs_cnf/dimacs.mli
 create mode 100644 src/inputlang/dimacs_cnf/dimacs.mll
 create mode 100644 tests/dimacs/sat/par8-1-c.cnf

diff --git a/Makefile b/Makefile
index 530f9f818..eafcb8082 100644
--- a/Makefile
+++ b/Makefile
@@ -8,8 +8,8 @@ DIRECTORIES=-I src -I src/util -I src/inputlang/altergo -I src/inputlang/dimacs_
 
 all: tests
 
-tests.native:
-	ocamlbuild tests/tests.native $(PACKAGES) $(OPTIONS) \
+popop_dimacs.native tests.native:
+	ocamlbuild src/cmd/popop_dimacs.native tests/tests.native $(PACKAGES) $(OPTIONS) \
 	$(DIRECTORIES)
 
 tests: tests.native
@@ -24,3 +24,5 @@ tests_debug: tests.native
 doc:
 	rubber -d doc.tex
 
+clean:
+	ocamlbuild -clean
diff --git a/benchs/pigeonhole/.gitignore b/benchs/pigeonhole/.gitignore
new file mode 100644
index 000000000..2daaa10ec
--- /dev/null
+++ b/benchs/pigeonhole/.gitignore
@@ -0,0 +1,2 @@
+pigeon-*.cnf
+pigeonhole
\ No newline at end of file
diff --git a/src/cmd/popop_dimacs.ml b/src/cmd/popop_dimacs.ml
new file mode 100644
index 000000000..501e90c2b
--- /dev/null
+++ b/src/cmd/popop_dimacs.ml
@@ -0,0 +1,10 @@
+let () =
+  try
+    let f = Sys.argv.(1) in
+    match Dimacs.check_file f with
+    | Dimacs.Sat -> Format.printf "Sat@."
+    | Dimacs.Unsat -> Format.printf "Unsat@."
+  with e when not (Debug.test_flag Debug.stack_trace) ->
+    Format.eprintf "%a@." Exn_printer.exn_printer e;
+    exit 1
+
diff --git a/src/inputlang/altergo/popop_of_altergo.ml b/src/inputlang/altergo/popop_of_altergo.ml
index 4514d89f5..f34bc2f15 100644
--- a/src/inputlang/altergo/popop_of_altergo.ml
+++ b/src/inputlang/altergo/popop_of_altergo.ml
@@ -84,26 +84,18 @@ let check_goal l =
   with Scheduler.Scheduler.Contradiction ->
     true
 
-let loc lb = Loc.extract (Lexing.lexeme_start_p lb, Lexing.lexeme_end_p lb)
-
-let with_location f lb =
-  if Debug.test_flag Debug.stack_trace then f lb else
-    try f lb with
-    | Loc.Located _ as e -> raise e
-    | e -> raise (Loc.Located (loc lb, e))
-
 let read_file s =
   let cin = open_in s in
   let lb = Lexing.from_channel cin in
   Loc.set_file s lb;
-  let _, decls = with_location (Why_parser.file Why_lexer.token) lb in
+  let _, decls = Loc.with_location (Why_parser.file Why_lexer.token) lb in
   decls
 
 let read_split s =
   let cin = open_in s in
   let lb = Lexing.from_channel cin in
   Loc.set_file s lb;
-  with_location (Why_parser.split_file Why_lexer.token) lb
+  Loc.with_location (Why_parser.split_file Why_lexer.token) lb
 
 
 
diff --git a/src/inputlang/dimacs_cnf/dimacs.mli b/src/inputlang/dimacs_cnf/dimacs.mli
new file mode 100644
index 000000000..01e191b38
--- /dev/null
+++ b/src/inputlang/dimacs_cnf/dimacs.mli
@@ -0,0 +1,27 @@
+(**************************************************************************)
+(*                                                                        *)
+(*  This file is part of Frama-C.                                         *)
+(*                                                                        *)
+(*  Copyright (C) 2013                                                    *)
+(*    CEA (Commissariat à l'énergie atomique et aux énergies              *)
+(*         alternatives)                                                  *)
+(*                                                                        *)
+(*  you can redistribute it and/or modify it under the terms of the GNU   *)
+(*  Lesser General Public License as published by the Free Software       *)
+(*  Foundation, version 2.1.                                              *)
+(*                                                                        *)
+(*  It is distributed in the hope that it will be useful,                 *)
+(*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *)
+(*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *)
+(*  GNU Lesser General Public License for more details.                   *)
+(*                                                                        *)
+(*  See the GNU Lesser General Public License version 2.1                 *)
+(*  for more details (enclosed in the file licenses/LGPLv2.1).            *)
+(*                                                                        *)
+(**************************************************************************)
+
+type answer =
+| Sat
+| Unsat
+
+val check_file : string -> answer
diff --git a/src/inputlang/dimacs_cnf/dimacs.mll b/src/inputlang/dimacs_cnf/dimacs.mll
new file mode 100644
index 000000000..1194a52c8
--- /dev/null
+++ b/src/inputlang/dimacs_cnf/dimacs.mll
@@ -0,0 +1,125 @@
+(**************************************************************************)
+(*                                                                        *)
+(*  This file is part of Frama-C.                                         *)
+(*                                                                        *)
+(*  Copyright (C) 2013                                                    *)
+(*    CEA (Commissariat à l'énergie atomique et aux énergies              *)
+(*         alternatives)                                                  *)
+(*                                                                        *)
+(*  you can redistribute it and/or modify it under the terms of the GNU   *)
+(*  Lesser General Public License as published by the Free Software       *)
+(*  Foundation, version 2.1.                                              *)
+(*                                                                        *)
+(*  It is distributed in the hope that it will be useful,                 *)
+(*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *)
+(*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *)
+(*  GNU Lesser General Public License for more details.                   *)
+(*                                                                        *)
+(*  See the GNU Lesser General Public License version 2.1                 *)
+(*  for more details (enclosed in the file licenses/LGPLv2.1).            *)
+(*                                                                        *)
+(**************************************************************************)
+
+{
+open Lexing
+
+type vars = Solver.Cl.t array
+let get_indice i =
+  if i > 0 then (i-1)*2
+  else if i < 0 then (-i-1)*2+1
+  else assert false
+
+let init_vars env nb_var =
+  let a = Array.create (nb_var*2) (Bool._true env) in
+  for i = nb_var downto -nb_var do
+    if i <> 0 then
+      if i > 0 then
+        a.(get_indice i) <- Uninterp.fresh env (Printf.sprintf "x%i" i)
+      else if i < 0 then
+        a.(get_indice i) <- Bool._not env a.(get_indice (-i))
+  done;
+  a
+
+
+let get_lit vars i =
+  let i = int_of_string i in
+  vars.(get_indice i)
+
+exception SyntaxError of string
+
+let syntax_error s = raise (SyntaxError s)
+
+let () = Exn_printer.register (fun fmt e -> match e with
+  | SyntaxError s -> Format.pp_print_string fmt s
+  | _ -> raise e)
+
+}
+
+let newline = '\n'
+let space = [' ' '\t' '\r']+
+let digit = ['0'-'9']
+let sign = '-' | '+'
+let integer = sign? digit+
+
+rule find_header = parse
+| newline  { Lexing.new_line lexbuf; find_header lexbuf }
+| space    { find_header lexbuf }
+| 'p'
+    space+ "cnf"
+    space+ (digit+ as nb_var)
+    space+ (digit+ as nb_cls) { int_of_string nb_var,
+                                int_of_string nb_cls }
+| 'c' [^'\n']* '\n'  { Lexing.new_line lexbuf; find_header lexbuf }
+| _
+      { syntax_error "Can't find header" }
+
+and clause vars acc = parse
+| newline  { Lexing.new_line lexbuf; clause vars acc lexbuf }
+| space { clause vars acc lexbuf }
+| '0'  { List.rev acc }
+| integer as i { clause vars ((get_lit vars i)::acc) lexbuf }
+| _ { syntax_error "Bad clause" }
+
+and file sched vars = parse
+| newline  { Lexing.new_line lexbuf; file sched vars lexbuf }
+| space { file sched vars lexbuf }
+| '0'  { file sched vars lexbuf }
+| integer as i { let cl = clause vars [get_lit vars i] lexbuf in
+                 let cl = Bool._or (Scheduler.get_t sched) cl in
+                 let d,stop = Scheduler.Scheduler.get_delayed sched in
+                 let cl = Solver.Delayed.propagate d cl in
+                 Bool.set_true d cl;
+                 stop ();
+                 file sched vars lexbuf
+               }
+| 'c' [^'\n']* ('\n' | eof)  { Lexing.new_line lexbuf; file sched vars lexbuf }
+| eof { () }
+| _ { syntax_error "Bad clauses" }
+
+{
+
+let parse sched s =
+  let cin = open_in s in
+  let lb = Lexing.from_channel cin in
+  Loc.set_file s lb;
+  Loc.with_location (fun lexbuf ->
+    let nb_vars, _ = find_header lexbuf in
+    let vars = init_vars (Scheduler.get_t sched) nb_vars in
+    file sched vars lexbuf) lb
+
+type answer =
+| Sat
+| Unsat
+
+let check_file filename =
+  let sched = Scheduler.new_env [Bool.propagate] () in
+  try
+    parse sched filename;
+    Scheduler.Scheduler.run_inf_step sched;
+    Sat
+  with Scheduler.Scheduler.Contradiction ->
+    Unsat
+
+
+}
+
diff --git a/src/util/loc.ml b/src/util/loc.ml
index bcf05235e..e04fda8e1 100644
--- a/src/util/loc.ml
+++ b/src/util/loc.ml
@@ -122,3 +122,11 @@ let () = Exn_printer.register
   | Parsing.Parse_error -> Format.fprintf fmt "syntax error"
   | _ -> raise exn)
 
+
+let loc lb = extract (Lexing.lexeme_start_p lb, Lexing.lexeme_end_p lb)
+
+let with_location f lb =
+  if Debug.test_flag Debug.stack_trace then f lb else
+    try f lb with
+    | Located _ as e -> raise e
+    | e -> raise (Located (loc lb, e))
diff --git a/src/util/loc.mli b/src/util/loc.mli
index 135027c25..f2b7fe0a2 100644
--- a/src/util/loc.mli
+++ b/src/util/loc.mli
@@ -56,3 +56,5 @@ val error: ?loc:position -> exn -> 'a
 exception Message of string
 
 val errorm: ?loc:position -> ('a, Format.formatter, unit, 'b) format4 -> 'a
+
+val with_location: (Lexing.lexbuf -> 'a) -> (Lexing.lexbuf -> 'a)
diff --git a/tests/dimacs/sat/par8-1-c.cnf b/tests/dimacs/sat/par8-1-c.cnf
new file mode 100644
index 000000000..90a8ddb21
--- /dev/null
+++ b/tests/dimacs/sat/par8-1-c.cnf
@@ -0,0 +1,530 @@
+c FILE:  par8-1-c.cnf
+c
+c SOURCE: James Crawford (jc@research.att.com)
+c
+c DESCRIPTION: Instance arises from the problem of learning the parity
+c              function.  
+c
+c     parxx-y denotes a parity problem on xx bits.  y is simply the
+c     intance number.
+c
+c     parxx-y-c denotes an instance identical to parxx-y except that
+c     the instances have been simplified (to create an equivalent
+c     problem). 
+c
+c NOTE: Satisfiable (checked for 8 and 16 size instances. All
+c       instances are satisfiable by construction)
+c
+c NOTE: Number of clauses corrected August 3, 1993
+c
+c Converted from tableau format Tue Aug  3 09:55:20 EDT 1993
+p cnf 64 254
+ -2 1
+ 0
+ -3 -2
+ 0
+ -3 -2 -1
+ 0
+ 3 2 -1
+ 0
+ -3 2 1
+ 0
+ 3 -2 1
+ 0
+ -4 2
+ 0
+ -5 -4
+ 0
+ -5 -4 -2
+ 0
+ 5 4 -2
+ 0
+ -5 4 2
+ 0
+ 5 -4 2
+ 0
+ -6 4
+ 0
+ -7 -6
+ 0
+ -7 -6 -4
+ 0
+ 7 6 -4
+ 0
+ -7 6 4
+ 0
+ 7 -6 4
+ 0
+ -8 6
+ 0
+ -9 -8
+ 0
+ -9 -8 -6
+ 0
+ 9 8 -6
+ 0
+ -9 8 6
+ 0
+ 9 -8 6
+ 0
+ -10 8
+ 0
+ -11 -10
+ 0
+ -11 -10 -8
+ 0
+ 11 10 -8
+ 0
+ -11 10 8
+ 0
+ 11 -10 8
+ 0
+ -12 10
+ 0
+ -13 -12
+ 0
+ -13 -12 -10
+ 0
+ 13 12 -10
+ 0
+ -13 12 10
+ 0
+ 13 -12 10
+ 0
+ -14 12
+ 0
+ -15 -14
+ 0
+ -15 -14 -12
+ 0
+ 15 14 -12
+ 0
+ -15 14 12
+ 0
+ 15 -14 12
+ 0
+ -16 14
+ 0
+ -17 -16
+ 0
+ -17 -16 -14
+ 0
+ 17 16 -14
+ 0
+ -17 16 14
+ 0
+ 17 -16 14
+ 0
+ -18 16
+ 0
+ -19 -18
+ 0
+ -19 -18 -16
+ 0
+ 19 18 -16
+ 0
+ -19 18 16
+ 0
+ 19 -18 16
+ 0
+ -20 18
+ 0
+ -21 -20
+ 0
+ -21 -20 -18
+ 0
+ 21 20 -18
+ 0
+ -21 20 18
+ 0
+ 21 -20 18
+ 0
+ -22 20
+ 0
+ -23 -22
+ 0
+ -23 -22 -20
+ 0
+ 23 22 -20
+ 0
+ -23 22 20
+ 0
+ 23 -22 20
+ 0
+ -24 22
+ 0
+ -25 -24
+ 0
+ -25 -24 -22
+ 0
+ 25 24 -22
+ 0
+ -25 24 22
+ 0
+ 25 -24 22
+ 0
+ -26 24
+ 0
+ -27 -26
+ 0
+ -27 -26 -24
+ 0
+ 27 26 -24
+ 0
+ -27 26 24
+ 0
+ 27 -26 24
+ 0
+ -28 26
+ 0
+ -29 -28
+ 0
+ -29 -28 -26
+ 0
+ 29 28 -26
+ 0
+ -29 28 26
+ 0
+ 29 -28 26
+ 0
+ 28 -30
+ 0
+ -31 -30
+ 0
+ -31 -28 -30
+ 0
+ 31 -28 30
+ 0
+ -31 28 30
+ 0
+ 31 28 -30
+ 0
+ -33 -32 -3
+ 0
+ 33 32 -3
+ 0
+ -33 32 3
+ 0
+ 33 -32 3
+ 0
+ -35 -34 -32
+ 0
+ 35 34 -32
+ 0
+ -35 34 32
+ 0
+ 35 -34 32
+ 0
+ -37 -34 36
+ 0
+ 37 -34 -36
+ 0
+ -37 34 -36
+ 0
+ 37 34 36
+ 0
+ -39 -38 -5
+ 0
+ 39 38 -5
+ 0
+ -39 38 5
+ 0
+ 39 -38 5
+ 0
+ -35 -40 -38
+ 0
+ 35 40 -38
+ 0
+ -35 40 38
+ 0
+ 35 -40 38
+ 0
+ -42 -41 -40
+ 0
+ 42 41 -40
+ 0
+ -42 41 40
+ 0
+ 42 -41 40
+ 0
+ -36 -41 43
+ 0
+ 36 -41 -43
+ 0
+ -36 41 -43
+ 0
+ 36 41 43
+ 0
+ -44 -7 29
+ 0
+ 44 -7 -29
+ 0
+ 44 7 29
+ 0
+ -44 7 -29
+ 0
+ -33 -45 -44
+ 0
+ 33 45 -44
+ 0
+ -33 45 44
+ 0
+ 33 -45 44
+ 0
+ -37 -36 -45
+ 0
+ 37 36 -45
+ 0
+ -37 36 45
+ 0
+ 37 -36 45
+ 0
+ -37 -46 -9
+ 0
+ 37 46 -9
+ 0
+ -37 46 9
+ 0
+ 37 -46 9
+ 0
+ -36 -43 -46
+ 0
+ 36 43 -46
+ 0
+ -36 43 46
+ 0
+ 36 -43 46
+ 0
+ -39 -47 -11
+ 0
+ 39 47 -11
+ 0
+ -39 47 11
+ 0
+ 39 -47 11
+ 0
+ -33 -48 -47
+ 0
+ 33 48 -47
+ 0
+ -33 48 47
+ 0
+ 33 -48 47
+ 0
+ -37 -36 -48
+ 0
+ 37 36 -48
+ 0
+ -37 36 48
+ 0
+ 37 -36 48
+ 0
+ -39 -49 -13
+ 0
+ 39 49 -13
+ 0
+ -39 49 13
+ 0
+ 39 -49 13
+ 0
+ -33 -36 -49
+ 0
+ 33 36 -49
+ 0
+ -33 36 49
+ 0
+ 33 -36 49
+ 0
+ -50 -15 29
+ 0
+ 50 -15 -29
+ 0
+ 50 15 29
+ 0
+ -50 15 -29
+ 0
+ -35 -37 -50
+ 0
+ 35 37 -50
+ 0
+ -35 37 50
+ 0
+ 35 -37 50
+ 0
+ -39 -35 -17
+ 0
+ 39 35 -17
+ 0
+ -39 35 17
+ 0
+ 39 -35 17
+ 0
+ -39 -51 -19
+ 0
+ 39 51 -19
+ 0
+ -39 51 19
+ 0
+ 39 -51 19
+ 0
+ -35 -52 -51
+ 0
+ 35 52 -51
+ 0
+ -35 52 51
+ 0
+ 35 -52 51
+ 0
+ -37 -52 42
+ 0
+ 37 -52 -42
+ 0
+ -37 52 -42
+ 0
+ 37 52 42
+ 0
+ -53 -21 29
+ 0
+ 53 -21 -29
+ 0
+ 53 21 29
+ 0
+ -53 21 -29
+ 0
+ -33 -54 -53
+ 0
+ 33 54 -53
+ 0
+ -33 54 53
+ 0
+ 33 -54 53
+ 0
+ -35 -54 42
+ 0
+ 35 -54 -42
+ 0
+ -35 54 -42
+ 0
+ 35 54 42
+ 0
+ -33 -23 42
+ 0
+ 33 -23 -42
+ 0
+ -33 23 -42
+ 0
+ 33 23 42
+ 0
+ -55 -25 29
+ 0
+ 55 -25 -29
+ 0
+ 55 25 29
+ 0
+ -55 25 -29
+ 0
+ -33 -56 -55
+ 0
+ 33 56 -55
+ 0
+ -33 56 55
+ 0
+ 33 -56 55
+ 0
+ -35 -56 36
+ 0
+ 35 -56 -36
+ 0
+ -35 56 -36
+ 0
+ 35 56 36
+ 0
+ -39 -57 -27
+ 0
+ 39 57 -27
+ 0
+ -39 57 27
+ 0
+ 39 -57 27
+ 0
+ -58 -57 29
+ 0
+ 58 -57 -29
+ 0
+ 58 57 29
+ 0
+ -58 57 -29
+ 0
+ -35 -59 -58
+ 0
+ 35 59 -58
+ 0
+ -35 59 58
+ 0
+ 35 -59 58
+ 0
+ -37 -59 -36
+ 0
+ 37 -59 36
+ 0
+ -37 59 36
+ 0
+ 37 59 -36
+ 0
+ -37 -60 -31
+ 0
+ 37 60 -31
+ 0
+ -37 60 31
+ 0
+ 37 -60 31
+ 0
+ -42 -61 -60
+ 0
+ 42 61 -60
+ 0
+ -42 61 60
+ 0
+ 42 -61 60
+ 0
+ -36 -61 43
+ 0
+ 36 -61 -43
+ 0
+ -36 61 -43
+ 0
+ 36 61 43
+ 0
+ -39 -62 -30
+ 0
+ 39 62 -30
+ 0
+ -39 62 30
+ 0
+ 39 -62 30
+ 0
+ -33 -63 -62
+ 0
+ 33 63 -62
+ 0
+ -33 63 62
+ 0
+ 33 -63 62
+ 0
+ -42 -64 -63
+ 0
+ 42 64 -63
+ 0
+ -42 64 63
+ 0
+ 42 -64 63
+ 0
+ -36 -64 -43
+ 0
+ 36 -64 43
+ 0
+ -36 64 43
+ 0
+ 36 64 -43
+ 0
+
-- 
GitLab