From c89894a010415f33673c8df67b53bdf7f3400c50 Mon Sep 17 00:00:00 2001
From: Andre Maroneze <andre.maroneze@cea.fr>
Date: Mon, 25 Mar 2024 09:34:43 +0100
Subject: [PATCH] [Libc] add stub and tests for vfscanf

---
 share/libc/stdio.c                         | 269 ++++++++++++++++++++-
 src/plugins/eva/utils/library_functions.ml |   2 +
 tests/libc/stdio_c.c                       |  26 ++
 tests/libc/stdio_h.c                       |  17 +-
 4 files changed, 312 insertions(+), 2 deletions(-)

diff --git a/share/libc/stdio.c b/share/libc/stdio.c
index e98bd53400d..5db9ebdf4af 100644
--- a/share/libc/stdio.c
+++ b/share/libc/stdio.c
@@ -21,12 +21,13 @@
 /**************************************************************************/
 
 #include "__fc_builtin.h"
+#include "errno.h"
+#include "float.h" // for DBL_MAX
 #include "stdbool.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "stdint.h" // for SIZE_MAX
 #include "sys/types.h" // for ssize_t
-#include "errno.h"
 __PUSH_FC_STDLIB
 
 FILE __fc_initial_stdout = {.__fc_FILE_id=1}; 
@@ -243,4 +244,270 @@ FILE *fmemopen(void *restrict buf, size_t size,
   return &__fc_fopen[Frama_C_interval(0, __FC_FOPEN_MAX-1)];
 }
 
+#include "stdarg.h"
+
+enum length_modifier {
+  NONE, HH, H, L, LL, J, Z, T, UPPER_L
+};
+
+int vfscanf(FILE * restrict stream, const char * restrict format, va_list arg) {
+  char *p = format;
+  char conversion_counter = 0;
+  while (*p) {
+    if (*p == '%') {
+      enum length_modifier lm = NONE;
+      char asterisks = 0;
+      p++;
+      if (*p == '%') {
+        break;
+      }
+      // skip any flags
+      while (1) {
+        switch (*p) {
+        case '-':
+        case '+':
+        case ' ':
+        case '#':
+        case '0':
+          break;
+        default:
+          goto post_flags;
+        }
+        p++;
+      }
+    post_flags:
+      // skip field width
+      while (*p >= '0' && *p <= '9') {
+        p++;
+      }
+      // special field width
+      if (*p == '*') {
+        asterisks++;
+        p++;
+      }
+      if (*p == '.') {
+        // skip precision
+        p++;
+        while (*p >= '0' && *p <= '9') {
+          p++;
+        }
+        // special precision
+        if (*p == '*') {
+          asterisks++;
+          p++;
+        }
+      }
+      // length modifier
+      switch (*p) {
+      case 'h':
+        p++;
+        if (*p == 'h') {
+          p++;
+          lm = HH;
+        } else {
+          lm = H;
+        }
+        break;
+      case 'l':
+        p++;
+        if (*p == 'l') {
+          p++;
+          lm = LL;
+        } else {
+          lm = L;
+        }
+        break;
+      case 'j':
+        p++;
+        lm = J;
+        break;
+      case 'z':
+        p++;
+        lm = Z;
+        break;
+      case 't':
+        p++;
+        lm = T;
+        break;
+      case 'L':
+        p++;
+        lm = UPPER_L;
+        break;
+      }
+      // read asterisks
+      while (asterisks) {
+        // reading the arguments ensures that initialization errors are detected
+        int ignored = va_arg(arg, int);
+        (void)(ignored); // avoid GCC warning about unused variable
+        asterisks--;
+      }
+      // conversion specifier
+      switch (*p) {
+      case 'd':
+      case 'i':
+        switch (lm) {
+        case NONE:
+          *va_arg(arg, int*) = Frama_C_interval(INT_MIN, INT_MAX);
+          break;
+        case HH:
+          *va_arg(arg, char*) = Frama_C_char_interval(CHAR_MIN, CHAR_MAX);
+          break;
+        case H:
+          *va_arg(arg, short*) = Frama_C_short_interval(SHRT_MIN, SHRT_MAX);
+          break;
+        case L:
+          *va_arg(arg, long*) = Frama_C_long_interval(LONG_MIN, LONG_MAX);
+          break;
+        case LL:
+        case UPPER_L: // 'Ld' is not in ISO C, but GCC/Clang treat it like 'lld'
+          *va_arg(arg, long long*) =
+            Frama_C_long_long_interval(LLONG_MIN, LLONG_MAX);
+          break;
+        case J:
+          *va_arg(arg, intmax_t*) =
+            Frama_C_intmax_t_interval(INTMAX_MIN, INTMAX_MAX);
+          break;
+        case Z:
+          *va_arg(arg, size_t*) = Frama_C_size_t_interval(0, SIZE_MAX);
+          break;
+        case T:
+          *va_arg(arg, ptrdiff_t*) =
+            Frama_C_ptrdiff_t_interval(PTRDIFF_MIN, PTRDIFF_MAX);
+          break;
+        }
+        break;
+      case 'o':
+      case 'u':
+      case 'x':
+      case 'X':
+        switch (lm) {
+        case NONE:
+          *va_arg(arg, unsigned*) =
+            Frama_C_unsigned_int_interval(0, UINT_MAX);
+          break;
+        case HH:
+          *va_arg(arg, unsigned char*) =
+            Frama_C_unsigned_char_interval(0, UCHAR_MAX);
+          break;
+        case H:
+          *va_arg(arg, unsigned short*) =
+            Frama_C_unsigned_short_interval(0, USHRT_MAX);
+          break;
+        case L:
+          *va_arg(arg, unsigned long*) =
+            Frama_C_unsigned_long_interval(0, ULONG_MAX);
+          break;
+        case LL:
+        case UPPER_L: // 'Ld' is not in ISO C, but GCC/Clang treat it like 'lld'
+          *va_arg(arg, unsigned long long*) =
+            Frama_C_unsigned_long_long_interval(0, ULLONG_MAX);
+          break;
+        case J:
+          *va_arg(arg, uintmax_t*) = Frama_C_uintmax_t_interval(0, UINTMAX_MAX);
+          break;
+        case Z:
+          *va_arg(arg, size_t*) = Frama_C_size_t_interval(0, SIZE_MAX);
+          break;
+        case T:
+          *va_arg(arg, ptrdiff_t*) =
+            Frama_C_ptrdiff_t_interval(PTRDIFF_MIN, PTRDIFF_MAX);
+          break;
+        }
+        break;
+      case 'f':
+      case 'F':
+      case 'e':
+      case 'E':
+      case 'g':
+      case 'G':
+      case 'a':
+      case 'A':
+        switch (lm) {
+        case NONE:
+        case L:
+          // no effect
+          *va_arg(arg, double*) = Frama_C_double_interval(-DBL_MAX, DBL_MAX);
+          break;
+        case UPPER_L:
+          *va_arg(arg, long double*) =
+            Frama_C_long_double_interval(-LDBL_MAX, LDBL_MAX);
+          break;
+        default:
+          // Undefined behavior
+          //@ assert invalid_scanf_specifier: \false;
+          ;
+        }
+        break;
+      case 'c':
+        switch (lm) {
+        case NONE:
+          *va_arg(arg, char*) = Frama_C_char_interval(CHAR_MIN, CHAR_MAX);
+          break;
+        case L:
+          *va_arg(arg, wint_t*) = Frama_C_wint_t_interval(WINT_MIN, WINT_MAX);
+        default:
+          // Undefined behavior
+          //@ assert invalid_scanf_specifier: \false;
+          ;
+        }
+        break;
+      case 's':
+        switch (lm) {
+        case NONE:
+          // TODO: take into account field width
+          Frama_C_make_unknown(va_arg(arg, char*),
+                               Frama_C_size_t_interval(0, SIZE_MAX));
+          break;
+        case L:
+          // TODO: take into account field width
+          Frama_C_make_unknown_wchar(va_arg(arg, wchar_t*),
+                                     Frama_C_size_t_interval(0, SIZE_MAX/sizeof(wchar_t)));
+        default:
+          // Undefined behavior
+          //@ assert invalid_scanf_specifier: \false;
+          ;
+        }
+        break;
+      case 'n':
+        switch (lm) {
+        case NONE:
+          *va_arg(arg, int*) = conversion_counter;
+          break;
+        case HH:
+          *va_arg(arg, char*) = conversion_counter;
+          break;
+        case H:
+          *va_arg(arg, short*) = conversion_counter;
+          break;
+        case L:
+          *va_arg(arg, long*) = conversion_counter;
+          break;
+        case LL:
+        case UPPER_L: // 'Ld' is not in ISO C, but GCC/Clang treat it like 'lld'
+          *va_arg(arg, long long*) = conversion_counter;
+          break;
+        case J:
+          *va_arg(arg, intmax_t*) = conversion_counter;
+          break;
+        case Z:
+          *va_arg(arg, size_t*) = conversion_counter;
+          break;
+        case T:
+          *va_arg(arg, ptrdiff_t*) = conversion_counter;
+          break;
+        }
+        break;
+        //TODO
+      }
+      conversion_counter++;
+    }
+    p++;
+  }
+  return conversion_counter;
+}
+
+int vscanf(const char * restrict format, va_list arg) {
+  return vfscanf(__fc_stdin, format, arg);
+}
+
 __POP_FC_STDLIB
diff --git a/src/plugins/eva/utils/library_functions.ml b/src/plugins/eva/utils/library_functions.ml
index 96e112d45f1..98e63c0048c 100644
--- a/src/plugins/eva/utils/library_functions.ml
+++ b/src/plugins/eva/utils/library_functions.ml
@@ -91,6 +91,8 @@ let unsupported_specifications =
     "strndup", "string.c";
     "unsetenv", "stdlib.c";
     "vasprintf", "stdio.c";
+    "vfscanf", "stdio.c";
+    "vscanf", "stdio.c";
     "wcsdup", "wchar.c";
   ]
 
diff --git a/tests/libc/stdio_c.c b/tests/libc/stdio_c.c
index 8e3b6b209c7..c52eb395c76 100644
--- a/tests/libc/stdio_c.c
+++ b/tests/libc/stdio_c.c
@@ -2,7 +2,11 @@
 #include <string.h>
 #include "stdio.c"
 #include "__fc_builtin.h"
+
 volatile int nondet;
+
+int caller_stub_for_vscanf(const char * restrict format, ...);
+
 int main() {
   FILE *stream;
   char *line = NULL;
@@ -52,5 +56,27 @@ int main() {
     //@ assert at_least_one_char: \initialized(&buf[0]);
   }
 
+  int vscanf_d;
+  char vscanf_c;
+  long double vscanf_Ld;
+  char vscanf_s[30];
+  ptrdiff_t vscanf_t;
+  intmax_t vscanf_j;
+  size_t vscanf_z;
+  int vscanf_res = caller_stub_for_vscanf("%+d %-2c % 41.999Lf %s %ti %jx %zu", &vscanf_d, &vscanf_c, &vscanf_Ld, vscanf_s, &vscanf_t, &vscanf_j, &vscanf_z);
+  if (vscanf_res == 4) {
+    //@ check \initialized(&vscanf_d);
+    //@ check \initialized(&vscanf_s);
+    Frama_C_show_each_must_be_reachable(vscanf_d, &vscanf_c, &vscanf_Ld, vscanf_s, vscanf_t, vscanf_j, vscanf_z);
+  }
+
   return 0;
 }
+
+int caller_stub_for_vscanf(const char * restrict format, ...) {
+  va_list args;
+  va_start(args, format);
+  int res = vscanf(format, args);
+  va_end(args);
+  return res;
+}
diff --git a/tests/libc/stdio_h.c b/tests/libc/stdio_h.c
index 6c3805c0503..d3b7ff77bfa 100644
--- a/tests/libc/stdio_h.c
+++ b/tests/libc/stdio_h.c
@@ -2,8 +2,8 @@
 #include <stdlib.h>
 #include "__fc_builtin.h"
 volatile int nondet;
-
 int caller_stub_for_vasprintf(char **strp, const char *fmt, ...);
+int caller_stub_for_vscanf(const char * restrict format, ...);
 
 int main() {
   FILE *f = fopen("/dev/urandom", "r");
@@ -74,6 +74,13 @@ int main() {
 
   fmemopen(0, 1, "w+"); // test to check that Eva emits warning about stdio.c
 
+  char vscanf_s[10];
+  char vscanf_d;
+  r = caller_stub_for_vscanf("%d %s", &vscanf_d, vscanf_s);
+  if (r) {
+    //@ assert \initialized(&vscanf_d);
+    Frama_C_show_each_must_be_reachable(vscanf_d);
+  }
   return 0;
 }
 
@@ -85,3 +92,11 @@ int caller_stub_for_vasprintf(char **strp, const char *fmt, ...) {
   va_end(args);
   return res;
 }
+
+int caller_stub_for_vscanf(const char * restrict format, ...) {
+  va_list args;
+  va_start(args, format);
+  int res = vscanf(format, args);
+  va_end(args);
+  return res;
+}
-- 
GitLab