Skip to content
Snippets Groups Projects
Commit cdfa79ec authored by Patrick Baudin's avatar Patrick Baudin
Browse files

Merge branch 'feature/lint/get-non-utf8-position' into 'master'

[lint] get non utf8 position

See merge request frama-c/frama-c!4064
parents 2af7c8fd 8f033f52
No related branches found
No related tags found
No related merge requests found
......@@ -346,6 +346,8 @@ README* header_spec=.ignore
/src/libraries/utils/utf8_logic.ml header_spec=CEA_INRIA_LGPL
/src/libraries/utils/utf8_logic.mli header_spec=CEA_INRIA_LGPL
/tools/lint/UTF8.ml header_spec=MODIFIED_CAMOMILE
#########################
# HEADER_SPEC: CEA_LGPL #
#########################
......
This file was originally part of Camomile library.
Copyright (C) 2002, 2003 Yamagata Yoriyuki.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public License
as published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
As a special exception to the GNU Library General Public License, you
may link, statically or dynamically, a "work that uses this library"
with a publicly distributed version of this library to produce an
executable file containing portions of this library, and distribute
that executable file under terms of your choice, without any of the
additional requirements listed in clause 6 of the GNU Library General
Public License. By "a publicly distributed version of this library",
we mean either the unmodified Library as distributed by the authors,
or a modified version of this library that is distributed under the
conditions defined in clause 3 of the GNU Library General Public
License. This exception does not however invalidate any other reasons
why the executable file might be covered by the GNU Library General
Public License .
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
USA
You can contact the authour by sending email to
yoriyuki.y@gmail.com
File modified by CEA (Commissariat à l'énergie atomique et aux
énergies alternatives).
This file was originally part of Camomile library.
Copyright (C) 2002, 2003 Yamagata Yoriyuki.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public License
as published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
As a special exception to the GNU Library General Public License, you
may link, statically or dynamically, a "work that uses this library"
with a publicly distributed version of this library to produce an
executable file containing portions of this library, and distribute
that executable file under terms of your choice, without any of the
additional requirements listed in clause 6 of the GNU Library General
Public License. By "a publicly distributed version of this library",
we mean either the unmodified Library as distributed by the authors,
or a modified version of this library that is distributed under the
conditions defined in clause 3 of the GNU Library General Public
License. This exception does not however invalidate any other reasons
why the executable file might be covered by the GNU Library General
Public License .
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
USA
You can contact the authour by sending email to
yoriyuki.y@gmail.com
File modified by CEA (Commissariat à l'énergie atomique et aux
énergies alternatives).
(***************************************************************************)
(* This file was originally part of Camomile library. *)
(* *)
(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. *)
(* *)
(* This library is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Lesser General Public License *)
(* as published by the Free Software Foundation; either version 2 of *)
(* the License, or (at your option) any later version. *)
(* *)
(* As a special exception to the GNU Library General Public License, you *)
(* may link, statically or dynamically, a "work that uses this library" *)
(* with a publicly distributed version of this library to produce an *)
(* executable file containing portions of this library, and distribute *)
(* that executable file under terms of your choice, without any of the *)
(* additional requirements listed in clause 6 of the GNU Library General *)
(* Public License. By "a publicly distributed version of this library", *)
(* we mean either the unmodified Library as distributed by the authors, *)
(* or a modified version of this library that is distributed under the *)
(* conditions defined in clause 3 of the GNU Library General Public *)
(* License. This exception does not however invalidate any other reasons *)
(* why the executable file might be covered by the GNU Library General *)
(* Public License . *)
(* *)
(* This library is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)
(* Lesser General Public License for more details. *)
(* *)
(* You should have received a copy of the GNU Lesser General Public *)
(* License along with this library; if not, write to the Free Software *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA *)
(* *)
(* You can contact the authour by sending email to *)
(* yoriyuki.y@gmail.com *)
(* *)
(* *)
(* File modified by CEA (Commissariat à l'énergie atomique et aux *)
(* énergies alternatives). *)
(***************************************************************************)
(* Adapted by CEA from the UTF-8 validator of the Camomile library so that
   the location of the first malformed sequence is reported.

   [validate content] scans [content] byte by byte and returns [None] when
   every sequence is accepted, or [Some (line, line_mark, pos)] for the
   first defect found, where:
   - [line] starts at 1 and is incremented on every '\n';
   - [line_mark] is the index of the most recent '\n' seen so far
     (0 as long as no '\n' has been seen);
   - [pos] is the index of the rejected lead byte (invalid lead byte or
     overlong encoding), or the index of the byte just before a missing or
     invalid continuation byte.

   Lead bytes up to 0xfd are accepted, i.e. 5- and 6-byte forms are
   tolerated as long as they are not overlong encodings.

   NOTE(review): the initial marker is [(1, 0)] whereas later markers hold
   the index of a '\n', so [pos - line_mark] is 0-based on the first line
   and 1-based on the following ones -- confirm this is intended. *)
let validate content =
  let exception Malformed_code of (int * int) * int in
  let size = String.length content in
  let fail line_info pos = raise (Malformed_code (line_info, pos)) in
  (* Folds [pending] continuation bytes (10xxxxxx) starting at [pos] into
     [acc] and returns the decoded value. *)
  let rec continuation line_info pending pos acc =
    if pending = 0 then acc
    else if pos >= size then fail line_info (pos - 1)
    else
      let byte = Char.code (String.unsafe_get content pos) in
      if byte land 0xc0 <> 0x80 then fail line_info (pos - 1)
      else
        continuation line_info (pending - 1) (pos + 1)
          ((acc lsl 6) lor (byte land 0x3f))
  in
  (* Dispatches on the lead byte found at [pos]. *)
  let rec scan ((line, _) as line_info) pos =
    if pos >= size then ()
    else begin
      match String.unsafe_get content pos with
      | '\n' -> scan (line + 1, pos) (pos + 1)
      | '\x00' .. '\x7f' -> scan line_info (pos + 1)
      | '\x80' .. '\xc1' | '\xfe' .. '\xff' -> fail line_info pos
      | '\xc2' .. '\xdf' as lead ->
        sequence line_info pos 1 (Char.code lead - 0xc0)
          (fun code -> code < 0x80)
      | '\xe0' .. '\xef' as lead ->
        sequence line_info pos 2 (Char.code lead - 0xe0)
          (fun code -> code < 0x800)
      | '\xf0' .. '\xf7' as lead ->
        sequence line_info pos 3 (Char.code lead - 0xf0)
          (fun code -> code < 0x10000)
      | '\xf8' .. '\xfb' as lead ->
        sequence line_info pos 4 (Char.code lead - 0xf8)
          (fun code -> code < 0x200000)
      | '\xfc' .. '\xfd' as lead ->
        (* The shift-based comparison is kept as in the original code. *)
        sequence line_info pos 5 (Char.code lead - 0xfc)
          (fun code -> code lsr 16 < 0x400)
    end
  (* Checks the [extra] continuation bytes following the lead byte at
     [pos], rejects overlong encodings, then resumes the scan. *)
  and sequence line_info pos extra seed is_overlong =
    let code = continuation line_info extra (pos + 1) seed in
    if is_overlong code then fail line_info pos
    else scan line_info (pos + extra + 1)
  in
  match scan (1, 0) 0 with
  | () -> None
  | exception Malformed_code ((line, line_mark), pos) ->
    Some (line, line_mark, pos)
(**************************************************************************)
(**************************************************************************)
(* *)
(* This file is part of Frama-C. *)
(* *)
(* Copyright (C) 2007-2022 *)
(* CEA (Commissariat à l'énergie atomique et aux énergies *)
(* alternatives) *)
(* *)
(* you can redistribute it and/or modify it under the terms of the GNU *)
(* Lesser General Public License as published by the Free Software *)
(* Foundation, version 2.1. *)
(* *)
(* It is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *)
(* GNU Lesser General Public License for more details. *)
(* *)
(* See the GNU Lesser General Public License version 2.1 *)
(* for more details (enclosed in the file licenses/LGPLv2.1). *)
(* *)
(**************************************************************************)
(** [validate s] checks whether [s] is a well-formed UTF-8 byte sequence.
    It returns
    - [None] when [s] is UTF-8 compliant;
    - [Some (line, byte_pos_line, byte_pos_error)] otherwise, describing the
      first error found.

    [line] is the line number of the error, counted from 1.
    [byte_pos_line] is the byte index in [s] of the last newline character
    preceding the error (0 when the error is on the first line).
    [byte_pos_error] is the byte index in [s] of the rejected lead byte, or
    the index of the byte just before a missing or invalid continuation
    byte. Hence [byte_pos_error - byte_pos_line] locates the error within
    the line number [line].
*)
val validate: string -> (int * int * int) option
......@@ -23,6 +23,6 @@
(executable
(public_name frama-c-lint)
(name lint)
(modules lint)
(libraries unix camomile ocp-indent.lexer ocp-indent.lib ocp-indent.dynlink)
(modules lint UTF8)
(libraries unix ocp-indent.lexer ocp-indent.lib ocp-indent.dynlink)
)
......@@ -20,8 +20,6 @@
(* *)
(**************************************************************************)
open CamomileLibrary
(**************************************************************************)
(* Warning/Error *)
......@@ -174,12 +172,6 @@ let rec collect = function
(**************************************************************************)
(* Functions used to check lint *)
(* UTF8 *)
(* [is_utf8 content] is [true] when [UTF8.validate] accepts the bytes of
   [content], and [false] when it raises [UTF8.Malformed_code]. *)
let is_utf8 content =
  match UTF8.validate (Bytes.to_string content) with
  | () -> true
  | exception UTF8.Malformed_code -> false
(* Syntax *)
let check_syntax ~update content =
......@@ -333,8 +325,8 @@ let check ~verbose ~update file params =
close_in in_chan ;
(* UTF8 *)
if params.utf8 then
if not @@ is_utf8 content then
error "Bad encoding (not UTF8) for %s@." file ;
Option.iter (fun (line,p,i) -> error "Bad encoding (not UTF8) for %s:%i:%i@." file line (i-p))
(UTF8.validate (Bytes.to_string content)) ;
(* Blanks *)
let rewrite = ref false in
let syntactic_check checker content message =
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment