diff --git a/src/kernel_internals/parsing/logic_preprocess.mll b/src/kernel_internals/parsing/logic_preprocess.mll
index 34ea01b4d6bda1c8a16eb82d54425fc1b82b90ef..765d158daf4a966e8c609f7a6f0e56426b143a7e 100644
--- a/src/kernel_internals/parsing/logic_preprocess.mll
+++ b/src/kernel_internals/parsing/logic_preprocess.mll
@@ -62,9 +62,30 @@
 
   let backslash = "__ANNOT_BACKSLASH__"
   let annot_content = "__ANNOT_CONTENT__"
+  (* Prefix introducing a byte encoded by [encode_utf8]. *)
+  let utf8_prefix = "__FC_UTF8_"
+
+  (* Encodes byte [c] as [utf8_prefix] followed by exactly three decimal
+     digits, so that a digit following the encoded byte in the annotation
+     cannot be mistaken for part of the byte code when decoding. *)
+  let encode_utf8 c = Printf.sprintf "%s%03d" utf8_prefix (Char.code c)
 
   let re_backslash = Str.regexp_string backslash
   let re_annot_content = Str.regexp_string annot_content
+  (* Matches one encoded byte; group 1 is its three-digit decimal code. *)
+  let re_utf8 = Str.regexp (utf8_prefix ^ "\\([0-9][0-9][0-9]\\)")
+
+  (* Replaces every sequence produced by [encode_utf8] in [s] by the byte
+     it stands for, in a single left-to-right pass. A three-digit code
+     above 255 cannot come from [encode_utf8]: such text is left untouched
+     instead of letting [Char.chr] raise [Invalid_argument]. *)
+  let decode_utf8 s =
+    let decode_match whole =
+      let code = int_of_string (Str.matched_group 1 whole) in
+      if code <= 255 then String.make 1 (Char.chr code)
+      else Str.matched_string whole
+    in
+    Str.global_substitute re_utf8 decode_match s
 
 (* Delimiters for the various annotations in the preprocessing buffer.
We have one delimiter for the beginning of an annotation (to discard @@ -115,7 +136,7 @@ ignore_content (); ignore (input_line file); (* ignore the #line directive *) let with_nl, content = get_annot true in - with_nl, replace_backslash content + with_nl, decode_utf8 @@ replace_backslash content with End_of_file -> Kernel.fatal "too few annotations in result file while pre-processing annotations" @@ -184,6 +205,8 @@ add_preprocess_line_info() } +let utf8 = ['\128'-'\255'] + rule main = parse | ("#define"|"#undef") [' ''\t']* ((['a'-'z''A'-'Z''0'-'9''_'])* as m) { @@ -388,6 +411,10 @@ and annot = parse is_newline:=CHAR; Buffer.add_char preprocess_buffer '"'; string annot lexbuf } + | utf8 as c { + Buffer.add_string preprocess_buffer (encode_utf8 c); + annot lexbuf + } | _ as c { is_newline := CHAR; Buffer.add_char preprocess_buffer c; annot lexbuf } diff --git a/src/plugins/markdown-report/tests/sarif/oracle/with-libc.sarif b/src/plugins/markdown-report/tests/sarif/oracle/with-libc.sarif index 999ef8e57b7c0cf38f6383d4b549ebdd8f0b1b84..75cbdcd725df74bc9cf5732be0bcd534ed041e59 100644 --- a/src/plugins/markdown-report/tests/sarif/oracle/with-libc.sarif +++ b/src/plugins/markdown-report/tests/sarif/oracle/with-libc.sarif @@ -516,8 +516,8 @@ "startLine": 168, "startColumn": 4, "endLine": 170, - "endColumn": 82, - "byteLength": 146 + "endColumn": 75, + "byteLength": 132 } } } @@ -587,8 +587,8 @@ "startLine": 240, "startColumn": 4, "endLine": 244, - "endColumn": 70, - "byteLength": 180 + "endColumn": 63, + "byteLength": 173 } } } @@ -1151,7 +1151,7 @@ "startColumn": 4, "endLine": 121, "endColumn": 77, - "byteLength": 155 + "byteLength": 148 } } } @@ -1266,7 +1266,7 @@ "startColumn": 4, "endLine": 125, "endColumn": 77, - "byteLength": 156 + "byteLength": 149 } } } @@ -1455,7 +1455,7 @@ "startColumn": 4, "endLine": 109, "endColumn": 59, - "byteLength": 125 + "byteLength": 118 } } } @@ -1992,8 +1992,8 @@ "startLine": 89, "startColumn": 4, "endLine": 90, - "endColumn": 79, - 
"byteLength": 108 + "endColumn": 72, + "byteLength": 101 } } } @@ -2202,8 +2202,8 @@ "startLine": 143, "startColumn": 4, "endLine": 147, - "endColumn": 70, - "byteLength": 177 + "endColumn": 63, + "byteLength": 170 } } } @@ -2439,7 +2439,7 @@ "startColumn": 4, "endLine": 59, "endColumn": 62, - "byteLength": 148 + "byteLength": 134 } } } @@ -2788,7 +2788,7 @@ "startColumn": 4, "endLine": 185, "endColumn": 63, - "byteLength": 150 + "byteLength": 143 } } } @@ -3563,7 +3563,7 @@ "startColumn": 4, "endLine": 202, "endColumn": 22, - "byteLength": 120 + "byteLength": 113 } } } @@ -3655,7 +3655,7 @@ "startColumn": 4, "endLine": 269, "endColumn": 29, - "byteLength": 167 + "byteLength": 153 } } } @@ -3958,7 +3958,7 @@ "startColumn": 4, "endLine": 97, "endColumn": 58, - "byteLength": 127 + "byteLength": 120 } } } @@ -4431,7 +4431,7 @@ "startColumn": 4, "endLine": 59, "endColumn": 62, - "byteLength": 148 + "byteLength": 134 } } } @@ -4826,7 +4826,7 @@ "startColumn": 4, "endLine": 105, "endColumn": 51, - "byteLength": 115 + "byteLength": 108 } } } @@ -4849,7 +4849,7 @@ "startColumn": 4, "endLine": 135, "endColumn": 38, - "byteLength": 192 + "byteLength": 185 } } } @@ -5078,8 +5078,8 @@ "startLine": 68, "startColumn": 4, "endLine": 70, - "endColumn": 70, - "byteLength": 156 + "endColumn": 63, + "byteLength": 135 } } } @@ -5269,8 +5269,8 @@ "startLine": 252, "startColumn": 4, "endLine": 256, - "endColumn": 60, - "byteLength": 208 + "endColumn": 53, + "byteLength": 194 } } } @@ -5364,7 +5364,7 @@ "startColumn": 4, "endLine": 269, "endColumn": 29, - "byteLength": 167 + "byteLength": 153 } } } @@ -5573,7 +5573,7 @@ "startColumn": 4, "endLine": 101, "endColumn": 59, - "byteLength": 124 + "byteLength": 117 } } } @@ -5739,7 +5739,7 @@ "startColumn": 4, "endLine": 113, "endColumn": 51, - "byteLength": 118 + "byteLength": 111 } } } @@ -5948,7 +5948,7 @@ "startColumn": 4, "endLine": 117, "endColumn": 62, - "byteLength": 157 + "byteLength": 143 } } } @@ -6636,7 +6636,7 @@ "startColumn": 4, 
"endLine": 130, "endColumn": 38, - "byteLength": 191 + "byteLength": 184 } } } @@ -6731,8 +6731,8 @@ "startLine": 39, "startColumn": 4, "endLine": 42, - "endColumn": 77, - "byteLength": 184 + "endColumn": 70, + "byteLength": 170 } } } @@ -6800,8 +6800,8 @@ "startLine": 155, "startColumn": 4, "endLine": 159, - "endColumn": 60, - "byteLength": 205 + "endColumn": 53, + "byteLength": 191 } } } @@ -7177,8 +7177,8 @@ "startLine": 143, "startColumn": 4, "endLine": 147, - "endColumn": 70, - "byteLength": 177 + "endColumn": 63, + "byteLength": 170 } } } @@ -7246,8 +7246,8 @@ "startLine": 240, "startColumn": 4, "endLine": 244, - "endColumn": 70, - "byteLength": 180 + "endColumn": 63, + "byteLength": 173 } } } @@ -7362,7 +7362,7 @@ "startColumn": 4, "endLine": 82, "endColumn": 40, - "byteLength": 183 + "byteLength": 169 } } } @@ -7500,7 +7500,7 @@ "startColumn": 4, "endLine": 135, "endColumn": 38, - "byteLength": 192 + "byteLength": 185 } } } @@ -7592,7 +7592,7 @@ "startColumn": 4, "endLine": 197, "endColumn": 41, - "byteLength": 188 + "byteLength": 174 } } } @@ -7614,8 +7614,8 @@ "startLine": 155, "startColumn": 4, "endLine": 159, - "endColumn": 60, - "byteLength": 205 + "endColumn": 53, + "byteLength": 191 } } } @@ -8124,8 +8124,8 @@ "startLine": 252, "startColumn": 4, "endLine": 256, - "endColumn": 60, - "byteLength": 208 + "endColumn": 53, + "byteLength": 194 } } } @@ -8267,7 +8267,7 @@ "startColumn": 4, "endLine": 185, "endColumn": 63, - "byteLength": 150 + "byteLength": 143 } } } @@ -8595,8 +8595,8 @@ "startLine": 39, "startColumn": 4, "endLine": 42, - "endColumn": 77, - "byteLength": 184 + "endColumn": 70, + "byteLength": 170 } } } @@ -8738,7 +8738,7 @@ "startColumn": 4, "endLine": 87, "endColumn": 22, - "byteLength": 116 + "byteLength": 109 } } } @@ -8829,8 +8829,8 @@ "startLine": 68, "startColumn": 4, "endLine": 70, - "endColumn": 70, - "byteLength": 156 + "endColumn": 63, + "byteLength": 135 } } } @@ -9538,8 +9538,8 @@ "startLine": 168, "startColumn": 4, "endLine": 
170, - "endColumn": 82, - "byteLength": 146 + "endColumn": 75, + "byteLength": 132 } } }