diff --git a/share/analysis-scripts/build.py b/share/analysis-scripts/build.py index 6e31678116717c083d215b0c33c5ab648965ba0c..20978b56624c4fad8d681d96dec96764b02ff9a7 100755 --- a/share/analysis-scripts/build.py +++ b/share/analysis-scripts/build.py @@ -191,8 +191,11 @@ def copy_fc_stubs(): # returns the line number where a likely definition for [funcname] was found, # or None otherwise def find_definitions(funcname, filename): - newlines = function_finder.compute_newline_offsets(filename) - defs = function_finder.find_definitions_and_declarations(True, False, filename, newlines) + with open(filename, encoding="ascii", errors='ignore') as data: + file_content = data.read() + file_lines = file_content.splitlines(keepends=True) + newlines = function_finder.compute_newline_offsets(file_lines) + defs = function_finder.find_definitions_and_declarations(True, False, filename, file_content, file_lines, newlines) defs = [d for d in defs if d[0] == funcname] return [d[2] for d in defs] diff --git a/share/analysis-scripts/build_callgraph.py b/share/analysis-scripts/build_callgraph.py index b53e0d33bb133f15413b096c368f0cc8701d3df2..bfac7c709596c4273a30e712cbdbd43b07453558 100755 --- a/share/analysis-scripts/build_callgraph.py +++ b/share/analysis-scripts/build_callgraph.py @@ -82,9 +82,12 @@ def compute(files): cg = Callgraph() for f in files: #print(f"Processing {os.path.relpath(f)}...") - newlines = function_finder.compute_newline_offsets(f) - defs = function_finder.find_definitions_and_declarations(True, False, f, newlines) - calls = function_finder.find_calls(f, newlines) + with open(f, encoding="ascii", errors='ignore') as data: + file_content = data.read() + file_lines = file_content.splitlines(keepends=True) + newlines = function_finder.compute_newline_offsets(file_lines) + defs = function_finder.find_definitions_and_declarations(True, False, f, file_content, file_lines, newlines) + calls = function_finder.find_calls(file_content, newlines) for call in calls: caller = function_finder.find_caller(defs, call) if caller: diff --git a/share/analysis-scripts/function_finder.py b/share/analysis-scripts/function_finder.py index e68f15ee479361b58be8bbea9284b2538b0130e5..ab04c68afa9f1e62d320ae1d9915c1be623b05eb 100755 --- a/share/analysis-scripts/function_finder.py +++ b/share/analysis-scripts/function_finder.py @@ -60,9 +60,9 @@ def prepare_re_specific_name(fname): # Returns 0 if not found, 1 if declaration, 2 if definition def find_specific_name(prepared_re, f): - with open(f, encoding="ascii", errors='ignore') as content_file: - content = content_file.read() - has_decl_or_def = prepared_re.search(content) + with open(f, encoding="ascii", errors='ignore') as data: + file_content = data.read() + has_decl_or_def = prepared_re.search(file_content) if has_decl_or_def is None: return 0 else: @@ -70,33 +70,35 @@ def find_specific_name(prepared_re, f): return 1 if is_decl else 2 -# matches function definitions -re_fundef_or_decl = re.compile("^" + optional_type_prefix + - "(" + c_identifier + ")" + whitespace + - argument_list + whitespace + - optional_c_id + whitespace + "(;|{)", - flags=re.DOTALL | re.MULTILINE) +# matches function definitions or declarations +# if funcname is not None, only matches for the specified +# function name +def compute_re_def_or_decl(funcname): + id = funcname if funcname else c_identifier + return re.compile("^" + optional_type_prefix + + "(" + id + ")" + whitespace + + argument_list + whitespace + + optional_c_id + whitespace + "(;|{)", + flags=re.DOTALL | re.MULTILINE) # matches function calls re_funcall = re.compile("(" + c_identifier + ")" + whitespace + "\(") # Computes the offset (in bytes) of each '\n' in the file, # returning them as a list -def compute_newline_offsets(filename): +def compute_newline_offsets(file_lines): offsets = [] current = 0 - with open(filename, encoding="ascii", errors='ignore') as data: - for line in data: - current += len(line) - offsets.append(current) + for line in file_lines: + current += len(line) + offsets.append(current) return offsets # Returns the line number (starting at 1) containing the character # of offset [offset]. # [offsets] is the sorted list of offsets for newline characters in the file. def line_of_offset(offsets, offset): - i = bisect.bisect_right(offsets, offset) - return i+1 + return bisect.bisect_right(offsets, offset) + 1 # Returns the line number (starting at 1) of each line starting with '}' # as its first character. @@ -104,12 +106,12 @@ def line_of_offset(offsets, offset): # This is a heuristic to attempt to detect function closing braces: # it assumes that the first '}' (without preceding whitespace) after a # function definition denotes its closing brace. -def compute_closing_braces(filename): +def compute_closing_braces(file_lines): braces = [] - with open(filename, encoding="ascii", errors='ignore') as data: - for i, line in enumerate(data, start=1): - if line.startswith("}"): - braces.append(i) + for i, line in enumerate(file_lines, start=1): + # note: lines contain '\n', so they are never empty + if line[0] == '}': + braces.append(i) return braces # Returns the first element of [line_numbers] greater than [n], or [None] @@ -118,10 +120,11 @@ def compute_closing_braces(filename): # # [line_numbers] must be sorted in ascending order. def get_first_line_after(line_numbers, n): - for line in line_numbers: - if line > n: - return line - return None + #for line in line_numbers: + # if line > n: + # return line + #assert False + return line_numbers[bisect.bisect_left(line_numbers, n)] # Returns a list of tuples (fname, is_def, line_start, line_end, terminator_offset) # for each function definition or declaration. @@ -133,37 +136,38 @@ def get_first_line_after(line_numbers, n): # [terminator_offset] is used by the caller to filter the function prototype # itself and avoid considering it as a call. For function definitions, # this is the opening brace; for function declarations, this is the semicolon. -def find_definitions_and_declarations(want_defs, want_decls, filename, newlines): - braces = compute_closing_braces(filename) - with open(filename, encoding="ascii", errors='ignore') as data: - content = data.read() +def find_definitions_and_declarations(want_defs, want_decls, filename, file_content, file_lines, newlines, funcname=None): + braces = compute_closing_braces(file_lines) res = [] - for match in re.finditer(re_fundef_or_decl, content): + re_fundef_or_decl = compute_re_def_or_decl(funcname) + for match in re.finditer(re_fundef_or_decl, file_content): funcname = match.group(1) - is_def = match.group(2) == "{" - is_decl = match.group(2) == ";" + terminator = match.group(2) + terminator_offset = match.start(2) + is_def = terminator == "{" + is_decl = terminator == ";" assert is_def or is_decl start = line_of_offset(newlines, match.start(1)) if is_decl: if not want_decls: continue - end = line_of_offset(newlines, match.start(2)) + end = line_of_offset(newlines, terminator_offset) else: if not want_defs: continue - definition = content[match.start(1):newlines[start-1]] + definition = file_content[match.start(1):newlines[start-1]] # try "single-line function heuristic": # assume the function is defined as 'type f(...) { code; }', # in a single line if definition.strip().endswith("}"): - end = line_of_offset(newlines, match.start(2)) + end = line_of_offset(newlines, terminator_offset) else: end = get_first_line_after(braces, start) if not end: # no closing braces found; try again the "single-line function heuristic" def_start_newline_offset = newlines[start-1] - line_of_opening_brace = line_of_offset(newlines, match.start(2)) - if start == line_of_opening_brace and definition.rstrip().endswith("}"): + line_of_opening_brace = line_of_offset(newlines, terminator_offset) + if start == line_of_opening_brace and definition.rstrip()[-1] == '}': # assume the '}' is closing the '{' from the same line end = line_of_opening_brace else: @@ -171,7 +175,6 @@ def find_definitions_and_declarations(want_defs, want_decls, filename, newlines) print(f"{os.path.relpath(filename)}:{start}:closing brace not found, " + f"skipping potential definition of '{funcname}'") continue - terminator_offset = match.start(2) if debug: print(f"function_finder: {'def' if is_def else 'decl'} of {funcname} between {start} and {end}") res.append((funcname, is_def, start, end, terminator_offset)) @@ -184,12 +187,10 @@ calls_blacklist = ["if", "while", "for", "return", "sizeof", "switch", "_Alignas # # Note: this may include the function prototype itself; # it must be filtered by the caller. -def find_calls(filename, newlines): - with open(filename, encoding="ascii", errors='ignore') as data: - content = data.read() +def find_calls(file_content, newlines): # create a list of Match objects that fit "pattern" regex res = [] - for match in re.finditer(re_funcall, content): + for match in re.finditer(re_funcall, file_content): funcname = match.group(1) offset = match.start(1) line = line_of_offset(newlines, offset) diff --git a/share/analysis-scripts/heuristic_list_functions.py b/share/analysis-scripts/heuristic_list_functions.py index ceee3fd5a3cf2892f1ed5c8fc8de5b8e76ce3c82..0dc5a867025afcb18b0991fdc9f3c521a8f8224d 100755 --- a/share/analysis-scripts/heuristic_list_functions.py +++ b/share/analysis-scripts/heuristic_list_functions.py @@ -55,8 +55,11 @@ want_decls = boolish_string(sys.argv[2]) files = sys.argv[3:] for f in files: - newlines = function_finder.compute_newline_offsets(f) - defs_and_decls = function_finder.find_definitions_and_declarations(want_defs, want_decls, f, newlines) + with open(f, encoding="ascii", errors='ignore') as data: + file_content = data.read() + file_lines = file_content.splitlines(keepends=True) + newlines = function_finder.compute_newline_offsets(file_lines) + defs_and_decls = function_finder.find_definitions_and_declarations(want_defs, want_decls, f, file_content, file_lines, newlines) for (funcname, is_def, start, end, _offset) in defs_and_decls: if is_def: print(f"{os.path.relpath(f)}:{start}:{end}: {funcname} (definition)")