From 08419ffc6befbbe085536066b76f05e3bad8ae1b Mon Sep 17 00:00:00 2001 From: Corentin De Souza <9597216+fantazio@users.noreply.github.com> Date: Sat, 7 Feb 2026 15:32:55 +0100 Subject: [PATCH 1/4] [src][utils] move `DeadCode.kind` to `Utils.kind` This will be used in `Config` to build the resolved set of files for `paths_to_analyze`, `references_paths`, and `excluded_paths`. Currently, the paths stored are those found in the command line. Ideally, the paths stored should only be .cmi and .cmt files. --- src/deadCode.ml | 15 ++------------- src/utils.ml | 24 ++++++++++++++++++------ src/utils.mli | 9 +++++++++ 3 files changed, 29 insertions(+), 19 deletions(-) diff --git a/src/deadCode.ml b/src/deadCode.ml index 8e3393e1..6b7ea418 100644 --- a/src/deadCode.ml +++ b/src/deadCode.ml @@ -342,18 +342,6 @@ let collect_references = (* Tast_mapper *) type_declaration } -(* Checks the nature of the file *) -let kind fn = - let state = State.get_current () in - if not (Sys.file_exists fn) then begin - prerr_endline ("Warning: '" ^ fn ^ "' not found"); - `Ignore - end else if Config.is_excluded fn state.config then `Ignore - else if Sys.is_directory fn then `Dir - else if Filename.check_suffix fn ".cmi" then `Cmi - else if Filename.check_suffix fn ".cmt" then `Cmt - else `Ignore - let regabs state = let fn = State.File_infos.get_sourcepath state.State.file_infos in @@ -458,7 +446,8 @@ let rec load_file fn state = (* TODO: stateful computations should take and return the state when possible *) state in - match kind fn with + let exclude filepath = Config.is_excluded filepath state.State.config in + match Utils.kind ~exclude fn with | `Cmi when !DeadCommon.declarations -> last_loc := Lexing.dummy_pos; if state.State.config.verbose then Printf.eprintf "Scanning %s\n%!" fn; diff --git a/src/utils.ml b/src/utils.ml index cf66a239..d0a3b444 100644 --- a/src/utils.ml +++ b/src/utils.ml @@ -1,11 +1,23 @@ -let remove_pp fn = - let ext = Filename.extension fn in - let no_ext = Filename.remove_extension fn in +let remove_pp filepath = + let ext = Filename.extension filepath in + let no_ext = Filename.remove_extension filepath in match Filename.extension no_ext with | ".pp" -> Filename.remove_extension no_ext ^ ext - | _ -> fn + | _ -> filepath -let unit fn = - Filename.remove_extension (Filename.basename fn) +let unit filepath = + Filename.remove_extension (Filename.basename filepath) + +(* Checks the nature of the file *) +let kind ~exclude filepath = + if exclude filepath then `Ignore + else if not (Sys.file_exists filepath) then ( + prerr_endline ("Warning: '" ^ filepath ^ "' not found"); + `Ignore + ) + else if Sys.is_directory filepath then `Dir + else if Filename.check_suffix filepath ".cmi" then `Cmi + else if Filename.check_suffix filepath ".cmt" then `Cmt + else `Ignore module StringSet = Set.Make(String) diff --git a/src/utils.mli b/src/utils.mli index ce4b30e9..75cd950c 100644 --- a/src/utils.mli +++ b/src/utils.mli @@ -1,5 +1,14 @@ val remove_pp : string -> string +(** [remove_pp filepath] removes the `.pp` extension (if it exists) from + [filepath]. Eg. [remove_pp "dir/foo.pp.ml" = "dir/foo.ml"] *) val unit : string -> string +(** [unit filepath] estimates the compilation unit of [filepath] *) + +val kind : exclude:(string -> bool) -> string -> [> `Cmi | `Cmt | `Dir | `Ignore ] +(** [kind ~exclude filepath] returns the kind of [filepath]. + If [exclude filepath = true], [filepath] does not exists, or [filepath] + does not fit in qnother kind, then its kind is [`Ignore]. + Other kinds are self explanatory. *) module StringSet : Set.S with type elt = String.t From 31ec2e5c7b4ee55c58e50d0eb8ba6f3a85d659b0 Mon Sep 17 00:00:00 2001 From: Corentin De Souza <9597216+fantazio@users.noreply.github.com> Date: Mon, 9 Feb 2026 14:20:56 +0100 Subject: [PATCH 2/4] [src][utils] create Filepath module This module group filepath-related functions. The return type of `Utils.Filepath.kind` is extracted to its own type rather than using polymorphic variants --- src/deadCode.ml | 22 +++++++++--------- src/deadCommon.ml | 8 +++---- src/state/file_infos.ml | 8 +++---- src/utils.ml | 49 +++++++++++++++++++++++++---------------- src/utils.mli | 35 +++++++++++++++++++---------- 5 files changed, 72 insertions(+), 50 deletions(-) diff --git a/src/deadCode.ml b/src/deadCode.ml index 6b7ea418..55a1327e 100644 --- a/src/deadCode.ml +++ b/src/deadCode.ml @@ -345,9 +345,9 @@ let collect_references = (* Tast_mapper *) let regabs state = let fn = State.File_infos.get_sourcepath state.State.file_infos in - hashtbl_add_unique_to_list abspath (Utils.unit fn) fn; + hashtbl_add_unique_to_list abspath (Utils.Filepath.unit fn) fn; if !DeadCommon.declarations then - hashtbl_add_unique_to_list main_files (Utils.unit fn) () + hashtbl_add_unique_to_list main_files (Utils.Filepath.unit fn) () let read_interface fn cmi_infos state = let open Cmi_format in @@ -358,7 +358,7 @@ let read_interface fn cmi_infos state = let open Cmi_format in if State.File_infos.has_sourcepath state.file_infos then State.File_infos.get_sourceunit state.file_infos else - Utils.unit fn + Utils.Filepath.unit fn in let module_id = State.File_infos.get_modname state.file_infos @@ -383,11 +383,11 @@ let assoc decs (loc1, loc2) = let is_implem fn = fn.[String.length fn - 1] <> 'i' in let has_iface fn = fn.[String.length fn - 1] = 'i' - || ( Utils.unit fn = sourceunit + || ( Utils.Filepath.unit fn = sourceunit && DeadCommon.file_exists (fn ^ "i")) in let is_iface fn loc = - Hashtbl.mem decs loc || Utils.unit fn <> sourceunit + Hashtbl.mem decs loc || Utils.Filepath.unit fn <> sourceunit || not (is_implem fn && has_iface fn) in if fn1 <> _none && fn2 <> _none && loc1 <> loc2 then begin @@ -408,7 +408,7 @@ let clean references loc = let state = State.get_current () in let sourceunit = State.File_infos.get_sourceunit state.file_infos in let fn = loc.Lexing.pos_fname in - if (fn.[String.length fn - 1] <> 'i' && Utils.unit fn = sourceunit) then + if (fn.[String.length fn - 1] <> 'i' && Utils.Filepath.unit fn = sourceunit) then LocHash.remove references loc let eof loc_dep = @@ -447,8 +447,8 @@ let rec load_file fn state = state in let exclude filepath = Config.is_excluded filepath state.State.config in - match Utils.kind ~exclude fn with - | `Cmi when !DeadCommon.declarations -> + match Utils.Filepath.kind ~exclude fn with + | Cmi when !DeadCommon.declarations -> last_loc := Lexing.dummy_pos; if state.State.config.verbose then Printf.eprintf "Scanning %s\n%!" fn; init_and_continue state fn (fun state -> @@ -457,7 +457,7 @@ let rec load_file fn state = | Some cmi_infos -> read_interface fn cmi_infos state ) - | `Cmt -> + | Cmt -> let open Cmt_format in last_loc := Lexing.dummy_pos; if state.config.verbose then Printf.eprintf "Scanning %s\n%!" fn; @@ -490,7 +490,7 @@ let rec load_file fn state = | _ -> () (* todo: support partial_implementation? *) ) - | `Dir -> + | Dir -> let next = Sys.readdir fn in Array.sort compare next; Array.fold_left @@ -507,7 +507,7 @@ let rec load_file fn state = (* Prepare the list of opt_args for report *) let analyze_opt_args () = DeadArg.eocb (); - let dec_loc loc = Hashtbl.mem main_files (Utils.unit loc.Lexing.pos_fname) in + let dec_loc loc = Hashtbl.mem main_files (Utils.Filepath.unit loc.Lexing.pos_fname) in let all = ref [] in let opt_args_tbl = Hashtbl.create 256 in diff --git a/src/deadCommon.ml b/src/deadCommon.ml index bd9344ad..7db05bac 100644 --- a/src/deadCommon.ml +++ b/src/deadCommon.ml @@ -127,7 +127,7 @@ let find_path fn l = List.find (is_sub_path ~sep fn) l let find_abspath fn = - find_path fn (hashtbl_find_list abspath (Utils.unit fn)) + find_path fn (hashtbl_find_list abspath (Utils.Filepath.unit fn)) let file_exists fn = match find_abspath fn with @@ -152,7 +152,7 @@ let exported ?(is_type = false) (flag : Config.Sections.main_section) loc = && (is_type || state.config.internal || fn.[String.length fn - 1] = 'i' - || sourceunit <> Utils.unit fn + || sourceunit <> Utils.Filepath.unit fn || not (file_exists (fn ^ "i"))) @@ -362,7 +362,7 @@ module VdNode = struct if not (LocSet.is_empty worklist) then let loc = LocSet.choose worklist in let wl = LocSet.remove loc worklist in - if Utils.unit loc.Lexing.pos_fname <> sourceunit then + if Utils.Filepath.unit loc.Lexing.pos_fname <> sourceunit then List.iter (LocHash.remove parents) loc_list else begin LocHash.replace met loc (); @@ -394,7 +394,7 @@ let export ?(sep = ".") path u stock id loc = will create value definitions whose location is in set.mli *) if not loc.Location.loc_ghost - && (u = Utils.unit loc.Location.loc_start.Lexing.pos_fname || u == _include) + && (u = Utils.Filepath.unit loc.Location.loc_start.Lexing.pos_fname || u == _include) && check_underscore (Ident.name id) then let state = State.get_current () in let builddir = State.File_infos.get_builddir state.file_infos in diff --git a/src/state/file_infos.ml b/src/state/file_infos.ml index ec7d5007..7b079f51 100644 --- a/src/state/file_infos.ml +++ b/src/state/file_infos.ml @@ -23,7 +23,7 @@ let empty = { let init_from_cmt_infos cmt_infos cmt_file = let builddir = cmt_infos.Cmt_format.cmt_builddir in let sourcepath = - Option.map Utils.remove_pp cmt_infos.cmt_sourcefile + Option.map Utils.Filepath.remove_pp cmt_infos.cmt_sourcefile |> Option.map (Filename.concat builddir) in let modname = cmt_infos.cmt_modname in @@ -56,7 +56,7 @@ let sourcefname_of_cmi_infos cmi_unit cmi_infos = *) let cmi_unit = String.lowercase_ascii cmi_unit in let candidate_of_fname fname = - let src_unit = Utils.unit fname |> String.lowercase_ascii in + let src_unit = Utils.Filepath.unit fname |> String.lowercase_ascii in if String.equal src_unit cmi_unit then `Identical fname else if String.ends_with ~suffix:src_unit cmi_unit then @@ -112,7 +112,7 @@ let init_from_cmi_infos ?with_cmt cmi_infos cmi_file = let sourcepath = let sourcepath = (* Try to find a sourcepath in the cmi_infos *) - let cmi_unit = Utils.unit cmi_file in + let cmi_unit = Utils.Filepath.unit cmi_file in let sourcefname = sourcefname_of_cmi_infos cmi_unit cmi_infos in match sourcefname, builddir with | Some fname, Some builddir -> Some (Filename.concat builddir fname) @@ -209,7 +209,7 @@ let get_sourcepath t = let get_sourceunit t = match t.sourcepath with - | Some sourcepath -> Utils.unit sourcepath + | Some sourcepath -> Utils.Filepath.unit sourcepath | None -> "!!UNKNOWN_SOURCEUNIT_FOR<" ^ t.cmti_file ^ ">!!" let get_modname t = t.modname diff --git a/src/utils.ml b/src/utils.ml index d0a3b444..a74ab528 100644 --- a/src/utils.ml +++ b/src/utils.ml @@ -1,23 +1,34 @@ -let remove_pp filepath = - let ext = Filename.extension filepath in - let no_ext = Filename.remove_extension filepath in - match Filename.extension no_ext with - | ".pp" -> Filename.remove_extension no_ext ^ ext - | _ -> filepath +module Filepath = struct -let unit filepath = - Filename.remove_extension (Filename.basename filepath) + type t = string -(* Checks the nature of the file *) -let kind ~exclude filepath = - if exclude filepath then `Ignore - else if not (Sys.file_exists filepath) then ( - prerr_endline ("Warning: '" ^ filepath ^ "' not found"); - `Ignore - ) - else if Sys.is_directory filepath then `Dir - else if Filename.check_suffix filepath ".cmi" then `Cmi - else if Filename.check_suffix filepath ".cmt" then `Cmt - else `Ignore + let remove_pp filepath = + let ext = Filename.extension filepath in + let no_ext = Filename.remove_extension filepath in + match Filename.extension no_ext with + | ".pp" -> Filename.remove_extension no_ext ^ ext + | _ -> filepath + + let unit filepath = + Filename.remove_extension (Filename.basename filepath) + + type kind = + | Cmi + | Cmt + | Dir + | Ignore + + (* Checks the nature of the file *) + let kind ~exclude filepath = + if exclude filepath then Ignore + else if not (Sys.file_exists filepath) then ( + prerr_endline ("Warning: '" ^ filepath ^ "' not found"); + Ignore + ) + else if Sys.is_directory filepath then Dir + else if Filename.check_suffix filepath ".cmi" then Cmi + else if Filename.check_suffix filepath ".cmt" then Cmt + else Ignore +end module StringSet = Set.Make(String) diff --git a/src/utils.mli b/src/utils.mli index 75cd950c..f9fe1424 100644 --- a/src/utils.mli +++ b/src/utils.mli @@ -1,14 +1,25 @@ -val remove_pp : string -> string -(** [remove_pp filepath] removes the `.pp` extension (if it exists) from - [filepath]. Eg. [remove_pp "dir/foo.pp.ml" = "dir/foo.ml"] *) - -val unit : string -> string -(** [unit filepath] estimates the compilation unit of [filepath] *) - -val kind : exclude:(string -> bool) -> string -> [> `Cmi | `Cmt | `Dir | `Ignore ] -(** [kind ~exclude filepath] returns the kind of [filepath]. - If [exclude filepath = true], [filepath] does not exists, or [filepath] - does not fit in qnother kind, then its kind is [`Ignore]. - Other kinds are self explanatory. *) +module Filepath : sig + + type t = string + + val remove_pp : t -> t + (** [remove_pp filepath] removes the `.pp` extension (if it exists) from + [filepath]. Eg. [remove_pp "dir/foo.pp.ml" = "dir/foo.ml"] *) + + val unit : t -> string + (** [unit filepath] estimates the compilation unit of [filepath] *) + + type kind = + | Cmi (** .cmi file *) + | Cmt (** .cmt file *) + | Dir (** Directory *) + | Ignore (** Irrelevant for the analyzer *) + + val kind : exclude:(t -> bool) -> t -> kind + (** [kind ~exclude filepath] returns the kind of [filepath]. + If [exclude filepath = true], [filepath] does not exists, or [filepath] + does not fit in another kind, then its kind is [Ignore]. + Other kinds are self explanatory. *) +end module StringSet : Set.S with type elt = String.t From c6959048f967f84448dbf3d1a0403942772598ce Mon Sep 17 00:00:00 2001 From: Corentin De Souza <9597216+fantazio@users.noreply.github.com> Date: Sat, 7 Feb 2026 16:00:26 +0100 Subject: [PATCH 3/4] [src][config] exposed paths are only valid cmi and cmt Finding the cmi and cmt files in the paths provided in the command line is now done at configuration time. The analysis now only expects to find cmi and cmt. In case a directory is passed to the analysis, then the analyzer will stop and raise an exception. --- src/config/config.ml | 19 ++++++++++++++++--- src/config/config.mli | 8 +++++--- src/deadCode.ml | 12 ++++-------- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/src/config/config.ml b/src/config/config.ml index 8192ffb9..ce0dd485 100644 --- a/src/config/config.ml +++ b/src/config/config.ml @@ -106,9 +106,22 @@ let normalize_path path = | "" -> Filename.current_dir_name | normalized_path -> normalized_path +let rec add_filepaths acc path = + match Utils.Filepath.kind ~exclude:(fun _ -> false) path with + | Cmi | Cmt -> Utils.StringSet.add path acc + | Dir -> + Sys.readdir path + |> Array.fold_left + (fun acc sub_path -> + let path = Filename.concat path sub_path in + add_filepaths acc path + ) + acc + | Ignore -> acc + let exclude path config = let path = normalize_path path in - let excluded_paths = Utils.StringSet.add path config.excluded_paths in + let excluded_paths = add_filepaths config.excluded_paths path in {config with excluded_paths} let is_excluded path config = @@ -116,11 +129,11 @@ let is_excluded path config = Utils.StringSet.mem path config.excluded_paths let add_reference_path path config = - let references_paths = Utils.StringSet.add path config.references_paths in + let references_paths = add_filepaths config.references_paths path in {config with references_paths} let add_path_to_analyze path config = - let paths_to_analyze = Utils.StringSet.add path config.paths_to_analyze in + let paths_to_analyze = add_filepaths config.paths_to_analyze path in {config with paths_to_analyze} (* Command line parsing *) diff --git a/src/config/config.mli b/src/config/config.mli index cca086bd..5daa27cd 100644 --- a/src/config/config.mli +++ b/src/config/config.mli @@ -22,10 +22,12 @@ type t = private ; internal : bool (** Keep track of internal uses for exported values *) ; underscore : bool (** Keep track of elements with names starting with [_] *) ; paths_to_analyze : Utils.StringSet.t - (** Paths found in the command line and considered for analysis *) - ; excluded_paths : Utils.StringSet.t (** Paths to exclude from the analysis *) + (** Cmi and cmt filepaths found by exploring the paths provided in the + command line and considered for analysis *) + ; excluded_paths : Utils.StringSet.t + (** Cmi and cmt filepaths to exclude from the analysis *) ; references_paths : Utils.StringSet.t - (** Paths to explore for references only *) + (** Cmi and cmt filepaths to explore for references only *) ; sections : Sections.t (** Config for the different report sections *) } diff --git a/src/deadCode.ml b/src/deadCode.ml index 55a1327e..76b26c85 100644 --- a/src/deadCode.ml +++ b/src/deadCode.ml @@ -434,7 +434,7 @@ let eof loc_dep = (* Starting point *) -let rec load_file fn state = +let load_file fn state = let init_and_continue state fn f = match State.change_file state fn with | Error msg -> @@ -491,13 +491,9 @@ let rec load_file fn state = ) | Dir -> - let next = Sys.readdir fn in - Array.sort compare next; - Array.fold_left - (fun state s -> load_file (fn ^ "/" ^ s) state) - state - next - (* else Printf.eprintf "skipping directory %s\n" fn *) + (* TODO : better error handling *) + failwith ("Internal error : Unexpected directory " + ^ fn ^ ". Only .cmi and .cmt are expected") | _ -> state From 70afb4f68e19bf6f0f083800639b51a298eb163f Mon Sep 17 00:00:00 2001 From: Corentin De Souza <9597216+fantazio@users.noreply.github.com> Date: Sun, 8 Feb 2026 14:39:41 +0100 Subject: [PATCH 4/4] [src][utils] compilation units are computed via compiler-libs This ensures the unit is capitalized properly. Thanks to this, there is no need to lowercase the units when looking for sourcepaths in cmi files. --- src/state/file_infos.ml | 10 +++------- src/utils.ml | 2 +- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/state/file_infos.ml b/src/state/file_infos.ml index 7b079f51..4a3eace1 100644 --- a/src/state/file_infos.ml +++ b/src/state/file_infos.ml @@ -50,13 +50,8 @@ let init_from_cmt cmt_file = let sourcefname_of_cmi_infos cmi_unit cmi_infos = - (* Use lowercased units because dune wrapped lib's module units follow the - pattern : `__` while the original module unit may - not be capitalized. - *) - let cmi_unit = String.lowercase_ascii cmi_unit in let candidate_of_fname fname = - let src_unit = Utils.Filepath.unit fname |> String.lowercase_ascii in + let src_unit = Utils.Filepath.unit fname in if String.equal src_unit cmi_unit then `Identical fname else if String.ends_with ~suffix:src_unit cmi_unit then @@ -199,7 +194,8 @@ let get_builddir t = let get_sourcepath t = match t.sourcepath with - | Some sourcepath -> sourcepath + | Some sourcepath -> + sourcepath | None -> match t.builddir with | Some builddir -> Printf.sprintf "!!UNKNOWN_SOURCEPATH_IN<%s>_FOR_<%s>!!" diff --git a/src/utils.ml b/src/utils.ml index a74ab528..bb524456 100644 --- a/src/utils.ml +++ b/src/utils.ml @@ -10,7 +10,7 @@ module Filepath = struct | _ -> filepath let unit filepath = - Filename.remove_extension (Filename.basename filepath) + Unit_info.modname_from_source filepath type kind = | Cmi