Skip to content

Commit

Permalink
rework the HOF to include the export to a csv file
Browse files Browse the repository at this point in the history
  • Loading branch information
FardaleM committed Aug 1, 2024
1 parent f3190ee commit 02e5ed2
Showing 1 changed file with 102 additions and 84 deletions.
186 changes: 102 additions & 84 deletions bin/dowsindex/CmdHOF.ml
Original file line number Diff line number Diff line change
@@ -1,74 +1,81 @@
open CommonOpts

let cmp (time1, _) (time2, _) = CCFloat.compare time1 time2
type data = {
id : int * int;
t1 : Type.t;
t2 : Type.t;
unif_time : float;
skipable : bool;
}

let full size idx_file with_features iter_idx =
let timer = Timer.make () in
let acc_time = ref 0. in
let types = Iter.to_list iter_idx |> List.sort_uniq Type.compare in
let pp_data fmt data =
Format.fprintf fmt "%i/%i: %b %f %a %a" (fst data.id) (snd data.id)
data.skipable data.unif_time Type.pp data.t1 Type.pp data.t2

let pp_data_csv fmt data =
Format.fprintf fmt "%i/%i,%b,%f,%a,%a" (fst data.id) (snd data.id)
data.skipable data.unif_time Type.pp data.t1 Type.pp data.t2

let cmp data1 data2 = CCFloat.compare data1.unif_time data2.unif_time
let features = Db.Feature.all

let check_features t1 t2 =
not
@@ List.for_all
(fun (module F : Db.Feature.S) ->
let f1 = F.compute t1 and f2 = F.compute t2 in
F.compatible ~query:f1 ~data:f2)
features

let full types k =
let n_types = List.length types in
let rec all_pairs i acc l =
let timer = Timer.make () in
let rec all_pairs i l =
match l with
| [] -> acc
| [] -> ()
| t1 :: t ->
Format.printf "@[<h>%i/%i: %a@]@." i n_types Type.pp t1;
let l =
if with_features then
let iter_idx =
let db =
try Db.load idx_file
with Sys_error _ ->
error
@@ Fmt.str "cannot open index file `%a'" Fpath.pp idx_file
in
Db.iter_compatible db t1 |> Iter.map snd
in
Iter.to_list iter_idx
|> List.sort_uniq Type.compare
|> CCList.drop_while (fun t2 -> Type.compare t1 t2 < 0)
else l
in
acc_time := 0.;
let acc =
List.fold_left
(fun acc t2 ->
let env = Type.Env.make () in
Timer.start timer;
(try ignore @@ Acic.unify env t1 t2
with e ->
Format.printf "\"%a\" \"%a\"@." Type.pp t1 Type.pp t2;
raise e);
Timer.stop timer;
let time = Timer.get timer in
acc_time := !acc_time +. time;
(* if time > 10. then Format.printf "@[<h>Big time: %a@]@." Type.pp t2; *)
CCList.tl (CCList.sorted_insert ~cmp (time, (t1, t2)) acc))
acc l
in
List.iteri
(fun j t2 ->
let env = Type.Env.make () in
Timer.start timer;
(try ignore @@ Acic.unify env t1 t2
with e ->
Format.printf "\"%a\" \"%a\"@." Type.pp t1 Type.pp t2;
raise e);
Timer.stop timer;
let unif_time = Timer.get timer in
k
{
id = (i, j);
t1;
t2;
unif_time;
skipable = check_features t1 t2;
})
l;
Gc.full_major ();
Gc.print_stat stdout;
all_pairs (i + 1) acc t
(* Gc.print_stat stdout; *)
all_pairs (i + 1) t
in
all_pairs 1 (List.init size (fun _ -> (0., (Type.dummy, Type.dummy)))) types
all_pairs 1 types

let one size iter_idx t1 =
let timer = Timer.make () in
let types = Iter.to_list iter_idx |> List.sort_uniq Type.compare in
let one types t1 k =
let n_types = List.length types in
let timer = Timer.make () in
Format.printf "@[<h>Test against: %a@]@." Type.pp t1;
CCList.foldi
(fun acc i t2 ->
Format.printf "@[<h>%i/%i: %a@]@." i n_types Type.pp t2;
List.iteri
(fun j t2 ->
Format.printf "@[<h>%i/%i: %a@]@." j n_types Type.pp t1;
let env = Type.Env.make () in
Timer.start timer;
(try ignore @@ Acic.unify env t1 t2
with e ->
Format.printf "\"%a\" \"%a\"@." Type.pp t1 Type.pp t2;
raise e);
Timer.stop timer;
let time = Timer.get timer in
CCList.tl (CCList.sorted_insert ~cmp (time, (t1, t2)) acc))
(List.init size (fun _ -> (0., (Type.dummy, Type.dummy))))
let unif_time = Timer.get timer in
k { id = (-1, j); t1; t2; unif_time; skipable = check_features t1 t2 })
types

let print_stat () =
Expand All @@ -84,43 +91,54 @@ let print_stat () =
stats;
Fmt.pr "@."

let main with_features size idx_file ty =
let main csv size idx_file ty =
let types =
let db =
try Db.load idx_file
with Sys_error _ ->
error @@ Fmt.str "cannot open index file `%a'" Fpath.pp idx_file
in
Db.iter db |> Iter.map snd |> Iter.to_list |> List.sort_uniq Type.compare
in
let all_unifs =
match ty with None -> full types | Some ty -> one types ty
in
let output, close =
match csv with
| None -> ((fun _ -> ()), fun () -> ())
| Some csv ->
let oc = open_out csv in
let fmt = Format.formatter_of_out_channel oc in
Format.pp_set_margin fmt max_int;
( (fun data -> Format.fprintf fmt "@[<h>%a@]@." pp_data_csv data),
fun () -> close_out oc )
in
let hof =
match ty with
| None ->
let iter_idx =
let db =
try Db.load idx_file
with Sys_error _ ->
error @@ Fmt.str "cannot open index file `%a'" Fpath.pp idx_file
in
Db.iter db |> Iter.map snd
in
full size idx_file with_features iter_idx
| Some ty ->
let iter_idx =
let db =
try Db.load idx_file
with Sys_error _ ->
error @@ Fmt.str "cannot open index file `%a'" Fpath.pp idx_file
in
(if with_features then Db.iter_compatible db ty else Db.iter db)
|> Iter.map snd
in
one size iter_idx ty
Iter.fold
(fun acc data ->
output data;
CCList.tl (CCList.sorted_insert ~cmp data acc))
(List.init size (fun _ ->
{
id = (-2, -2);
unif_time = 0.;
t1 = Type.dummy;
t2 = Type.dummy;
skipable = true;
}))
all_unifs
in
close ();
Format.printf "@[<v>%a@]@."
(CCList.pp ~pp_start:(CCFormat.return "@[<h>")
~pp_sep:(CCFormat.return "@]@ @[<h>")
~pp_stop:(CCFormat.return "@]")
(CCPair.pp CCFloat.pp (CCPair.pp Type.pp Type.pp)))
~pp_stop:(CCFormat.return "@]") pp_data)
hof;
print_stat ()

let main with_features size idx_file ty =
let main csv size idx_file ty =
Format.set_margin max_int;
try Ok (main with_features size idx_file ty)
with Error msg -> Error (`Msg msg)
try Ok (main csv size idx_file ty) with Error msg -> Error (`Msg msg)

open Cmdliner
open Cmd
Expand All @@ -139,12 +157,12 @@ let ty =
let docv = "TYPE" in
Arg.(value & pos 0 (some @@ Convs.typ env) None & info [] ~docv)

let with_features =
let doc = "Pre filter using the features." in
Arg.(value & flag & info [ "with-features" ] ~doc)
let csv =
let doc = "Export data to a csv file." in
Arg.(value & opt (some string) None & info [ "csv" ] ~doc)

let cmd =
let doc = "compute hall of fame" in
Cmd.v
(info "hof" ~sdocs:Manpage.s_common_options ~doc)
Term.(term_result (const main $ with_features $ size $ idx_file $ ty))
Term.(term_result (const main $ csv $ size $ idx_file $ ty))

0 comments on commit 02e5ed2

Please sign in to comment.