Skip to content

Commit f950d1c

Browse files
committed
Merge remote-tracking branch 'opt/master'
2 parents 01b5241 + 1cb4cf3 commit f950d1c

34 files changed

+6471
-34
lines changed

.gitignore

-24
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,7 @@
11
.ipynb_checkpoints/
22

3-
*.annot
4-
*.cmo
5-
*.cma
6-
*.cmi
7-
*.a
8-
*.o
9-
*.cmx
10-
*.cmxs
11-
*.cmxa
12-
13-
# ocamlbuild working directory
143
_build/
154

16-
# ocamlbuild targets
17-
*.byte
18-
*.native
19-
20-
# oasis generated files
21-
setup.data
22-
setup.log
23-
24-
# Merlin configuring file for Vim and Emacs
25-
.merlin
26-
27-
*.install
28-
295
config-*sh
306
config.ini
317
_coverage/

.ocamlformat

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
margin = 85
22
break-cases=fit
3-
profile=conventional
3+
profile=conventional

LICENSE

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Copyright 2018 John K. Feser
1+
Copyright 2022 John K. Feser
22

33
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
44

bin/dune

+26-3
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,29 @@
6161
(pps ppx_sexp_conv ppx_let))
6262
(modules combine))
6363

64-
; Local Variables:
65-
; mode: tuareg-dune
66-
; End:
64+
(executable
65+
(name opt)
66+
(public_name opt.exe)
67+
(libraries core core_unix.command_unix castor castor_opt logs logs.fmt fmt
68+
fmt.tty)
69+
(preprocess
70+
(pps ppx_sexp_conv ppx_let ppx_sexp_conv ppx_compare ppx_hash))
71+
(modules opt))
72+
73+
(executable
74+
(name xform)
75+
(public_name xform.exe)
76+
(libraries core core_unix.command_unix castor castor_opt logs logs.fmt fmt
77+
fmt.tty)
78+
(preprocess
79+
(pps ppx_sexp_conv ppx_let ppx_sexp_conv ppx_compare ppx_hash))
80+
(modules xform))
81+
82+
(executable
83+
(name sql)
84+
(public_name sql.exe)
85+
(libraries core core_unix.command_unix castor castor_opt logs logs.fmt fmt
86+
fmt.tty)
87+
(preprocess
88+
(pps ppx_sexp_conv ppx_let ppx_sexp_conv ppx_compare ppx_hash))
89+
(modules sql))

bin/explore.ml

+117
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
open Core
2+
open Castor
3+
4+
module Node = struct
5+
module T = struct
6+
type t = Abslayout.t * int [@@deriving compare, hash, sexp_of]
7+
end
8+
9+
include T
10+
include Comparator.Make (T)
11+
end
12+
13+
module Edge = struct
14+
module T = struct
15+
type t = Ok of Node.t * Node.t * string | Err of Node.t * string
16+
[@@deriving compare, hash, sexp_of]
17+
end
18+
19+
include T
20+
include Comparator.Make (T)
21+
end
22+
23+
let choose ls = List.nth_exn ls (Random.int (List.length ls))
24+
let choose_set ls = Set.nth ls (Random.int (Set.length ls))
25+
26+
let main ~params ~db ch =
27+
let params =
28+
List.map params ~f:(fun (n, t) -> Name.create ~type_:t n)
29+
|> Set.of_list (module Name.Compare_no_type)
30+
in
31+
let module Config = struct
32+
let conn = Db.create db
33+
let params = params
34+
let check_transforms = true
35+
end in
36+
let module A = Abslayout_db.Make (Config) in
37+
let module T = Transform.Make (Config) (A) () in
38+
let query_str = In_channel.input_all ch in
39+
let query = Abslayout.of_string_exn query_str |> A.resolve ~params in
40+
let explore ?(max_nodes = 10000) query =
41+
let tfs =
42+
List.filter_map T.transforms ~f:(fun (_, tf) ->
43+
try Some (tf []) with _ -> None)
44+
in
45+
let edges = ref (Set.empty (module Edge)) in
46+
let nodes = ref (Set.singleton (module Node) (query, 0)) in
47+
let rec loop () =
48+
if Set.length !nodes > max_nodes then ()
49+
else
50+
match choose_set !nodes with
51+
| Some ((query, _) as n) ->
52+
let tf = choose tfs in
53+
(try
54+
match T.run tf query with
55+
| [] -> ()
56+
| ls ->
57+
let n' = (choose ls, Set.length !nodes) in
58+
edges := Set.add !edges (Ok (n, n', tf.name));
59+
nodes := Set.add !nodes n'
60+
with _ -> edges := Set.add !edges (Err (n, tf.name)));
61+
loop ()
62+
| None -> ()
63+
in
64+
loop ();
65+
printf "digraph {";
66+
Set.to_sequence !edges
67+
|> Sequence.iter ~f:(function
68+
| Edge.Err ((_, i), name) ->
69+
printf "%d -> err [label=\"%s\"];\n" i name
70+
| Ok ((_, i1), (_, i2), name) ->
71+
printf "%d -> %d [label=\"%s\"];\n" i1 i2 name);
72+
printf "}"
73+
in
74+
explore query
75+
76+
let reporter ppf =
77+
let report _ level ~over k msgf =
78+
let k _ =
79+
over ();
80+
k ()
81+
in
82+
let with_time h _ k ppf fmt =
83+
let time = Core.Time.now () in
84+
Format.kfprintf k ppf
85+
("%a [%s] @[" ^^ fmt ^^ "@]@.")
86+
Logs.pp_header (level, h) (Core.Time.to_string time)
87+
in
88+
msgf @@ fun ?header ?tags fmt -> with_time header tags k ppf fmt
89+
in
90+
{ Logs.report }
91+
92+
let () =
93+
Logs.set_reporter (reporter Format.err_formatter);
94+
let open Command in
95+
let open Let_syntax in
96+
Logs.info (fun m ->
97+
m "%s" (Sys.argv |> Array.to_list |> String.concat ~sep:" "));
98+
basic ~summary:"Compile a query."
99+
(let%map_open verbose =
100+
flag "verbose" ~aliases:[ "v" ] no_arg ~doc:"increase verbosity"
101+
and quiet = flag "quiet" ~aliases:[ "q" ] no_arg ~doc:"decrease verbosity"
102+
and db =
103+
flag "db" (required string) ~doc:"CONNINFO the database to connect to"
104+
and params =
105+
flag "param" ~aliases:[ "p" ] (listed Util.param)
106+
~doc:"NAME:TYPE query parameters"
107+
and ch =
108+
anon (maybe_with_default In_channel.stdin ("query" %: Util.channel))
109+
in
110+
fun () ->
111+
if verbose then Logs.set_level (Some Logs.Debug)
112+
else if quiet then Logs.set_level (Some Logs.Error)
113+
else Logs.set_level (Some Logs.Info);
114+
Logs.info (fun m ->
115+
m "%s" (Sys.argv |> Array.to_list |> String.concat ~sep:" "));
116+
main ~params ~db ch)
117+
|> run

bin/opt.ml

+170
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
open! Core
2+
open Castor
3+
open Collections
4+
open Castor_opt
5+
open Abslayout_load
6+
module A = Abslayout
7+
8+
let dump fn r =
9+
Out_channel.with_file fn ~f:(fun ch ->
10+
Fmt.pf (Format.formatter_of_out_channel ch) "%a" Abslayout.pp r)
11+
12+
(** Run a command and return its output on stdout, logging it if it fails. *)
13+
let command_out cmd =
14+
let open Or_error.Let_syntax in
15+
let ch = Core_unix.open_process_in cmd in
16+
let out = In_channel.input_all ch in
17+
let%map () =
18+
Core_unix.Exit_or_signal.or_error (Core_unix.close_process_in ch)
19+
|> Or_error.tag ~tag:cmd
20+
in
21+
out
22+
23+
let system_exn cmd =
24+
match Core_unix.system cmd with
25+
| Ok () -> ()
26+
| Error (`Exit_non_zero code) ->
27+
failwith @@ sprintf "Command '%s' exited with code %d" cmd code
28+
| Error (`Signal signal) ->
29+
failwith
30+
@@ sprintf "Command '%s' terminated by signal %s" cmd
31+
(Signal.to_string signal)
32+
33+
let opt conn cost_conn params cost_timeout state query =
34+
let module Config = struct
35+
let conn = conn
36+
let cost_conn = cost_conn
37+
let params = params
38+
let cost_timeout = cost_timeout
39+
let random = state
40+
end in
41+
let module T = Transform.Make (Config) in
42+
match Transform.optimize (module Config) query with
43+
| First opt_query ->
44+
if is_ok @@ T.is_serializable opt_query then Some opt_query
45+
else (
46+
Logs.warn (fun m -> m "Not serializable:@ %a" A.pp opt_query);
47+
None)
48+
| Second failed_subquery ->
49+
Logs.warn (fun m ->
50+
m "Optimization failed for subquery:@ %a" A.pp failed_subquery);
51+
None
52+
53+
let eval dir params query =
54+
let open Result.Let_syntax in
55+
Logs.info (fun m -> m "Evaluating:@ %a" A.pp query);
56+
57+
(* Set up the output directory. *)
58+
system_exn @@ sprintf "rm -rf %s" dir;
59+
system_exn @@ sprintf "mkdir -p %s" dir;
60+
let query_fn = sprintf "%s/query.txt" dir in
61+
dump query_fn query;
62+
63+
(* Try to build the query. *)
64+
let%bind () =
65+
let compile_cmd =
66+
let params =
67+
List.map params ~f:(fun (n, t, _) ->
68+
Fmt.str "-p %s:%a" n Prim_type.pp t)
69+
|> String.concat ~sep:" "
70+
in
71+
sprintf
72+
"$CASTOR_ROOT/../_build/default/castor/bin/compile.exe -o %s %s %s > \
73+
%s/compile.log 2>&1"
74+
dir params query_fn dir
75+
in
76+
let%map out = command_out compile_cmd in
77+
Logs.info (fun m -> m "Compile output: %s" out)
78+
in
79+
80+
(* Try to run the query. *)
81+
let%map run_time =
82+
let run_cmd =
83+
let params =
84+
List.map params ~f:(fun (_, _, v) -> sprintf "'%s'" @@ Value.to_param v)
85+
|> String.concat ~sep:" "
86+
in
87+
sprintf "%s/scanner.exe -t 1 %s/data.bin %s" dir dir params
88+
in
89+
let%map out = command_out run_cmd in
90+
let time, _ = String.lsplit2_exn ~on:' ' out in
91+
String.rstrip ~drop:Char.is_alpha time |> Float.of_string
92+
in
93+
94+
run_time
95+
96+
let trial_dir = sprintf "%s-trial"
97+
98+
let copy_out out_file out_dir query =
99+
dump out_file query;
100+
system_exn @@ sprintf "rm -rf %s" out_dir;
101+
system_exn @@ sprintf "mv -f %s %s" (trial_dir out_dir) out_dir
102+
103+
let main ~params ~cost_timeout ~timeout ~out_dir ~out_file ch =
104+
Random.init 0;
105+
106+
let conn = Db.create (Sys.getenv_exn "CASTOR_OPT_DB") in
107+
let cost_conn = conn in
108+
let params_set =
109+
List.map params ~f:(fun (n, t, _) -> Name.create ~type_:t n)
110+
|> Set.of_list (module Name)
111+
in
112+
let query =
113+
load_string_exn ~params:params_set conn @@ In_channel.input_all ch
114+
in
115+
116+
let best_cost = ref Float.infinity in
117+
let cost state =
118+
Fresh.reset Global.fresh;
119+
match opt conn cost_conn params_set cost_timeout state query with
120+
| Some query' -> (
121+
match eval (trial_dir out_dir) params query' with
122+
| Ok cost ->
123+
if Float.(cost < !best_cost) then (
124+
copy_out out_file out_dir query';
125+
best_cost := cost);
126+
cost
127+
| Error err ->
128+
Logs.warn (fun m -> m "Evaluation failed: %a" Error.pp err);
129+
Float.infinity)
130+
| None -> Float.infinity
131+
in
132+
133+
let cost = Memo.of_comparable (module Mcmc.Random_choice.C) cost in
134+
let max_time = Option.map ~f:Time.Span.of_sec timeout in
135+
136+
try Mcmc.run ?max_time cost |> ignore
137+
with Resolve.Resolve_error r -> Fmt.epr "%a@." (Resolve.pp_err Fmt.nop) r
138+
139+
let spec =
140+
let open Command.Let_syntax in
141+
[%map_open
142+
let () = Log.param
143+
and () = Ops.param
144+
and () = Db.param
145+
and () = Type_cost.param
146+
and () = Join_opt.param
147+
and () = Groupby_tactics.param
148+
and () = Type.param
149+
and () = Simplify_tactic.param
150+
and cost_timeout =
151+
flag "cost-timeout" (optional float)
152+
~doc:"SEC time to run cost estimation"
153+
and timeout =
154+
flag "timeout" (optional float) ~doc:"SEC time to run optimizer"
155+
and params =
156+
flag "param" ~aliases:[ "p" ]
157+
(listed Util.param_and_value)
158+
~doc:"NAME:TYPE query parameters"
159+
and out_dir =
160+
flag "out-dir" (required string) ~aliases:[ "o" ]
161+
~doc:"DIR output directory"
162+
and out_file =
163+
flag "out-file" (required string) ~aliases:[ "f" ]
164+
~doc:"FILE output directory"
165+
and ch =
166+
anon (maybe_with_default In_channel.stdin ("query" %: Util.channel))
167+
in
168+
fun () -> main ~params ~cost_timeout ~timeout ~out_dir ~out_file ch]
169+
170+
let () = Command.basic spec ~summary:"Optimize a query." |> Command_unix.run

0 commit comments

Comments
 (0)