Skip to content

Commit

Permalink
Merge pull request #2322 from max-au/max-au/cache-epp-deps
Browse files Browse the repository at this point in the history
rebar3 Erlang compiler: performance improvements
  • Loading branch information
ferd authored Aug 10, 2020
2 parents a5bfc23 + 8fca720 commit 0952780
Show file tree
Hide file tree
Showing 3 changed files with 205 additions and 47 deletions.
120 changes: 76 additions & 44 deletions src/rebar_compiler_dag.erl
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,19 @@

-include("rebar.hrl").

-define(DAG_VSN, 3).
-define(DAG_VSN, 4).
-define(DAG_ROOT, "source").
-define(DAG_EXT, ".dag").

-type dag_v() :: {digraph:vertex(), term()} | 'false'.
-type dag_e() :: {digraph:vertex(), digraph:vertex()}.
-type critical_meta() :: term(). % if this changes, the DAG is invalid
-type dag_rec() :: {list(dag_v()), list(dag_e()), critical_meta()}.
-type dag() :: digraph:graph().
-type critical_meta() :: term().

-record(dag, {vsn = ?DAG_VSN :: pos_integer(),
info = {[], [], []} :: dag_rec()}).
meta :: critical_meta(),
vtab :: notable | [tuple()],
etab :: notable | [tuple()],
ntab :: notable | [tuple()]}).

-type dag() :: digraph:graph().

%% @doc You should initialize one DAG per compiler module.
%% `CritMeta' is any contextual information that, if it is found to change,
Expand Down Expand Up @@ -105,13 +107,46 @@ filter_prefix(G, [{App, Out} | AppTail] = AppPaths, [File | FTail]) ->
filter_prefix(G, AppPaths, FTail)
end.

%% Collects the results produced by the dependency workers tracked in
%% `Waiting' (a map of worker pid => {Status, Source}) and records them in
%% the digraph `G'. Returns `ok' once `Waiting' is empty, i.e. every worker
%% has reported a normal exit; a worker exiting with any other reason is
%% logged and the run is aborted through ?FAIL.
finalise_populate_sources(_G, _InDirs, Waiting) when map_size(Waiting) =:= 0 ->
    ok;
finalise_populate_sources(G, InDirs, Waiting) ->
    %% wait for all deps to complete
    receive
        {deps, Worker, AbsIncls} ->
            {Status, Source} = maps:get(Worker, Waiting),
            %% a dependency that hasn't been visited yet is added as an
            %% existing vertex, but with a null last-modified value so it
            %% gets updated to something new later on.
            lists:foreach(
                fun(Incl) ->
                    case digraph:vertex(G, Incl) of
                        false -> digraph:add_vertex(G, Incl, 0);
                        _Known -> ok
                    end
                end,
                AbsIncls),
            %% for a source that was already known, drop the edges towards
            %% deps that are no longer included
            case Status == old of
                true ->
                    lists:foreach(
                        fun(Edge) ->
                            case digraph:edge(G, Edge) of
                                {_, _From, Target, _Label} ->
                                    lists:member(Target, AbsIncls)
                                        orelse digraph:del_edge(G, Edge);
                                _NoEdge ->
                                    ok
                            end
                        end,
                        digraph:out_edges(G, Source));
                false ->
                    ok
            end,
            %% add the rest
            lists:foreach(
                fun(Incl) -> digraph:add_edge(G, Source, Incl) end,
                AbsIncls),
            %% mark the digraph dirty when there is any change in
            %% dependencies, for any application in the project
            mark_dirty(G),
            %% the worker stays in `Waiting' until its 'DOWN' message arrives
            finalise_populate_sources(G, InDirs, Waiting);
        {'DOWN', _MRef, process, Worker, normal} ->
            finalise_populate_sources(G, InDirs, maps:remove(Worker, Waiting));
        {'DOWN', _MRef, process, Worker, Reason} ->
            {_Status, Source} = maps:get(Worker, Waiting),
            ?ERROR("Failed to get dependencies for ~s~n~p", [Source, Reason]),
            ?FAIL
    end.

%% @doc this function scans all the source files found and looks into
%% all the `InDirs' for deps (other source files, or files that aren't source
%% but still returned by the compiler module) that are related
%% to them.
populate_sources(_G, _Compiler, _InDirs, [], _DepOpts) ->
ok;
populate_sources(G, Compiler, InDirs, [Source|Erls], DepOpts) ->
populate_sources(G, Compiler, InDirs, Sources, DepOpts) ->
populate_sources(G, Compiler, InDirs, Sources, DepOpts, #{}).

populate_sources(G, _Compiler, InDirs, [], _DepOpts, Waiting) ->
finalise_populate_sources(G, InDirs, Waiting);
populate_sources(G, Compiler, InDirs, [Source|Erls], DepOpts, Waiting) ->
case digraph:vertex(G, Source) of
{_, LastUpdated} ->
case filelib:last_modified(Source) of
Expand All @@ -120,21 +155,20 @@ populate_sources(G, Compiler, InDirs, [Source|Erls], DepOpts) ->
%% from the graph.
digraph:del_vertex(G, Source),
mark_dirty(G),
populate_sources(G, Compiler, InDirs, Erls, DepOpts);
populate_sources(G, Compiler, InDirs, Erls, DepOpts, Waiting);
LastModified when LastUpdated < LastModified ->
digraph:add_vertex(G, Source, LastModified),
prepopulate_deps(G, Compiler, InDirs, Source, DepOpts, old),
mark_dirty(G);
Worker = prepopulate_deps(Compiler, InDirs, Source, DepOpts, self()),
populate_sources(G, Compiler, InDirs, Erls, DepOpts, Waiting#{Worker => {old, Source}});
_ -> % unchanged
ok
populate_sources(G, Compiler, InDirs, Erls, DepOpts, Waiting)
end;
false ->
LastModified = filelib:last_modified(Source),
digraph:add_vertex(G, Source, LastModified),
prepopulate_deps(G, Compiler, InDirs, Source, DepOpts, new),
mark_dirty(G)
end,
populate_sources(G, Compiler, InDirs, Erls, DepOpts).
Worker = prepopulate_deps(Compiler, InDirs, Source, DepOpts, self()),
populate_sources(G, Compiler, InDirs, Erls, DepOpts, Waiting#{Worker => {new, Source}})
end.

%% @doc Scan all files in the digraph that are seen as dependencies, but are
%% neither source files nor artifacts (i.e. header files that don't produce
Expand Down Expand Up @@ -228,19 +262,23 @@ restore_dag(G, File, CritMeta) ->
{ok, Data} ->
%% The CritMeta value is checked and if it doesn't match, we fail
%% the whole restore operation.
#dag{vsn=?DAG_VSN, info={Vs, Es, CritMeta}} = binary_to_term(Data),
[digraph:add_vertex(G, V, LastUpdated) || {V, LastUpdated} <- Vs],
[digraph:add_edge(G, V1, V2, Label) || {_, V1, V2, Label} <- Es],
#dag{vsn=?DAG_VSN, meta = CritMeta, vtab = VTab,
etab = ETab, ntab = NTab} = binary_to_term(Data),
{digraph, VT, ET, NT, false} = G,
true = ets:insert_new(VT, VTab),
true = ets:insert_new(ET, ETab),
true = ets:delete_all_objects(NT),
true = ets:insert(NT, NTab),
ok;
{error, _Err} ->
ok
end.

store_dag(G, File, CritMeta) ->
ok = filelib:ensure_dir(File),
Vs = lists:map(fun(V) -> digraph:vertex(G, V) end, digraph:vertices(G)),
Es = lists:map(fun(E) -> digraph:edge(G, E) end, digraph:edges(G)),
Data = term_to_binary(#dag{info={Vs, Es, CritMeta}}, [{compressed, 2}]),
{digraph, VT, ET, NT, false} = G,
Data = term_to_binary(#dag{meta = CritMeta, vtab = ets:tab2list(VT),
etab = ets:tab2list(ET), ntab = ets:select(NT, [{'_',[],['$_']}])}, [{compressed, 2}]),
file:write_file(File, Data).

%% Drop a file from the digraph if it doesn't exist, and if so,
Expand Down Expand Up @@ -285,26 +323,20 @@ maybe_rm_vertex(G, Source) ->
%% mark its timestamp to 0, which means we have no info on it.
%% Source files will be covered at a later point in their own scan, and
%% non-source files are going to be covered by `populate_deps/3'.
prepopulate_deps(G, Compiler, InDirs, Source, DepOpts, Status) ->
    Dir = filename:dirname(Source),
    %% prefer the 4-arity callback (which also receives DepOpts) when the
    %% compiler module exports it
    Deps = case erlang:function_exported(Compiler, dependencies, 4) of
               true ->
                   Compiler:dependencies(Source, Dir, InDirs, DepOpts);
               false ->
                   Compiler:dependencies(Source, Dir, InDirs)
           end,
    %% the file hasn't been visited yet; set it to existing, but with
    %% a last modified value that's null so it gets updated to something new.
    lists:foreach(
        fun(Dep) ->
            case digraph:vertex(G, Dep) of
                false -> digraph:add_vertex(G, Dep, 0);
                _Known -> ok
            end
        end,
        Deps),
    %% drop edges from deps that aren't included!
    case Status == old of
        true ->
            lists:foreach(
                fun(Edge) ->
                    case digraph:edge(G, Edge) of
                        {_, _From, Target, _Label} ->
                            lists:member(Target, Deps)
                                orelse digraph:del_edge(G, Edge);
                        _NoEdge ->
                            ok
                    end
                end,
                digraph:out_edges(G, Source));
        false ->
            ok
    end,
    %% Add the rest
    lists:foreach(fun(Dep) -> digraph:add_edge(G, Source, Dep) end, Deps),
    ok.
%% Spawns a monitored worker that asks `Compiler' for the dependencies of
%% `Source' and sends them to `Control' as `{deps, WorkerPid, AbsIncls}'.
%% Returns the worker pid. Because the worker is monitored by the caller,
%% the caller also receives a 'DOWN' message when the worker exits.
prepopulate_deps(Compiler, InDirs, Source, DepOpts, Control) ->
    Scan =
        fun() ->
            Dir = filename:dirname(Source),
            %% prefer the 4-arity callback (which also receives DepOpts)
            %% when the compiler module exports it
            Deps = case erlang:function_exported(Compiler, dependencies, 4) of
                       true ->
                           Compiler:dependencies(Source, Dir, InDirs, DepOpts);
                       false ->
                           Compiler:dependencies(Source, Dir, InDirs)
                   end,
            Control ! {deps, self(), Deps}
        end,
    {Worker, _MRef} = spawn_monitor(Scan),
    Worker.

%% check that a dep file is up to date
refresh_dep(_G, {artifact, _}) ->
Expand Down
124 changes: 124 additions & 0 deletions src/rebar_compiler_epp.erl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,18 @@
%%% @end
-module(rebar_compiler_epp).
-export([deps/2, resolve_module/2]).
%% cache (a la code path storage, but for dependencies not in code path)
-export([ensure_started/0, flush/0, resolve_source/2]).
-export([init/1, handle_call/3, handle_cast/2]).
%% remove when OTP 19 support is no longer needed
-export([handle_info/2, terminate/2, code_change/3]).

-behaviour(gen_server).

-include_lib("kernel/include/file.hrl").

-include("rebar.hrl").

%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Basic File Handling %%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%
Expand Down Expand Up @@ -37,6 +47,120 @@ resolve_module(Mod, Paths) ->
Path -> {ok, Path}
end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Cache for deps %%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% @doc Makes sure the cache server is running and registered locally
%% as ?MODULE. Losing a start race against another process (the
%% `already_started' error) is treated as success.
-spec ensure_started() -> ok.
ensure_started() ->
    case whereis(?MODULE) of
        Pid when is_pid(Pid) ->
            ok;
        undefined ->
            Started = gen_server:start({local, ?MODULE}, ?MODULE, [], []),
            case Started of
                {error, {already_started, _Other}} ->
                    ok;
                {ok, _New} ->
                    ok
            end
    end.

%% @doc Sends a `flush' request to the cache server registered as ?MODULE.
%% This is a cast: the call does not wait for the server to process it.
flush() ->
gen_server:cast(?MODULE, flush).

%% @doc Asks the cache server for the location of a source file, searching
%% the given list of directories. An atom is taken to be a module name and
%% is looked up as "Name.erl"; a string is looked up as given.
%% The server remembers the answer for subsequent requests.
-spec resolve_source(atom() | file:filename_all(), [file:filename_all()]) -> {true, file:filename_all()} | false.
resolve_source(Name, Dirs) when is_atom(Name) ->
    resolve_source(atom_to_list(Name) ++ ".erl", Dirs);
resolve_source(Name, Dirs) when is_list(Name) ->
    gen_server:call(?MODULE, {resolve, Name, Dirs}).

-record(state, {
    %% filesystem cache, denormalised
    fs = #{} :: #{file:filename_all() => [file:filename_all()]},
    %% memoised lookups. The key is the requested file name together with
    %% the directory list it was searched in; the value is exactly what was
    %% replied to the caller.
    resolved = #{} :: #{{file:filename_all(), [file:filename_all()]} =>
                            {true, file:filename_all()} | false}
}).

%% Starts with empty caches.
init([]) ->
    {ok, #state{}}.

%% Serves `{resolve, Name, Dirs}' requests, replying with whatever
%% `resolve/3' returns for them. Answers are memoised per `{Name, Dirs}'
%% pair: the same name searched in a different directory list must not be
%% answered from an earlier lookup, otherwise a hit (or a miss) obtained for
%% one set of directories would be reported for another one.
handle_call({resolve, Name, Dirs}, _From, #state{fs = Fs, resolved = Res} = State) ->
    Key = {Name, Dirs},
    case maps:find(Key, Res) of
        {ok, Cached} ->
            {reply, Cached, State};
        error ->
            {Resolved, NewFs} = resolve(Name, Fs, Dirs),
            {reply, Resolved, State#state{resolved = Res#{Key => Resolved}, fs = NewFs}}
    end.

%% Handles the `flush' cast by replacing the server state with a fresh
%% `#state{}', dropping everything cached so far.
handle_cast(flush, _State) ->
{noreply, #state{}}.

%% Looks for the source file `Name' in each of the given directories, in
%% order, using (and extending) the filesystem cache `Fs'.
%% Returns `{{true, Path}, NewFs}' for the first directory containing the
%% file, where `Path' is the actual location of the file (which may be in a
%% subdirectory of the directory searched), or `{false, NewFs}' when no
%% directory contains it.
resolve(_Name, Fs, []) ->
    {false, Fs};
resolve(Name, Fs, [Dir | Tail]) ->
    {NewFs, Files} = list_directory(Dir, Fs),
    case find_source(Name, Dir, Files) of
        {true, _Path} = Found ->
            {Found, NewFs};
        false ->
            resolve(Name, NewFs, Tail)
    end.

%% Picks the path of `Name' out of the files cached for `Dir'. A file
%% sitting directly in `Dir' wins over one with the same name found in a
%% subdirectory.
find_source(Name, Dir, Files) ->
    Direct = filename:join(Dir, Name),
    case lists:member(Direct, Files) of
        true ->
            {true, Direct};
        false ->
            find_by_basename(Name, Files)
    end.

%% First cached path whose last component is `Name', or `false'.
find_by_basename(_Name, []) ->
    false;
find_by_basename(Name, [Path | Rest]) ->
    case filename:basename(Path) =:= Name of
        true ->
            {true, Path};
        false ->
            find_by_basename(Name, Rest)
    end.

%% list_directory/2 caches files in the directory and all subdirectories,
%% to support the behaviour of looking for source files in
%% subdirectories of src/* folder. Files are cached with their full path,
%% so that a file found in a subdirectory is reported where it really is.
%% This may introduce weird dependencies for cases when CT
%% test cases contain test data with files named the same
%% as requested behaviour/parse_transforms, but let's hope
%% it won't happen for many projects. If it does, in fact,
%% it won't cause any damage, just extra unexpected recompiles.
%% A directory that cannot be listed yields an empty list and is not cached.
list_directory(Dir, Cache) ->
    case maps:find(Dir, Cache) of
        {ok, Files} ->
            {Cache, Files};
        error ->
            case file:list_dir(Dir) of
                {ok, DirFiles} ->
                    %% create a full list of *.erl files under Dir.
                    {NewFs, Files} = lists:foldl(
                        fun (File, Acc) -> cache_dir_entry(Dir, File, Acc) end,
                        {Cache, []}, DirFiles),
                    {NewFs#{Dir => Files}, Files};
                {error, Reason} ->
                    ?DEBUG("Failed to list ~s, ~p", [Dir, Reason]),
                    {Cache, []}
            end
    end.

%% Folds one entry of `Dir' into the `{Cache, Files}' accumulator.
cache_dir_entry(Dir, File, {DirCache, Files} = Acc) ->
    FullName = filename:join(Dir, File),
    case filelib:is_dir(FullName) of
        true ->
            %% recurse into subdirs
            {UpdFs, MoreFiles} = list_directory(FullName, DirCache),
            {UpdFs, MoreFiles ++ Files};
        false ->
            %% ignore all but *.erl files
            case filename:extension(File) =:= ".erl" of
                true ->
                    {DirCache, [FullName | Files]};
                false ->
                    Acc
            end
    end.

%%%%%%%%%%%%%%%
%%% OTP 19 %%%
%% Any other message delivered to the server is ignored; the state is
%% returned unchanged.
handle_info(_Request, State) ->
{noreply, State}.

%% Nothing is cleaned up when the server terminates.
terminate(_Reason, _State) ->
ok.

%% Code upgrades keep the state as it is.
code_change(_OldVsn, State, _Extra) ->
{ok, State}.

%%%%%%%%%%%%%%%
%%% PRIVATE %%%
%%%%%%%%%%%%%%%
Expand Down
8 changes: 5 additions & 3 deletions src/rebar_compiler_erl.erl
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ dependencies(Source, SourceDir, Dirs) ->
end.

dependencies(Source, _SourceDir, Dirs, DepOpts) ->
rebar_compiler_epp:ensure_started(),
OptPTrans = proplists:get_value(parse_transforms, DepOpts, []),
try rebar_compiler_epp:deps(Source, DepOpts) of
#{include := AbsIncls,
Expand All @@ -110,9 +111,9 @@ dependencies(Source, _SourceDir, Dirs, DepOpts) ->
%% TODO: check for core transforms?
{_MissIncl, _MissInclLib} =/= {[],[]} andalso
?DEBUG("Missing: ~p", [{_MissIncl, _MissInclLib}]),
expand_file_names([module_to_erl(Mod) || Mod <- OptPTrans ++ PTrans], Dirs) ++
expand_file_names([module_to_erl(Mod) || Mod <- Behaviours], Dirs) ++
AbsIncls
lists:filtermap(
fun (Mod) -> rebar_compiler_epp:resolve_source(Mod, Dirs) end,
OptPTrans ++ PTrans ++ Behaviours) ++ AbsIncls
catch
error:{badmatch, {error, Reason}} ->
case file:format_error(Reason) of
Expand Down Expand Up @@ -141,6 +142,7 @@ compile(Source, [{_, OutDir}], Config, ErlOpts) ->
end.

compile_and_track(Source, [{Ext, OutDir}], Config, ErlOpts) ->
rebar_compiler_epp:flush(),
BuildOpts = [{outdir, OutDir} | ErlOpts],
Target = target_base(OutDir, Source) ++ Ext,
AllOpts = case erlang:function_exported(compile, env_compiler_options, 0) of
Expand Down

0 comments on commit 0952780

Please sign in to comment.