-
Notifications
You must be signed in to change notification settings - Fork 110
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implementation of checkpoints/temp targets #88 #106
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
; Regression testing for Drake: checkpointing/temp targets | ||
; Relevant URLs: | ||
; https://github.com/Factual/drake/issues/88 | ||
|
||
~temp1 <- start | ||
echo -n $INPUTS > $OUTPUT | ||
echo -n "t1" >> stepsrun | ||
|
||
~temp2 <- temp1 | ||
echo -n $INPUTS > $OUTPUT | ||
echo -n "t2" >> stepsrun | ||
|
||
~temp3, perm1 <- temp2 | ||
echo -n INPUTS > $OUTPUT0 | ||
echo -n $INPUTS > $OUTPUT1 | ||
echo -n "t3p1" >> stepsrun | ||
|
||
perm2 <- perm1 | ||
echo -n $INPUTS > $OUTPUT | ||
echo -n "p2" >> stepsrun | ||
|
||
perm3 <- temp2 | ||
echo -n $INPUTS > $OUTPUT | ||
echo -n "p3" >> stepsrun | ||
|
||
perm4 <- temp3 | ||
echo -n $INPUTS > $OUTPUT | ||
echo -n "p4" >> stepsrun | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
#!/bin/bash | ||
# Regression testing for Drake: checkpointing/temp targets | ||
# Relevant URLs: | ||
# https://github.com/Factual/drake/issues/88 | ||
|
||
source $(dirname $0)/regtest_utils.sh | ||
|
||
export FILES=start\ stepsrun\ temp1\ temp2\ temp3\ perm1\ perm2\ perm3\ perm4 | ||
|
||
cleanup() { | ||
for FILE in $FILES | ||
do | ||
rm -f $(dirname $0)/$FILE>/dev/null 2>&1 | ||
done | ||
} | ||
|
||
checkfiles() { | ||
check_does_not_exist temp1 temp2 temp3 | ||
check_exists perm1 perm2 perm3 perm4 | ||
} | ||
|
||
echo "------------------" | ||
echo "TESTS: checkpoints" | ||
echo "------------------" | ||
|
||
# First cleanup any existing files | ||
cleanup | ||
|
||
# Run script from scratch, make sure temp files are deleted | ||
touch start | ||
run_d regtest_temp.d -a | ||
check_grep stepsrun "t1t2t3p1p2p3p4" | ||
checkfiles | ||
|
||
# Run script again, make sure nothing happens even though temp files are deleted | ||
rm stepsrun | ||
run_d regtest_temp.d -a | ||
check_does_not_exist stepsrun | ||
checkfiles | ||
|
||
# Touch start, run script again, make sure all files are built | ||
sleep 2 # get around the 1 second accuracy of lastModified() in Java | ||
#rm stepsrun | ||
touch start | ||
run_d regtest_temp.d -a | ||
check_grep stepsrun "t1t2t3p1p2p3p4" | ||
checkfiles | ||
|
||
# Remove perm2, make sure only perm2 step is run | ||
sleep 2 # get around the 1 second accuracy of lastModified() in Java | ||
rm stepsrun | ||
rm perm2 | ||
run_d regtest_temp.d -a | ||
check_grep stepsrun "p2" | ||
checkfiles | ||
|
||
# Touch temp2, make sure temp3, perm1, perm2, perm3, perm4 are built | ||
sleep 2 # get around the 1 second accuracy of lastModified() in Java | ||
rm stepsrun | ||
touch temp2 | ||
run_d regtest_temp.d -a | ||
check_grep stepsrun "t3p1p2p3p4" | ||
checkfiles | ||
|
||
# Touch temp3, make sure perm4 is run | ||
sleep 2 # get around the 1 second accuracy of lastModified() in Java | ||
rm stepsrun | ||
touch temp3 | ||
run_d regtest_temp.d -a | ||
check_grep stepsrun "p4" | ||
checkfiles | ||
|
||
# Touch start, build only perm3, make sure temp1, temp2, perm3 are run | ||
sleep 2 # get around the 1 second accuracy of lastModified() in Java | ||
rm stepsrun | ||
touch start | ||
run_d regtest_temp.d -a perm3 | ||
check_grep stepsrun "t1t2p3" | ||
checkfiles | ||
|
||
# Cleanup, build only temp3, make sure temp1, temp2, temp3, perm1 are run | ||
# Also make sure temp3 still exists | ||
cleanup | ||
touch start | ||
run_d regtest_temp.d -a temp3 | ||
check_grep stepsrun "t1t2t3p1" | ||
check_does_not_exist temp1 temp2 | ||
check_exists perm1 temp3 | ||
|
||
# Final cleanup | ||
cleanup | ||
|
||
echo "ALL PASSED" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -158,6 +158,47 @@ | |
:vars vars | ||
:opts (if-not method opts (merge (method :opts) opts))))) | ||
|
||
(defn- data-in? | ||
"Shortcut for (fs di/data-in?)" | ||
[file] | ||
(fs di/data-in? file)) | ||
|
||
(defn- no-data-in? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
"Shortcut for (not (fs di/data-in?))" | ||
[file] | ||
(not (data-in? file))) | ||
|
||
(defn- expand-outputs | ||
"Given a set of outputs, expand any temp outputs that have no data by | ||
recursively replacing the temp output with the outputs of any steps | ||
that depend on that temp output - and then doing the same for the | ||
temp outputs of the dependent steps that have no data. Temp outputs | ||
that have data will not be expanded. This is the key algorithm that | ||
allows Drake to deal with deleted temp targets without triggering | ||
unnecessary rebuilds." | ||
[parse-tree step-list step] | ||
(let [step-set (into #{} step-list) | ||
steps (parse-tree :steps) | ||
real-outputs (step :real-outputs) | ||
temp-outputs (step :temp-outputs) | ||
temp-input-map-lookup (parse-tree :temp-input-map-lookup) | ||
[existing-temp-outputs empty-temp-outputs] (split-with data-in? temp-outputs) | ||
empty-temp-output-step-deps (->> empty-temp-outputs | ||
(map (comp temp-input-map-lookup normalized-path)) | ||
flatten | ||
(filter step-set)) | ||
expanded-temp-outputs (->> empty-temp-output-step-deps | ||
(map (comp (partial expand-outputs parse-tree step-list) steps)) | ||
flatten)] | ||
(trace "expand-outputs step:" step) | ||
(trace "expand-outputs step-list:" step-list) | ||
(trace "expand-outputs existing-temp-outputs:" existing-temp-outputs) | ||
(trace "expand-outputs empty-temp-outputs:" empty-temp-outputs) | ||
(trace "expand-outputs empty-temp-output-step-deps:" empty-temp-output-step-deps) | ||
(trace "expand-outputs expanded-temp-outputs" expanded-temp-outputs) | ||
|
||
(concat real-outputs existing-temp-outputs expanded-temp-outputs))) | ||
|
||
(defn- should-build? | ||
"Given the parse tree and a step index, determines whether it should | ||
be built and returns the reason (e.g. 'timestamped') or | ||
|
@@ -174,15 +215,21 @@ | |
above). If this step is specified manually (root of a | ||
dependency subtree), will always fail on an empty input, | ||
since it will not be generated by any other step." | ||
[step forced triggered match-type fail-on-empty] | ||
[parse-tree step-list step forced triggered match-type fail-on-empty] | ||
(trace "should-build? fail-on-empty: " fail-on-empty) | ||
(let [{:keys [inputs outputs opts]} (branch-adjust-step step false) | ||
empty-inputs (filter #(not (fs di/data-in? %)) inputs) | ||
(let [{:keys [real-inputs real-outputs temp-inputs temp-outputs opts]} (branch-adjust-step step false) | ||
inputs (into real-inputs (filter data-in? temp-inputs)) | ||
empty-inputs (filter no-data-in? real-inputs) | ||
outputs (expand-outputs parse-tree step-list step) | ||
no-outputs (empty? outputs)] | ||
(trace "should-build? forced:" forced) | ||
(trace "should-build? match-type:" match-type) | ||
(trace "should-build? triggered:" triggered) | ||
(trace "should-build? temp-inputs: " temp-inputs) | ||
(trace "should-build? real-inputs: " real-inputs) | ||
(trace "should-build? inputs: " inputs) | ||
(trace "should-build? temp-outputs: " temp-outputs) | ||
(trace "should-build? real-outputs: " real-outputs) | ||
(trace "should-build? outputs: " outputs) | ||
(trace "should-build? empty inputs: " empty-inputs) | ||
(trace "should-build? no-outputs: " no-outputs) | ||
|
@@ -200,11 +247,13 @@ | |
;; one or more outputs are missing? (can only look for those | ||
;; for targets which were specified explicitly, not triggered) | ||
(and (not triggered) | ||
(some #(not (fs di/data-in? %)) outputs)) "missing output" | ||
(some no-data-in? outputs)) "missing output" | ||
;; timecheck disabled | ||
(= false (get-in step [:opts :timecheck])) nil | ||
;; built as a dependency? | ||
triggered "projected timestamped" | ||
;; if there are nothing but temp inputs, don't build | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How does it work with:
|
||
(and (empty? inputs) (not (empty? temp-inputs))) nil | ||
;; no-input steps are always rebuilt | ||
(empty? inputs) "no-input step" | ||
:else | ||
|
@@ -233,7 +282,8 @@ | |
(reduce (fn [[new-target-steps triggered-deps] | ||
{:keys [index build match-type] :as step}] | ||
(let [step-map ((parse-tree :steps) index) | ||
cause (should-build? step-map (= build :forced) | ||
step-list (map :index target-steps) | ||
cause (should-build? parse-tree step-list step-map (= build :forced) | ||
(triggered-deps index) match-type false)] | ||
(trace (format "predict-steps, index=%d, cause=%s" index cause)) | ||
(if (nil? cause) | ||
|
@@ -302,7 +352,7 @@ | |
(defn- run-step | ||
"Runs one step performing all necessary checks, returns | ||
true if the step was actually run; false if skipped." | ||
[parse-tree step-number {:keys [index build match-type opts]}] | ||
[parse-tree steps step-number {:keys [index build match-type opts]}] | ||
(let [step ((parse-tree :steps) index) | ||
inputs (step :inputs)] | ||
;; TODO(artem) | ||
|
@@ -318,7 +368,8 @@ | |
(let [step-descr (step-string (branch-adjust-step step false)) | ||
step (update-in step [:opts] #(merge % opts)) | ||
step (prepare-step-for-run step parse-tree) | ||
should-build (should-build? step (= build :forced) | ||
step-list (map :index steps) | ||
should-build (should-build? parse-tree step-list step (= build :forced) | ||
false match-type true)] | ||
(info "") | ||
(info (format "--- %d. %s: %s" | ||
|
@@ -463,14 +514,14 @@ | |
(add-empty-promises-to-steps steps :exception-promise)) | ||
|
||
(defn- attempt-run-step | ||
[parse-tree step] | ||
[parse-tree steps step] | ||
(let [prom (:promise step)] | ||
(try | ||
; run the step (the actual job) | ||
(run-step parse-tree (:index step) step) | ||
(run-step parse-tree steps (:index step) step) | ||
(deliver prom 1) ; delivers a promise of 1/success | ||
(catch Exception e | ||
(error (str "caught exception step " (:index step) ": ") (.getMessage e) (.printStackTrace e)) | ||
(error e (str "caught exception step " (:index step) ": " e)) | ||
(deliver (:exception-promise step) e)) | ||
(finally | ||
; if promise not delivered, deliver a promise of 0/failure | ||
|
@@ -492,7 +543,7 @@ | |
@(promises-indexed i)) | ||
deps))] | ||
(if (= successful-parent-steps (count deps)) | ||
(attempt-run-step parse-tree step) | ||
(attempt-run-step parse-tree steps step) | ||
(deliver prom 0))) | ||
(catch Exception e | ||
(deliver (:exception-promise step) e)) | ||
|
@@ -553,6 +604,40 @@ | |
[steps] | ||
(reduce + (map (fn [step] @(:promise step)) steps))) | ||
|
||
(defn- setup-temp-deleting-futures | ||
"Set up a future for each temp file which waits for all | ||
the steps which depend on this temp file to finish and | ||
then deletes the temp file." | ||
[parse-tree steps] | ||
(let [steps-list (map :index steps) | ||
steps-map (zipmap steps-list steps) | ||
steps-set (into #{} steps-list)] | ||
(doseq [[file deps] (parse-tree :temp-input-map-lookup)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so i guess we don't need to track the futures anywhere? does that mean we could just use send-off? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My understanding is that send-off (used with agents) is for transforming state. These futures are just waiting on some refs and then deleting the file, so no real state is being transformed. (Perhaps you can show me an example?) |
||
(let [trimmed-deps (filter steps-set deps)] | ||
(when (not (empty? trimmed-deps)) | ||
(future | ||
(try | ||
(trace "Running future to delete target:" file "dependencies:" trimmed-deps) | ||
(let [successful-deps-count (reduce + | ||
(map (fn [i] | ||
@((steps-map i) :promise)) | ||
trimmed-deps))] | ||
(trace "Finished waiting for dependents of target:" | ||
file | ||
"dependencies:" | ||
trimmed-deps | ||
"successful-count:" | ||
successful-deps-count) | ||
(when (= successful-deps-count (count trimmed-deps)) | ||
(info "Deleting temp target:" file) | ||
(fs di/rm file))) | ||
(catch Exception e | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please add some comments that explain the ramifications of an error here. e.g., will the user get a clear indication of which files should have been deleted because they're not needed, but an error occurred? is there any easy recourse? Will the error message provide good clues as to why the file deletion failed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will do, and the error message below should be better.
In general, it should be pretty rare that there is an error here. |
||
; Likely to happen if there is some problem with deleting the file. | ||
; Catch the exception and inform the user of the problem, | ||
; but do not hald execution as deletion of the file is probably not | ||
; critical to the workflow. | ||
(error e "Exception deleting temp target:" file))))))))) | ||
|
||
(defn- run-steps-async | ||
"Runs steps asynchronously. | ||
If concurrence = 1, this will run the steps in the same order as the | ||
|
@@ -581,6 +666,9 @@ | |
assoc-promise | ||
(assoc-function parse-tree))] | ||
|
||
(when (not (:keep-temp-targets *options*)) | ||
(setup-temp-deleting-futures parse-tree steps-future)) | ||
|
||
(post event-bus (EventWorkflowBegin steps-data)) | ||
|
||
(trigger-futures jobs steps-future event-bus) | ||
|
@@ -911,6 +999,8 @@ | |
"Turn on even more verbose debugging output.") | ||
(no-arg version | ||
"Show version information.") | ||
(no-arg keep-temp-targets | ||
"Do not auto-delete temp targets") | ||
(with-arg tmpdir | ||
"Specifies the temporary directory for Drake files (by default, .drake/ in the same directory the main workflow file is located)." | ||
:type :str | ||
|
@@ -958,8 +1048,7 @@ | |
(error (str "drake: " (m :msg))) | ||
(shutdown (or (get m :exit) 1))) | ||
(catch Exception e | ||
(.printStackTrace e) | ||
(error (stack-trace-str e)) | ||
(error e (str "Exception caught: " e)) | ||
(shutdown 1)))))) | ||
|
||
(defn run-opts [opts] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i guess we should standardize on what we're calling this. is it "temp targets" or "checkpointing"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point - "temp targets" sounds good to me.