diff --git a/.gitignore b/.gitignore index c1cb6c9db..3775f8ce8 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,6 @@ cov/* out .idea clojush.iml -.gorilla-port \ No newline at end of file +.gorilla-port +/clojush/ +.ipynb_checkpoints/ \ No newline at end of file diff --git a/project.clj b/project.clj index 67d0a46ca..3bd2514df 100644 --- a/project.clj +++ b/project.clj @@ -1,6 +1,6 @@ (defproject clojush "2.32.0-1-SNAPSHOT" :description "The Push programming language and the PushGP genetic programming - system implemented in Clojure. See http://pushlanguage.com" + system implemented in Clojure. See http://pushlanguage.com" :license {:name "Eclipse Public License" :url "http://www.eclipse.org/legal/epl-v10.html"} :dependencies [[org.clojure/clojure "1.8.0"] @@ -12,8 +12,9 @@ [clojure-csv "2.0.1"] [org.clojure/data.json "0.2.6"] [clj-random "0.1.7"] - ;; https://mvnrepository.com/artifact/org.apache.commons/commons-math3 - [org.apache.commons/commons-math3 "3.2"]] + ;; https://mvnrepository.com/artifact/org.apache.commons/commons-math3 + [org.apache.commons/commons-math3 "3.2"] + [cheshire "5.7.1"]] :plugins [[lein-codox "0.9.1"] [lein-shell "0.5.0"] [lein-gorilla "0.4.0"] @@ -53,4 +54,3 @@ ;;"-XX:+UseG1GC" ;:jvm-opts ["-Xmx12g" "-Xms12g" "-XX:+UseParallelGC"] :main clojush.core) - diff --git a/run-fly b/run-fly index 038682c32..565f1cea5 100755 --- a/run-fly +++ b/run-fly @@ -25,7 +25,6 @@ homedir="/home/${fly_user}" rundir="$homedir/runs/$label-$number" repodir="$rundir/Clojush" outputdir="$rundir/output" - ssh ${fly_user}@fly.hampshire.edu "mkdir -p $rundir" rsync \ @@ -49,7 +48,7 @@ ssh ${fly_user}@fly.hampshire.edu /opt/pixar/tractor-blade-1.7.2/tractor-spool.p --jobcwd="${repodir}" \ --priority=1 \ --range 1-${n} \ - -c "bash -c 'env JAVA_CMD=/usr/java/latest/bin/java /share/apps/bin/lein run $lein_command > $outputdir/RANGE.out 2> $outputdir/RANGE.err'" + -c "bash -c 'env JAVA_CMD=/usr/java/latest/bin/java /share/apps/bin/lein run $lein_command :label 
$label > $outputdir/RANGE.out 2> $outputdir/RANGE.err'" echo "Job ID: ${number}" diff --git a/src/clojush/args.clj b/src/clojush/args.clj index 9b95bc757..2b8794349 100644 --- a/src/clojush/args.clj +++ b/src/clojush/args.clj @@ -411,7 +411,7 @@ ;; When true, will exit the run when there is an individual with a zero-error vector ;;---------------------------------------- - ;; Arguments related to printing JSON, EDN, or CSV logs + ;; Arguments related to printing JSON, EDN, CSV, and remote recording ;;---------------------------------------- :print-csv-logs false @@ -450,6 +450,14 @@ :json-log-program-strings false ;; If true, JSON logs will include program strings for each individual. + + :record-host nil + ;; Should be in the format "hostname:port" + ;; If set, will send logs of each run to a server running on this + ;; host + :label nil + ;; If set, will send this in the configuration of the run, to the + ;; external record ))) (defn load-push-argmap diff --git a/src/clojush/core.clj b/src/clojush/core.clj index df8809418..e2bbe3302 100644 --- a/src/clojush/core.clj +++ b/src/clojush/core.clj @@ -16,6 +16,7 @@ ;; for more details. (ns clojush.core + (:require [clojush.pushgp.record :as r]) (:use [clojush.pushgp pushgp report]) (:gen-class)) @@ -30,12 +31,13 @@ This allows one to run an example with a call from the OS shell prompt like: lein run examples.simple-regression :population-size 3000" [& args] + (r/new-run!) (println "Command line args:" (apply str (interpose \space args))) (let [param-list (map #(if (.endsWith % ".ser") (str %) (read-string %)) (rest args))] - (require (symbol (first args))) + (require (symbol (r/config-data! 
[:problem-file] (first args)))) (let [example-params (eval (symbol (str (first args) "/argmap"))) params (merge example-params (apply sorted-map param-list))] (println "######################################") diff --git a/src/clojush/problems/software/replace_space_with_newline.clj b/src/clojush/problems/software/replace_space_with_newline.clj index e18aca417..753d2c24e 100644 --- a/src/clojush/problems/software/replace_space_with_newline.clj +++ b/src/clojush/problems/software/replace_space_with_newline.clj @@ -160,6 +160,8 @@ (println "Outputs of best individual on training cases:") (error-function best-program :train true) (println ";;******************************") + ;; return best individual with tests errors added so that those are recorded + (assoc best :test-errors best-test-errors) )) ;; To do validation, could have this function return an altered best individual ;; with total-error > 0 if it had error of zero on train but not on validation ;; set. Would need a third category of data cases, or a defined split of training cases. diff --git a/src/clojush/pushgp/pushgp.clj b/src/clojush/pushgp/pushgp.clj index 1e9506cc8..8e3e36b5c 100644 --- a/src/clojush/pushgp/pushgp.clj +++ b/src/clojush/pushgp/pushgp.clj @@ -1,7 +1,8 @@ (ns clojush.pushgp.pushgp (:require [clojure.java.io :as io] [clj-random.core :as random] - [clojure.repl :as repl]) + [clojure.repl :as repl] + [clojush.pushgp.record :as r]) (:use [clojush args globals util pushstate random individual evaluate simplification translate] [clojush.instructions boolean code common numbers random-instructions string char vectors tag zip return input-output genome] @@ -144,13 +145,18 @@ ([args] (reset! timer-atom (System/currentTimeMillis)) (load-push-argmap args) + (when (some? (:record-host @push-argmap)) + (r/host! 
(str (:record-host @push-argmap)))) (random/with-rng (random/make-mersennetwister-rng (:random-seed @push-argmap)) ;; set globals from parameters (reset-globals) (initial-report @push-argmap) ;; Print the inital report - (print-params @push-argmap) + (r/uuid! (:run-uuid @push-argmap)) + (print-params (r/config-data! [:argmap] (dissoc @push-argmap :run-uuid))) (check-genetic-operator-probabilities-add-to-one @push-argmap) (timer @push-argmap :initialization) + (when (:print-timings @push-argmap) + (r/config-data! [:initialization-ms] (:initialization @timer-atom))) (println "\n;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;") (println "\nGenerating initial population...") (flush) (let [pop-agents (make-pop-agents @push-argmap) @@ -161,6 +167,7 @@ ;(println) ;; Main loop (loop [generation 0] + (r/new-generation! generation) (println "Processing generation:" generation) (flush) (population-translate-plush-to-push pop-agents @push-argmap) (timer @push-argmap :reproduction) @@ -183,6 +190,8 @@ ;; report and check for success (let [[outcome best] (report-and-check-for-success (vec (doall (map deref pop-agents))) generation @push-argmap)] + (r/generation-data! [:outcome] outcome) + (r/end-generation!) (cond (= outcome :failure) (do (printf "\nFAILURE\n") (if (:return-simplified-on-failure @push-argmap) (auto-simplify best diff --git a/src/clojush/pushgp/record.clj b/src/clojush/pushgp/record.clj new file mode 100644 index 000000000..d168c4090 --- /dev/null +++ b/src/clojush/pushgp/record.clj @@ -0,0 +1,114 @@ +;;; Records the results of runs to an external server + +;; Use documented in https://push-language.hampshire.edu/t/recording-and-analyzing-experimental-results/830 + +;; If `record-host` is set in the arguments, then we should send +;; data about each run, as it progresses, to that host for archival +;; and monitoring purposes. + +;; The functions in this file are stateful and should be called in this order: +;; +;; (new-run! uuid! 
config-data!* (new-generation! generation-data!* end-generation!)*)* +;; +;; Currently it doesn't enforce this and if you call a method when you shouldn't +;; the results are unknown. +;; Also it will not send anything over the network until `host!` is called, +;; before that, `end-generation!` will be a no-op. + +(ns clojush.pushgp.record + (:require [clojure.java.io] + [cheshire.core] + [cheshire.generate] + [clojure.string])) + +;; write functions as strings +(cheshire.generate/add-encoder + clojure.lang.AFunction + cheshire.generate/encode-str) + +(def hostname-and-port (atom nil)) +(def writer (atom nil)) + +(defn- ->writer + ; https://github.com/clojure-cookbook/clojure-cookbook/blob/master/05_network-io/5-09_tcp-client.asciidoc + [] + (let [[hostname port] @hostname-and-port] + (-> (java.net.Socket. hostname port) + clojure.java.io/writer))) + +(defn- set-writer! + ; Tries to get a writer to send data on, and if it fails, retries every + ; 5 seconds (loops instead of recursing from the catch, so a long outage cannot overflow the stack) + [] + (loop [] + (println "Trying to connect to external server for recording at " @hostname-and-port "...") + (or (try (reset! writer (->writer)) + (catch java.net.ConnectException _ nil)) + (do (Thread/sleep 5000) + (recur))))) + +(defn host! [host-str] + (let [[hostname port-str] (clojure.string/split host-str #":")] + (reset! hostname-and-port [hostname (int (bigint port-str))]) + (set-writer!))) + +(defn- write-data! [data] + (when (some? @hostname-and-port) + (println "Trying to record data to external server...") + (try + (do + (cheshire.core/generate-stream data @writer) + (.newLine @writer) + (.flush @writer)) + (catch java.net.SocketException _ + (set-writer!) + (write-data! data))))) + +(def data (atom {})) + + +;; Stores a configuration option for the run, for the sequence of `ks` and value `v` +;; e.g. (config-data! [:git-uuid] "abc-def") +(defn config-data! [ks v] + (swap! 
data assoc-in (cons :config ks) v) + v) + +(defn seconds-since-epoch + ;; http://stackoverflow.com/a/17432411 + ;; because Spark interprets numbers as dates in this format when in JSON + [] + (quot (System/currentTimeMillis) 1000)) + +;; Resets the run data and saves the start time. Should be called at the +;; beginning of a run +(defn new-run! [] + (reset! data {:config {:start-time (seconds-since-epoch)}})) + +(defn uuid! [uuid] + (swap! data assoc :uuid uuid)) + +;; Resets the generation data and should be called at the beginning of +;; each generation +(defn new-generation! [index] + (swap! + data + assoc + :index index + :generation {:start-time (seconds-since-epoch)})) + + +;; Stores data about the generation, e.g. +;; (generation-data! [:best :error] [1 2 3 10]) +(defn generation-data! [ks v] + (swap! data assoc-in (cons :generation ks) v) + v) + +;; Sends the data for the current generation over the network to be recorded +;; Also sends the configuration with each generation +(defn end-generation! [] + (let [{:keys [generation uuid index config]} @data] + (write-data! + (assoc generation + :config-uuid uuid + :index index + :config config)))) diff --git a/src/clojush/pushgp/report.clj b/src/clojush/pushgp/report.clj index e489b093c..c6173d81e 100644 --- a/src/clojush/pushgp/report.clj +++ b/src/clojush/pushgp/report.clj @@ -6,7 +6,8 @@ [clj-random.core :as random] [local-file] [clojure.data.csv :as csv] - [clojure.java.io :as io])) + [clojure.java.io :as io] + [clojush.pushgp.record :as r])) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; helper functions @@ -308,6 +309,7 @@ print-edn-logs edn-keys edn-log-filename edn-additional-keys] :as argmap}] + (r/generation-data! 
[:population] population) (println) (println ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;") (println ";; -*- Report at generation" generation) @@ -352,21 +354,25 @@ (lexicase-report population argmap)) (when (= total-error-method :ifs) (implicit-fitness-sharing-report population argmap)) (println (format "--- Best Program (%s) Statistics ---" (str "based on " (name err-fn)))) + (r/generation-data! [:best :individual] best) (println "Best genome:" (print-genome best)) (println "Best program:" (pr-str (not-lazy (:program best)))) (when (> report-simplifications 0) (println "Partial simplification:" - (pr-str (not-lazy (:program (auto-simplify best + (pr-str (not-lazy (:program (r/generation-data! [:best :individual-simplified] + (auto-simplify best error-function report-simplifications false - 1000)))))) + 1000))))))) (when print-errors (println "Errors:" (not-lazy (:errors best)))) (when (and print-errors (not (empty? meta-error-categories))) (println "Meta-Errors:" (not-lazy (:meta-errors best)))) (println "Total:" (:total-error best)) - (println "Mean:" (float (/ (:total-error best) - (count (:errors best))))) + (let [mean-error (r/generation-data! [:best :mean-error] (float (/ (:total-error best) + (count (:errors best)))))] + + (println "Mean:" mean-error)) (when (not= normalization :none) (println "Normalized error:" (:normalized-error best))) (case total-error-method @@ -375,11 +381,12 @@ :ifs (println "IFS-error:" (:weighted-error best)) nil) (when print-history (println "History:" (not-lazy (:history best)))) - (println "Genome size:" (count (:genome best))) - (println "Size:" (count-points (:program best))) - (printf "Percent parens: %.3f\n" - (double (/ (count-parens (:program best)) - (count-points (:program best))))) ;Number of (open) parens / points + (println "Genome size:" (r/generation-data! [:best :genome-size] (count (:genome best)))) + (println "Size:" (r/generation-data! 
[:best :program-size] (count-points (:program best)))) + (printf "Percent parens: %.3f\n" + (r/generation-data! [:best :percent-parens] + (double (/ (count-parens (:program best)) + (count-points (:program best)))))) ;Number of (open) parens / points (println "--- Population Statistics ---") (when print-cosmos-data (println "Cosmos Data:" (let [quants (config/quantiles (count population))] @@ -387,9 +394,11 @@ (map #(:total-error (nth (sort-by :total-error population) %)) quants))))) (println "Average total errors in population:" - (*' 1.0 (mean (map :total-error sorted)))) + (r/generation-data! [:population-report :mean-total-error] + (*' 1.0 (mean (map :total-error sorted))))) (println "Median total errors in population:" - (median (map :total-error sorted))) + (r/generation-data! [:population-report :median-total-error] + (median (map :total-error sorted)))) (when print-errors (println "Error averages by case:" (apply map (fn [& args] (*' 1.0 (mean args))) (map :errors population)))) @@ -404,46 +413,73 @@ (apply map (fn [& args] (apply min args)) (map :meta-errors population)))) (println "Average genome size in population (length):" - (*' 1.0 (mean (map count (map :genome sorted))))) + (r/generation-data! [:population-report :mean-genome-size] + (*' 1.0 (mean (map count (map :genome sorted)))))) (println "Average program size in population (points):" - (*' 1.0 (mean (map count-points (map :program sorted))))) + (r/generation-data! [:population-report :mean-program-size] + (*' 1.0 (mean (map count-points (map :program sorted)))))) (printf "Average percent parens in population: %.3f\n" - (mean (map #(double (/ (count-parens (:program %)) (count-points (:program %)))) sorted))) + (r/generation-data! [:population-report :mean-program-percent-parens] + (mean (map #(double (/ (count-parens (:program %)) (count-points (:program %)))) sorted)))) (let [ages (map :age population)] (println "Minimum age in population:" - (* 1.0 (apply min ages))) + (r/generation-data! 
[:population-report :min-age] + (* 1.0 (apply min ages)))) (println "Maximum age in population:" - (* 1.0 (apply max ages))) + (r/generation-data! [:population-report :max-age] + (* 1.0 (apply max ages)))) (println "Average age in population:" - (* 1.0 (mean ages))) + (r/generation-data! [:population-report :mean-age] + (* 1.0 (mean ages)))) (println "Median age in population:" - (* 1.0 (median ages)))) + (r/generation-data! [:population-report :median-age] + (* 1.0 (median ages))))) (let [grain-sizes (map :grain-size population)] (println "Minimum grain-size in population:" - (* 1.0 (apply min grain-sizes))) + (r/generation-data! [:population-report :min-grain-size] + (* 1.0 (apply min grain-sizes)))) (println "Maximum grain-size in population:" - (* 1.0 (apply max grain-sizes))) + (r/generation-data! [:population-report :max-grain-size] + (* 1.0 (apply max grain-sizes)))) (println "Average grain-size in population:" - (* 1.0 (mean grain-sizes))) + (r/generation-data! [:population-report :mean-grain-size] + (* 1.0 (mean grain-sizes)))) (println "Median grain-size in population:" - (* 1.0 (median grain-sizes)))) + (r/generation-data! [:population-report :median-grain-size] + (* 1.0 (median grain-sizes))))) (println "--- Population Diversity Statistics ---") (let [genome-frequency-map (frequencies (map :genome population))] - (println "Min copy number of one Plush genome:" (apply min (vals genome-frequency-map))) - (println "Median copy number of one Plush genome:" (median (vals genome-frequency-map))) - (println "Max copy number of one Plush genome:" (apply max (vals genome-frequency-map))) + (println "Min copy number of one Plush genome:" + (r/generation-data! [:population-report :min-genome-frequency] + (apply min (vals genome-frequency-map)))) + (println "Median copy number of one Plush genome:" + (r/generation-data! 
[:population-report :median-genome-frequency] + (median (vals genome-frequency-map)))) + (println "Max copy number of one Plush genome:" + (r/generation-data! [:population-report :max-genome-frequency] + (apply max (vals genome-frequency-map)))) (println "Genome diversity (% unique Plush genomes):\t" - (float (/ (count genome-frequency-map) (count population))))) + (r/generation-data! [:population-report :percent-genomes-unique] + (float (/ (count genome-frequency-map) (count population)))))) (let [frequency-map (frequencies (map :program population))] - (println "Min copy number of one Push program:" (apply min (vals frequency-map))) - (println "Median copy number of one Push program:" (median (vals frequency-map))) - (println "Max copy number of one Push program:" (apply max (vals frequency-map))) + (println "Min copy number of one Push program:" + (r/generation-data! [:population-report :min-program-frequency] + (apply min (vals frequency-map)))) + (println "Median copy number of one Push program:" + (r/generation-data! [:population-report :median-program-frequency] + (median (vals frequency-map)))) + (println "Max copy number of one Push program:" + (r/generation-data! [:population-report :max-program-frequency] + (apply max (vals frequency-map)))) (println "Syntactic diversity (% unique Push programs):\t" - (float (/ (count frequency-map) (count population))))) + (r/generation-data! [:population-report :percent-programs-unique] + (float (/ (count frequency-map) (count population)))))) (println "Total error diversity:\t\t\t\t" - (float (/ (count (frequencies (map :total-error population))) (count population)))) + (r/generation-data! [:population-report :percent-total-error-unique] + (float (/ (count (frequencies (map :total-error population))) (count population))))) (println "Error (vector) diversity:\t\t\t" - (float (/ (count (frequencies (map :errors population))) (count population)))) + (r/generation-data! 
[:population-report :percent-errors-unique] + (float (/ (count (frequencies (map :errors population))) (count population))))) (when @global-print-behavioral-diversity (swap! population-behaviors #(take-last population-size %)) ; Only use behaviors during evaluation, not those during simplification @@ -468,7 +504,8 @@ (reset! selection-counts {})) (when autoconstructive (println "Number of random replacements for non-diversifying individuals:" - (count (filter :is-random-replacement population)))) + (r/generation-data! [:population-report :number-random-replacements] + (count (filter :is-random-replacement population))))) (println "--- Run Statistics ---") (println "Number of program evaluations used so far:" @evaluations-count) (println "Number of point (instruction) evaluations so far:" point-evaluations-before-report) @@ -514,7 +551,8 @@ "Prints the initial report of a PushGP run." [{:keys [problem-specific-initial-report] :as push-argmap}] (problem-specific-initial-report push-argmap) - (println "Registered instructions:" @registered-instructions) + (println "Registered instructions:" + (r/config-data! [:registered-instructions] @registered-instructions)) (println "Starting PushGP run.") (printf "Clojush version = ") (try @@ -524,7 +562,7 @@ version-number (.substring version-str 1 (count version-str))] (if (empty? version-number) (throw Exception) - (printf (str version-number "\n")))) + (printf (str (r/config-data! [:version-number] version-number) "\n")))) (flush) (catch Exception e (printf "version number unavailable\n") @@ -538,6 +576,7 @@ ;; been committed already. ;; - GitHub link will only work if commit has been pushed ;; to GitHub. + (r/config-data! [:git-hash] git-hash) (printf (str "Hash of last Git commit = " git-hash "\n")) (printf (str "GitHub link = https://github.com/lspector/Clojush/commit/" git-hash