Dev (#96)

felixleopoldo · melmasri · Mohamad · web-flow · commit 5a43979739af · 2023-10-26T16:11:33.000+02:00
* Adding functionality for nesting algorithms (#84) * Fixing the input graph feature. * ON the input graph feature. * Passing seed number to the tetrad modules. * Adding discrete data examples for boss and grasp. * Fixing boss and grasp json schemas. * Added test for grasp. * Updating docs and conf. * Fixed bug in benchpress module when empty graph files are written. --------- Co-authored-by: Mohamad Elmasri <melmasri@users.noreply.github.com> Co-authored-by: Mohamad <mo@julia>
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-2.4.0
+2.5.0
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -35,7 +35,6 @@
     available_structure_learning_algorithms
     available_evaluations
 
-
 .. toctree::
     :hidden:
     :maxdepth: 1
@@ -44,14 +43,6 @@
     
     module_add
 
-.. .. toctree::
-..     :hidden:
-..     :maxdepth: 3
-..     :name: File formats
-..     :caption: File formats
-
-..     data_formats
-
 .. toctree::
     :hidden:
     :maxdepth: 3
@@ -71,12 +62,10 @@
 
 ------------------------
 
-
 ##################################
 Benchpress
 ##################################
 
-
 Describing the relationship between the variables in a study domain and modelling
 the data generating mechanism is a fundamental problem in many empirical sciences.
 `Probabilistic graphical models <https://en.wikipedia.org/wiki/Graphical_model>`_ are one common approach to tackle the problem. 
@@ -97,12 +86,12 @@ generated datasets, the workflow also includes a number of standard datasets and
 * The paper :footcite:t:`rios2021benchpress`
 * The `GitHub <https://github.com/felixleopoldo/benchpress>`_ repository
 * This `Medium story <https://medium.com/@felixleopoldorios/structure-learning-using-benchpress-826847db0aa8>`_
-* This tutorial `UAI 2023 Tutorial: Structure Learning Using Benchpress - YouTube <https://www.youtube.com/watch?v=tx3hIH3b9Hg>`_
-* The `Discord <https://discord.com/channels/1007933286724685824/1007933287215411284>`_ chat for any kind of questions
-
+* This video tutorial `UAI 2023 Tutorial: Structure Learning Using Benchpress - YouTube <https://www.youtube.com/watch?v=tx3hIH3b9Hg>`_
+* The `Discord <https://discord.com/channels/1007933286724685824/1007933287215411284>`_ chat for any kind of discussions etc.
 
 .. rubric:: Updates
 
+* 2023-10-13: Benchpress 2.5.0 released. Added the feature to pass the graph estimate of one algorithm as input to another. Added the algorithm module :ref:`athomas_jtsamplers` for MCMC estimation of graphs of undirected decomposable graphical models.
 * 2023-09-24: Benchpress 2.4.0 released. Added the Psi-learner algorithm for learning graphs of undirected Gaussian graphical models (:ref:`equsa_psilearner`).
 * 2023-09-19: Benchpress 2.3.0 released. Updated causal-cmd to version 1.10.0. Added the BOSS algorithm (:ref:`tetrad_boss`).
 * 2023-09-08: Benchpress 2.2.0 released. Now supporting the `ARM64 <https://en.wikipedia.org/wiki/AArch64>`_ architecture used e.g. by the recent Apple computers. 
diff --git a/workflow/rules/data/iid/rules.smk b/workflow/rules/data/iid/rules.smk
@@ -8,7 +8,7 @@ rule sample_bin_bn_data:
     output:
         data="{output_dir}/data" \
              "/adjmat=/{adjmat}"\
-             "/parameters=/bin_bn/{bn}"\             
+             "/parameters=/bin_bn/{bn}"\
              "/data=/"+pattern_strings["iid"] + "/" \
              "seed={replicate}.csv"
     wildcard_constraints:
@@ -107,7 +107,7 @@ rule sample_data_fixed_bnfit:
         data="{output_dir}/data/adjmat=/{adjmat}/parameters=/bn.fit_networks/{bn}/data=/"+pattern_strings["iid"]+"/seed={replicate}.csv"
     wildcard_constraints:
         n="[0-9]*",
-        bn=".*\.rds"    
+        bn=".*\.rds"
     shell:
         "Rscript workflow/rules/data/iid/sample_from_bnlearn_bn.R " \
         "--filename {output.data} " \
diff --git a/workflow/rules/evaluation/benchmarks/path_generators.py b/workflow/rules/evaluation/benchmarks/path_generators.py
@@ -33,7 +33,7 @@ def summarise_alg_input_adjmat_est_path(algorithm):
             "adjmat=/{adjmat}/"\
             "parameters=/{bn}/"\
             "data=/{data}/"\
-            "algorithm=/" + pattern_strings[algorithm] + "/" + \
+            "algorithm=/" + pattern_strings[algorithm] + "/"  \
             "seed={replicate}/" \
             "adjmat.csv"
 
@@ -42,7 +42,7 @@ def summarise_alg_input_time_path(algorithm):
                     "adjmat=/{adjmat}/"\
                     "parameters=/{bn}/"\
                     "data=/{data}/" \
-                    "algorithm=/" + pattern_strings[algorithm] + "/" + \
+                    "algorithm=/" + pattern_strings[algorithm] + "/" \
                     "seed={replicate}/" \
                     "time.txt"
 
@@ -52,13 +52,13 @@ def summarise_alg_input_ntests_path(algorithm):
                     "adjmat=/{adjmat}/"\
                     "parameters=/{bn}/"\
                     "data=/{data}/" \
-                    "algorithm=/" + pattern_strings[algorithm] + "/" + \
+                    "algorithm=/" + pattern_strings[algorithm] + "/" \
                     "seed={replicate}/" \
                     "ntests.txt"
 
 def summarise_alg_output_res_path(algorithm):
     return "{output_dir}/result/"\
-            "algorithm=/" + pattern_strings[algorithm] + "/" + \
+            "algorithm=/" + pattern_strings[algorithm] + "/"  \
             "adjmat=/{adjmat}/"\
             "parameters=/{bn}/"\
             "data=/{data}/"\
diff --git a/workflow/rules/evaluation/benchmarks/plot_ROC.R b/workflow/rules/evaluation/benchmarks/plot_ROC.R
@@ -61,7 +61,6 @@ fpr_tpr_pattern <- function(){
               )
             }
           } + {
-
             if (!param_annot) {
               geom_point(
                 data = toplot, alpha = 0.5,
@@ -84,7 +83,7 @@ fpr_tpr_pattern <- function(){
                   col = id_numlev
                 ), shape = 20,
                 size = 1
-              )
+              ) 
             }
           } + {
             if (scatter && show_seed) {
@@ -106,7 +105,7 @@ fpr_tpr_pattern <- function(){
                   replace_na(list("curve_vals" = 0)) %>%
                   mutate(SHDP_pattern_median = 1 - TPR_pattern_median + FPRn_pattern_median) %>%
                   filter(SHDP_pattern_median == min(SHDP_pattern_median)),
-                alpha = 0.8, position = "dodge", alpha = 1, show.legend = FALSE,
+                alpha = 0.8, position = "dodge", show.legend = FALSE,
                 aes(
                   x = FPRn_pattern_median, y = TPR_pattern_median,
                   col = id_numlev, label = id_num
@@ -126,7 +125,7 @@ fpr_tpr_pattern <- function(){
               )
             }
           } +
-          guides(shape = FALSE) +
+          guides(shape = "none") +
           facet_wrap(. ~ adjmat + parameters + data + n_seeds, nrow = 2) +
           {
             if (!is.null(xlim)) {
@@ -929,9 +928,12 @@ if (file.info(snakemake@input[["csv"]])$size == 0) {
   toplot <- read.csv(snakemake@input[["csv"]]) # Median, mean, quantiles, taken over the seeds
   joint_bench <- read.csv(snakemake@input[["raw_bench"]]) # All raw benchmarks in one dataframe
 
-  replacement_list <- list(parameters = "NA") # converts NA to string "NA" in the dataframe
-  toplot[is.na(toplot)] <- "NA"
-  joint_bench[is.na(joint_bench)] <- "NA"
+  # ME: converting NA to a string causes mixed types in a column;
+  # R in this case converts all values in the column to strings.
+  # Made an alternative fix below.
+  # replacement_list <- list(parameters = "NA") # converts NA to string "NA" in the dataframe
+  # toplot[is.na(toplot)] <- "NA"
+  # joint_bench[is.na(joint_bench)] <- "NA"
   #toplot <- toplot %>% replace_na(replacement_list)
   #joint_bench <- joint_bench %>% replace_na(replacement_list)
 
@@ -997,7 +999,7 @@ if (file.info(snakemake@input[["csv"]])$size == 0) {
           filter(adjmat == adjmat2) %>%
           filter(parameters == parameters2) %>%
           filter(data == data2)
-
+        
         if (nrow(joint_bench) > 0) {
           fpr_tpr_pattern()
           fpr_tpr_skel()