From 83c416addca3f347b33b55636f2ac2f66399efed Mon Sep 17 00:00:00 2001 From: Andrie de Vries Date: Fri, 31 Mar 2017 15:07:10 +0100 Subject: [PATCH] Rebuild vignettes --- vignettes/checkpoint.R | 62 ++++++ vignettes/checkpoint.html | 207 ++++++++++++++++++++ vignettes/managing-checkpoint-archives.R | 71 +++++++ vignettes/managing-checkpoint-archives.html | 177 +++++++++++++++++ vignettes/using-checkpoint-with-knitr.R | 60 ++++++ vignettes/using-checkpoint-with-knitr.html | 182 +++++++++++++++++ 6 files changed, 759 insertions(+) create mode 100644 vignettes/checkpoint.R create mode 100644 vignettes/checkpoint.html create mode 100644 vignettes/managing-checkpoint-archives.R create mode 100644 vignettes/managing-checkpoint-archives.html create mode 100644 vignettes/using-checkpoint-with-knitr.R create mode 100644 vignettes/using-checkpoint-with-knitr.html diff --git a/vignettes/checkpoint.R b/vignettes/checkpoint.R new file mode 100644 index 0000000..d217265 --- /dev/null +++ b/vignettes/checkpoint.R @@ -0,0 +1,62 @@ +## ---- eval=FALSE--------------------------------------------------------- +# library(checkpoint) +# checkpoint("2015-04-26", checkpointLocation = tempdir()) + +## ---- eval=FALSE, warning=FALSE------------------------------------------ +# library(checkpoint) +# checkpoint("2015-04-26", checkpointLocation = tempdir()) +# +# library(MASS) +# hist(islands) +# truehist(islands) + +## ----setup, include=FALSE------------------------------------------------ + +## Create temporary project and set working directory + +example_project <- tempdir() + +dir.create(example_project, recursive = TRUE, showWarnings = FALSE) +oldRepos <- getOption("repos") +oldLibPaths <- .libPaths() + + +## Write dummy code file to project + +example_code <- ' +library(checkpoint) +checkpoint("2015-04-26", checkpointLocation = tempdir()) + +library(MASS) +hist(islands) +truehist(islands) +' + +cat(example_code, file = file.path(example_project, "checkpoint_example_code.R")) + + +## 
----checkpoint, warning=FALSE------------------------------------------- +## Create a checkpoint by specifying a snapshot date + +library(checkpoint) +dir.create(file.path(tempdir(), ".checkpoint"), recursive = TRUE) +checkpoint("2015-04-26", project = example_project, checkpointLocation = tempdir()) + +## ----inspect-1----------------------------------------------------------- +getOption("repos") + +## ----inspect-2----------------------------------------------------------- +normalizePath(.libPaths(), winslash = "/") + +## ----inspect-3----------------------------------------------------------- +installed.packages()[, "Package"] + +## ----cleanup, include=FALSE---------------------------------------------- +## cleanup + +unlink(example_project, recursive = TRUE) +unlink(file.path(tempdir(), "checkpoint_example_code.R")) +unlink(file.path(tempdir(), ".checkpoint"), recursive = TRUE) +options(repos = oldRepos) +unCheckpoint(oldLibPaths) + diff --git a/vignettes/checkpoint.html b/vignettes/checkpoint.html new file mode 100644 index 0000000..cfdd6e0 --- /dev/null +++ b/vignettes/checkpoint.html @@ -0,0 +1,207 @@ + + + + + + + + + + + + + + + + +Using checkpoint for reproducible research + + + + + + + + + + + + + + + + + +

Using checkpoint for reproducible research

+

Andrie de Vries

+

2017-03-31

+ + + +
+

The Reproducible R Toolkit (RRT)

+

The Reproducible R Toolkit provides tools to ensure the results of R code are repeatable over time, by anyone. Most R scripts rely on packages, but new versions of packages are released daily. To ensure that results from R are reproducible, it’s important to run R scripts using exactly the same package version in use when the script was written.

+

The Reproducible R Toolkit provides an R function checkpoint, which ensures that all of the necessary R packages are installed with the correct version. This makes it easy to reproduce your results at a later date or on another system, and makes it easier to share your code with the confidence that others will get the same results you did.

+

The Reproducible R Toolkit also works in conjunction with the “checkpoint-server”, which makes a daily copy of all CRAN packages, to guarantee that every package version is available to all R developers thereby ensuring reproducibility.

+
+

Components of RRT

+

RRT is a collection of R packages and the checkpoint server that together make your work with R packages more reproducible over time by anyone.

+
+

The checkpoint server

+

To achieve reproducibility, daily snapshots of CRAN are stored on our checkpoint server. At midnight UTC each day, our mirror of CRAN is refreshed. When the rsync process is complete, the checkpoint server takes and stores a snapshot of the CRAN mirror as it was at that very moment. These daily snapshots can then be accessed on the MRAN website or using the checkpoint package, which installs and consistently uses these packages just as they existed at midnight UTC on a specified snapshot date. Daily snapshots are available as far back as 2014-09-17. For more information, visit the checkpoint server GitHub site.

+
+checkpoint server +

checkpoint server

+
+
+
+

The checkpoint package

+

The goal of the checkpoint package is to solve the problem of package reproducibility in R. Since packages get updated on CRAN all the time, it can be difficult to recreate an environment where all your packages are consistent with some earlier state. To solve this issue, checkpoint allows you to install packages locally as they existed on a specific date from the corresponding snapshot (stored on the checkpoint server) and it configures your R session to use only these packages. Together, the checkpoint package and the checkpoint server act as a CRAN time machine so that anyone using checkpoint() can ensure the reproducibility of their scripts or projects at any time.

+
+checkpoint package +

checkpoint package

+
+
+
+
+
+

Using checkpoint

+

Using checkpoint is simple:

+ +
+

Using the checkpoint function

+

When you create a checkpoint, the checkpoint() function performs the following:

+
    +
  • Creates a snapshot folder to install packages. This library folder is located at ~/.checkpoint
  • +
  • Scans your project folder for all packages used. Specifically, it searches for all instances of library() and require() in your code.
  • +
  • Installs these packages from the MRAN snapshot into your snapshot folder using install.packages()
  • +
  • Sets options for your CRAN mirror to point to an MRAN snapshot, i.e. modifies options(repos)
  • +
+

This means the remainder of your script will run with the packages from a specific date.

+
+
+

Sharing your scripts for reproducibility

+

Sharing your script to be reproducible is as easy as:

+
    +
  • Load the checkpoint package using library(checkpoint)
  • +
  • Ensure you specify checkpoint() with your checkpoint date, e.g. checkpoint("2014-09-17")
  • +
+

Then send this script to your collaborators. When they run this script on their machine, checkpoint will perform the same steps of installing the necessary packages, creating the checkpoint snapshot folder and producing the same results.

+
+
+

Resetting the checkpoint

+

To reset the checkpoint, simply restart your R session.

+
+
+

Worked example

+

To create a checkpoint project, you do:

+
    +
  1. Create a new folder and change your working directory to this folder. If you use an IDE like RStudio, this is identical to creating a new RStudio project.
  2. +
  3. Add your R script files to this folder.
  4. +
  5. Add a checkpoint to the top of the script:

    +
    library(checkpoint)
    +checkpoint("2015-04-26", checkpointLocation = tempdir())
  6. +
  7. Run the script.

  8. +
+
+

Create a checkpoint project

+

For example, your script may look like this:

+
library(checkpoint)
+checkpoint("2015-04-26", checkpointLocation = tempdir())
+
+library(MASS)
+hist(islands)
+truehist(islands)
+
+
+

Run the checkpoint code

+

Next you want to run the script. Here is what checkpoint does:

+
## Create a checkpoint by specifying a snapshot date
+
+library(checkpoint)
+dir.create(file.path(tempdir(), ".checkpoint"), recursive = TRUE)
+checkpoint("2015-04-26", project = example_project, checkpointLocation = tempdir())
+
## Scanning for packages used in this project
+
## - Discovered 2 packages
+
## Installing packages used in this project
+
##  - Installing 'MASS'
+
## MASS
+
## Installing package into 'C:/Users/adevries/AppData/Local/Temp/RtmpcjAflq/.checkpoint/2015-04-26/lib/x86_64-w64-mingw32/3.3.3'
+## (as 'lib' is unspecified)
+
## package 'MASS' successfully unpacked and MD5 sums checked
+## 
+## The downloaded binary packages are in
+##  C:\Users\adevries\AppData\Local\Temp\RtmpcjAflq\downloaded_packages
+
## checkpoint process complete
+
## ---
+
+
+

Inspecting the results

+

Now inspect the results:

+

First, check that your CRAN mirror is set to MRAN snapshot:

+
getOption("repos")
+
## [1] "https://mran.microsoft.com/snapshot/2015-04-26"
+

Next, check that the library path is set to ~/.checkpoint:

+
normalizePath(.libPaths(), winslash = "/")
+
## [1] "C:/Users/adevries/AppData/Local/Temp/RtmpcjAflq/.checkpoint/2015-04-26/lib/x86_64-w64-mingw32/3.3.3"
+## [2] "C:/Users/adevries/AppData/Local/Temp/RtmpcjAflq/.checkpoint/R-3.3.3"
+

Finally, check which packages are installed in checkpoint library:

+
installed.packages()[, "Package"]
+
##       MASS   compiler 
+##     "MASS" "compiler"
+
+
+
+ + + + + + + + diff --git a/vignettes/managing-checkpoint-archives.R b/vignettes/managing-checkpoint-archives.R new file mode 100644 index 0000000..9d38a57 --- /dev/null +++ b/vignettes/managing-checkpoint-archives.R @@ -0,0 +1,71 @@ +## ---- eval=FALSE--------------------------------------------------------- +# library(MASS) +# hist(islands) +# truehist(islands) + +## ----setup-1, include=FALSE---------------------------------------------- +## Write dummy code file to project +example_code <- ' +library(MASS) +library(chron) +' +dir.create(tempdir(), recursive = TRUE) +cat(example_code, file = file.path(tempdir(), "managing_checkpoint_example_code.R")) + +## ----checkpoint, results="hide", message=FALSE, warning=FALSE------------ +## Create a checkpoint by specifying a snapshot date +library(checkpoint) +dir.create(file.path(tempdir(), ".checkpoint"), recursive = TRUE, showWarnings = FALSE) +checkpoint("2015-04-26", project = tempdir(), checkpointLocation = tempdir()) + + +## ----archives-1---------------------------------------------------------- +# List checkpoint archives on disk. +checkpointArchives(tempdir()) + +## ----archives-2---------------------------------------------------------- +checkpointArchives(tempdir(), full.names = TRUE) + +## ----access-------------------------------------------------------------- +# Returns the date the snapshot was last accessed. +getAccessDate(tempdir()) + + +## ----remove-1, eval=FALSE------------------------------------------------ +# # Remove singe checkpoint archive from disk. +# checkpointRemove("2015-04-26") + +## ----remove-2, eval=FALSE------------------------------------------------ +# # Remove range of checkpoint archives from disk. 
+# checkpointRemove("2015-04-26", allSinceSnapshot = TRUE) +# checkpointRemove("2015-04-26", allUntilSnapshot = = TRUE) +# + +## ----remove-3, eval=FALSE------------------------------------------------ +# # Remove snapshot archives that have not been used recently +# checkpointRemove("2015-04-26", notUsedSince = TRUE) +# + +## ----logfile-1----------------------------------------------------------- +dir(file.path(tempdir(), ".checkpoint")) + +## ----logfile-2----------------------------------------------------------- + +log_file <- file.path(tempdir(), ".checkpoint", "checkpoint_log.csv") +log <- read.csv(log_file) +head(log) + +## ----uncheckpoint-1------------------------------------------------------ +.libPaths() + +## ----uncheckpoint-2------------------------------------------------------ +# Note this is still experimental +unCheckpoint() +.libPaths() + +## ----cleanup, include=FALSE---------------------------------------------- +## cleanup +unlink("manifest.R") +unlink(file.path(tempdir(), "managing_checkpoint_example_code.R")) +unlink(file.path(tempdir(), ".checkpoint"), recursive = TRUE) + diff --git a/vignettes/managing-checkpoint-archives.html b/vignettes/managing-checkpoint-archives.html new file mode 100644 index 0000000..996a7c5 --- /dev/null +++ b/vignettes/managing-checkpoint-archives.html @@ -0,0 +1,177 @@ + + + + + + + + + + + + + + + + +Managing checkpoint snapshot archives + + + + + + + + + + + + + + + + + +

Managing checkpoint snapshot archives

+

Andrie de Vries

+

2017-03-31

+ + + +

The checkpoint() function enables reproducible research by managing your R package versions. These packages are downloaded into a local .checkpoint folder.

+

If you use checkpoint() for many projects, these local packages can consume some storage space, so the package also exposes functions to manage your snapshots.

+

In summary:

+ +
+

Setting up an example project:

+

For illustration, set up a script referencing a single package:

+
library(MASS)
+hist(islands)
+truehist(islands)
+

Next, create the checkpoint:

+
## Create a checkpoint by specifying a snapshot date
+library(checkpoint)
+dir.create(file.path(tempdir(), ".checkpoint"), recursive = TRUE, showWarnings = FALSE)
+checkpoint("2015-04-26", project = tempdir(), checkpointLocation = tempdir())
+
+
+

Working with checkpoint archive snapshots

+

You can query the available snapshots on disk using the checkpointArchives() function. This returns a vector of snapshot folders.

+
# List checkpoint archives on disk.
+checkpointArchives(tempdir())
+
## [1] "2015-04-26"
+

You can get the full paths by including the argument full.names=TRUE:

+
checkpointArchives(tempdir(), full.names = TRUE)
+
## [1] "C:/Users/adevries/AppData/Local/Temp/RtmpcjAflq/.checkpoint/2015-04-26"
+
+
+

Working with access dates

+

Every time you use checkpoint() the function places a small marker in the snapshot archive with the access date. In this way you can track when you last used the snapshot archive.

+
# Returns the date the snapshot was last accessed.
+getAccessDate(tempdir())
+
## C:/Users/adevries/AppData/Local/Temp/RtmpcjAflq/.checkpoint/2015-04-26 
+##                                                           "2017-03-31"
+
+
+

Removing a snapshot from local disk

+

Since the date of last access is tracked, you can use this to manage your checkpoint archives.

+

The function checkpointRemove() will delete archives from disk. You can use this function in multiple ways. For example, specify a specific archive to remove:

+
# Remove single checkpoint archive from disk.
+checkpointRemove("2015-04-26")
+

You can also remove a range of snapshot archives older (or more recent) than a snapshot date:

+
# Remove range of checkpoint archives from disk.
+checkpointRemove("2015-04-26", allSinceSnapshot = TRUE)
+checkpointRemove("2015-04-26", allUntilSnapshot = TRUE)
+

Finally, you can remove all snapshot archives that have not been accessed since a given date:

+
# Remove snapshot archives that have not been used recently
+checkpointRemove("2015-04-26", notUsedSince = TRUE)
+
+
+

Reading the checkpoint log file

+

One of the side effects of checkpoint() is to create a log file that contains information about packages that get downloaded, as well as the download size.

+

This file is stored in the checkpoint root folder, and is a csv file with column names, so you can read this with your favourite R function or other tools.

+
dir(file.path(tempdir(), ".checkpoint"))
+
## [1] "2015-04-26"         "R-3.3.3"            "checkpoint_log.csv"
+

Inspect the log file:

+
log_file <- file.path(tempdir(), ".checkpoint", "checkpoint_log.csv")
+log <- read.csv(log_file)
+head(log)
+
##             timestamp snapshotDate   pkg   bytes
+## 1 2017-03-31 15:05:51   2015-04-26  MASS 1084392
+## 2 2017-03-31 15:05:53   2015-04-26 chron  107881
+
+
+

Resetting the checkpoint

+

In older versions of checkpoint() the only way to reset the effect of checkpoint() was to restart your R session.

+

In v0.3.20 and above, you can use the function unCheckpoint(). This will reset your .libPaths to the user folder.

+
.libPaths()
+
## [1] "C:/Users/adevries/AppData/Local/Temp/RtmpcjAflq/.checkpoint/2015-04-26/lib/x86_64-w64-mingw32/3.3.3"
+## [2] "C:/Users/adevries/AppData/Local/Temp/RtmpcjAflq/.checkpoint/R-3.3.3"
+

Now use unCheckpoint() to reset your library paths

+
# Note this is still experimental
+unCheckpoint()
+.libPaths()
+
## [1] "C:/Users/adevries/Documents/R/win-library/3.3"
+
+ + + + + + + + diff --git a/vignettes/using-checkpoint-with-knitr.R b/vignettes/using-checkpoint-with-knitr.R new file mode 100644 index 0000000..ab8a7f6 --- /dev/null +++ b/vignettes/using-checkpoint-with-knitr.R @@ -0,0 +1,60 @@ +## ---- eval = FALSE------------------------------------------------------- +# # demo script +# library(MASS) +# hist(islands) + +## ---- echo=FALSE, comment=""--------------------------------------------- +cat("```{r, include=FALSE}", ' +# write a manifest to local folder +cat(" +library(MASS) +", +file = "manifest.R") +', "```", sep = "") + +## ---- echo=FALSE, comment=""--------------------------------------------- +cat("```{r, include=FALSE}", ' +# Create .checkpoint folder (in tempdir for this example) +td <- tempdir() +dir.create(file.path(td, ".checkpoint"), recursive = TRUE) + +# Create the checkpoint +library(checkpoint) +checkpoint("2017-03-28", checkpointLocation = td) +', "```", sep = "") + +## ----checkpoint, warning=FALSE------------------------------------------- +# write a manifest to local folder +cat(' +library(MASS) +', +file = "manifest.R") + +# Create .checkpoint folder (in tempdir for this example) +dir.create(file.path(tempdir(), ".checkpoint"), recursive = TRUE) + +# Create the checkpoint +library(checkpoint) +checkpoint("2017-03-28", checkpointLocation = tempdir()) + + +## ---- eval=FALSE--------------------------------------------------------- +# .libPaths() +# ## [1] ".../Temp/RtmpIVB6bI/.checkpoint/2017-03-28/lib/x86_64-w64-mingw32/3.3.2" +# ## [2] ".../Temp/RtmpIVB6bI/.checkpoint/R-3.3.2" + +## ------------------------------------------------------------------------ +installed.packages()[, "Package"] + +## ---- warning=FALSE------------------------------------------------------ +library(MASS) +hist(islands) + +## ---- include=FALSE------------------------------------------------------ +# clean up + +detach("package:MASS", unload = TRUE) +unlink("manifest.R") +unlink(file.path(tempdir(), ".checkpoint"), 
recursive = TRUE) +unCheckpoint() + diff --git a/vignettes/using-checkpoint-with-knitr.html b/vignettes/using-checkpoint-with-knitr.html new file mode 100644 index 0000000..f705187 --- /dev/null +++ b/vignettes/using-checkpoint-with-knitr.html @@ -0,0 +1,182 @@ + + + + + + + + + + + + + + + + +Using checkpoint in a markdown document + + + + + + + + + + + + + + + + + +

Using checkpoint in a markdown document

+

Andrie de Vries

+

2017-03-31

+ + + +
+

Introduction

+

Sometimes you want to create a report using markdown, and you want to checkpoint the code in this document.

+

However, running this as part of an RMarkdown process is problematic, since the knitting process runs inside a temporary folder that is different from the current working directory.

+

To resolve this, I propose a hacky solution: create a “manifest” file in the same folder that contains all of the library() calls.

+
+
+

Example

+

Imagine you have a small script that you want to put in an Rmarkdown document with a checkpoint.

+
# demo script
+library(MASS)
+hist(islands)
+
+
+

The checkpoint solution

+

The only way I’ve found to get checkpoint() to work inside an RMarkdown document is really a bit of a hack. The workaround is to create a manifest of required packages, and write this to an R file in the working directory.

+

For example, imagine your R script uses the MASS package, then create a manifest file like this:

+
```{r, include=FALSE}
+# write a manifest to local folder
+cat("
+library(MASS)
+",
+file = "manifest.R")
+```
+

This is hacky, since it requires you to construct the list of library() calls by hand and put these into the manifest file.

+

(Note that you can use include=FALSE in the code block, so that this code doesn’t show up in your rendered document.)

+

Once this is done, the checkpoint process from here is straightforward:

+
```{r, include=FALSE}
+# Create .checkpoint folder (in tempdir for this example)
+td <- tempdir()
+dir.create(file.path(td, ".checkpoint"), recursive = TRUE)
+
+# Create the checkpoint
+library(checkpoint)
+checkpoint("2017-03-28", checkpointLocation = td)
+```
+
+
+

Check that this works

+

Now you are ready to put these instructions in an actual code block to see what happens.

+
# write a manifest to local folder
+cat('
+library(MASS)
+',
+file = "manifest.R")
+
+# Create .checkpoint folder (in tempdir for this example)
+dir.create(file.path(tempdir(), ".checkpoint"), recursive = TRUE)
+
+# Create the checkpoint
+library(checkpoint)
+checkpoint("2017-03-28", checkpointLocation = tempdir())
+
## Scanning for packages used in this project
+
## No file at path 'C:\Users\adevries\AppData\Local\Temp\RtmpcjAflq\file82042881537.Rmd'.
+
## No file at path 'C:\Users\adevries\AppData\Local\Temp\RtmpcjAflq\file820334d6918.Rmd'.
+
## No file at path 'C:\Users\adevries\AppData\Local\Temp\RtmpcjAflq\file820337a7eae.Rmd'.
+
## - Discovered 2 packages
+
## Unable to parse 3 files:
+
## - checkpoint.Rmd
+
## - managing-checkpoint-archives.Rmd
+
## - using-checkpoint-with-knitr.Rmd
+
## Installing packages used in this project
+
##  - Installing 'MASS'
+
## MASS
+
## Installing package into 'C:/Users/adevries/AppData/Local/Temp/RtmpcjAflq/.checkpoint/2017-03-28/lib/x86_64-w64-mingw32/3.3.3'
+## (as 'lib' is unspecified)
+
## package 'MASS' successfully unpacked and MD5 sums checked
+## 
+## The downloaded binary packages are in
+##  C:\Users\adevries\AppData\Local\Temp\RtmpcjAflq\downloaded_packages
+
## checkpoint process complete
+
## ---
+

If this worked, you should see that the library path now points to tempdir() and that MASS is one of only a few packages installed:

+
.libPaths()
+## [1] ".../Temp/RtmpIVB6bI/.checkpoint/2017-03-28/lib/x86_64-w64-mingw32/3.3.2"
+## [2] ".../Temp/RtmpIVB6bI/.checkpoint/R-3.3.2"
+
installed.packages()[, "Package"]
+
##       MASS   compiler 
+##     "MASS" "compiler"
+
+
+

Your real R code:

+

Now your real R code follows, and it creates the plot, as expected:

+
library(MASS)
+hist(islands)
+

+
+
+

Conclusion

+

This is a bit of a hack, but points in a direction for getting your RMarkdown script to be checkpointed.

+
+ + + + + + + +