VLucet · VLucet · Nov 30, 2020 · Oct 15, 2020 · Oct 15, 2020 · Oct 17, 2020
diff --git a/.github/workflows/R_CMD_check.yml b/.github/workflows/R_CMD_check.yml
@@ -18,16 +18,19 @@ jobs:
           - {os: macOS-latest, r: 'devel'}
           #- {os: ubuntu-latest, r: 'release'} # r-lib/actions not supported  # r-lib/actions not supported (setup-r OS)
           #- {os: ubuntu-latest, r: 'devel'} # r-lib/actions not supported  # r-lib/actions not supported (setup-r OS)
-          - {os: ubuntu-16.04, r: '3.6', cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}
-          # - {os: ubuntu-16.04, r: 'devel', cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}  # r-lib/actions not supported (setup-r R version)
-          - {os: ubuntu-18.04, r: '3.6', cran: "https://demo.rstudiopm.com/all/__linux__/bionic/latest"}
-          #- {os: ubuntu-18.04, r: 'devel', cran: "https://demo.rstudiopm.com/all/__linux__/bionic/latest"} # r-lib/actions not supported (setup-r R versin)
+          - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
+          - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
 
     env:
       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
       RSPM: ${{ matrix.config.rspm }}
 
     steps:
+      - name: SetVar
+        run: |
+          echo "::set-env name=TMP::$env:USERPROFILE\AppData\Local\Temp"
+          echo "::set-env name=TEMP::$env:USERPROFILE\AppData\Local\Temp"
+
       - uses: actions/checkout@v2
 
       - uses: r-lib/actions/setup-r@master
@@ -37,6 +40,7 @@ jobs:
 
       - uses: r-lib/actions/setup-tinytex@v1
         if: runner.os != 'Windows'
+
 
       #- name: Install GDAL + qpdf / Linux
       #  if: runner.os == 'Linux'

diff --git a/.travis.yml b/.travis.yml
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: rgovcan
 Type: Package
 Title: A package to search for and download data from the Canadian Open Government portal
-Version: 0.1.1
+Version: 0.1.1.9000
 Description: rgovcan allows users to search for existing dataset on the Canadian Open Government portal (<https://open.canada.ca/en>).
 Authors@R: c(
     person("Valentin", "Lucet", email = "[email protected]",
@@ -10,11 +10,11 @@ Authors@R: c(
     comment = c(ORCID = "0000-0001-6619-9874"), role = c("ctb"))
     )
 Imports:
-    cli, ckanr, crayon, dplyr, purrr, stringr
+    cli, ckanr, crayon, dplyr, purrr
+Suggests: 
+    testthat
 License: GPL-3 + file LICENSE
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.1.0
+RoxygenNote: 7.1.1
 Roxygen: list(markdown = TRUE)
-Suggests: 
-    testthat
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,8 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(govcan_dl_resources,character)
+S3method(govcan_dl_resources,ckan_package)
+S3method(govcan_dl_resources,ckan_package_stack)
 S3method(govcan_dl_resources,ckan_resource)
 S3method(govcan_dl_resources,ckan_resource_stack)
 S3method(govcan_get_resources,character)
@@ -13,6 +16,7 @@ export(govcan_get_resources)
 export(govcan_search)
 export(govcan_setup)
 importFrom(cli,style_bold)
+importFrom(cli,style_underline)
 importFrom(crayon,blue)
 importFrom(crayon,green)
 importFrom(crayon,red)

diff --git a/R/ckan_package_stack-class.R b/R/ckan_package_stack-class.R
@@ -20,7 +20,7 @@ print.ckan_package_stack <- function(x, ...) {
   } else {
     cat("  Packages:  \n")
     cli::cat_line()
-    purrr::map(x[1:dim(x)], print_ckan_package_custom)
+    purrr::map(x[seq_len(dim(x))], print_ckan_package_custom)
   }
 }
 

diff --git a/R/ckan_resource_stack-class.R b/R/ckan_resource_stack-class.R
@@ -16,7 +16,7 @@ print.ckan_resource_stack <- function(x, ...) {
   cli::cat_line()
   cat("  Resources:  \n")
   cli::cat_line()
-  purrr::map(x[1:dim(x)], print_ckan_resource_custom)
+  purrr::map(x[seq_len(dim(x))], print_ckan_resource_custom)
 }
 
 # Custom printing function for packages inside a stack

diff --git a/R/govcan_dl_resources.R b/R/govcan_dl_resources.R
@@ -4,121 +4,147 @@
 #' @description Download resources attached to a specific record or (i.e. a CKAN
 #' package) or to a stack of packages.
 #'
-#' @param resources An object of the class ckan_package_stack or ckan_package,
-#' or an id of a specific record or (i.e. a CKAN package), or an object of type
-#' ckan_resource or ckan_resource_stack.
-#' @param file_formats (character vector) A character vector with file formats
-#' to be downloaded, any of :
-#'   * CSV
-#'   * JSON
-#'   * SHP
-#' @param where (string) One of "session" is files have to be charged in the
-#' session or a path to the folder in which to download the files.
-#' @param ... extra argument(s).
+#' @param resources An object of class `ckan_package_stack` or `ckan_package`,
+#' or the id of a specific record (i.e. a CKAN package), or an object of class
+#' `ckan_resource` or `ckan_resource_stack`.
+#' @param excluded (character vector) Files in any of these formats will *not* be downloaded; matching is case-insensitive (`NULL`, the default, disables this filter).
+#' @param included (character vector) Only files in one of these formats will be downloaded; matching is case-insensitive and, when both are supplied, this filter takes precedence over `excluded` (`NULL`, the default, disables this filter).
+#' @param path (character) Path to the directory in which downloaded files are stored (default is the current working directory).
+#' @param id_as_filename (logical) Use the resource identifier as file name. This is particularly useful when two different resources have the same filename. 
+#' @param ... Curl arguments passed on to crul::verb-GET (see [ckanr::ckan_fetch()]).
 #'
+#' @details
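+#' File names are taken from the last component of each resource URL, or built from the resource id (keeping the file extension) when `id_as_filename` is `TRUE`. Resources served over FTP, resources whose URL does not end in a recognisable file name, and files already present in `path` are skipped.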
+
 #' @export
-govcan_dl_resources <- function(resources, file_formats, where, ...) {
+govcan_dl_resources <- function(resources,
+                                excluded,
+                                included,
+                                path,
+                                id_as_filename,
+                                ...) {
   UseMethod("govcan_dl_resources")
 }
 
 #' @describeIn govcan_dl_resources Method for ckan_resource objects.
 #' @export
 govcan_dl_resources.ckan_resource <- function(resources,
-                                              file_formats = c("CSV"),
-                                              where = getwd(), ...){
-  all_formats <- unlist(resources$format)
-  wanted_indices <- (all_formats %in% file_formats)
-
-  if (wanted_indices){
-    if (where == "session"){
-
-      message("Warning: the session option is currently not working well due to issues in ckanr")
-      ckanr::ckan_fetch(resources$url, store = "session")
-      write_import_message(resources)
-
-    } else if (where != "session"){
-
-      resource_name <- get_resource_name(resources)
-      path <- create_storing_path(where, resource_name)
-
-      ckanr::ckan_fetch(resources$url, store = "disk", path = path)
-      write_dl_message(resources, path)
-
-    }
+                                              excluded = NULL,
+                                              included = NULL,
+                                              path = ".",
+                                              id_as_filename = FALSE,
+                                              ...) {
+  fmt <- tolower(resources$format)
+  url <- resources$url
+
+  msgInfo(resources$name, paste0("(", fmt, ")"), "",
+    appendLF = FALSE)
+
+  if (grepl("^ftp://", url)) {
+    out <- empty_entry()
+    msgWarning("skipped (ftp not supported yet).")
   } else {
-    msgWarning("No match, no download!")
+    # select type of files to be downloaded
+    tmp <- TRUE
+    if (!is.null(excluded))
+      tmp <- !(fmt %in% tolower(excluded))
+    if (!is.null(included))
+      tmp <- fmt %in% tolower(included)
+
+    if (tmp) {
+      # extract filename from url 
+      fl <- extract_filename(url)
+      if (!is.null(fl)) {
+        if (id_as_filename) {
+          fl <- paste0(resources$id, ".", extract_extension(fl))
+          flp <- normalizePath(file.path(path, fl), mustWork = FALSE)
+        } else {
+          flp <- normalizePath(file.path(path, fl), mustWork = FALSE)
+        }
+        if (file.exists(flp)) {
+          # prevents from downloading the same file several times
+          msgWarning("skipped (already downloaded).")
+          out <- empty_entry("disk", fmt = fmt, path = flp)
+        } else {
+          out <- ckanr::ckan_fetch(url, format = fmt,
+              store = "disk", path = flp)
+          msgSuccess()
+        }
+      } else {
+        out <- empty_entry(fmt = fmt)
+        msgWarning("skipped (not supported).")
+      }
+    } else {
+      out <- empty_entry(fmt = fmt)
+      msgWarning("skipped (format not selected).")
+    }
   }
+
+  out$url <- url
+  out$package_id <- resources$package_id
+  out$id <- resources$id
+  out <- null_to_na(out)
+  out <- as.data.frame(out)
+  class(out) <- c("tbl_df", "tbl", "data.frame")
+  ord <- c("id", "package_id", "url", "path", "fmt")
+  out[, c(ord, setdiff(names(out), ord))]
 }
 
-#' @describeIn govcan_dl_resources Method for ckan_resource_stack objects.
+#' @describeIn govcan_dl_resources Method for `ckan_resource_stack` objects.
 #' @export
-govcan_dl_resources.ckan_resource_stack <- function(resources,
-                                                    file_formats = c("CSV"),
-                                                    where = getwd(), ...){
-
-  all_formats <- unlist(purrr::map(resources, ~.x$format))
-  wanted_indices <- which(all_formats %in% file_formats)
-
-  if (length(wanted_indices)){
-    if (where == "session"){
-
-      message("Warning: the session option is currently not working well due to issues in ckanr")
-
-      for (resource in wanted_indices){
-        resource_tmp <-  resources[[resource]]
-
-        ckanr::ckan_fetch(resource_tmp$url, store = "session")
-        write_import_message(resource_tmp)
-      }
-
-    } else if (where != "session"){
-
-      for (resource in wanted_indices){
-
-        resource_tmp <- resources[[resource]]
+govcan_dl_resources.ckan_resource_stack <- function(resources, ...) {
+  out <- lapply(resources, govcan_dl_resources, ...)
+  do.call(rbind, out)
+}
 
-        resource_name <- get_resource_name(resource_tmp)
-        path <- create_storing_path(where, resource_name)
 
-        ckanr::ckan_fetch(resource_tmp$url, store = "disk", path = path)
-        write_dl_message(resource_tmp, path)
+#' @describeIn govcan_dl_resources Method for `character` objects.
+#' @export
+govcan_dl_resources.character <- function(resources, ...) {
+  govcan_dl_resources(govcan_get_resources(resources), ...)
+}
 
-      }
-    }
-  }
+#' @describeIn govcan_dl_resources Method for `ckan_package` objects.
+#' @export
+govcan_dl_resources.ckan_package <- function(resources, ...) {
+    govcan_dl_resources(resources$id, ...)
 }
 
+#' @describeIn govcan_dl_resources Method for `ckan_package_stack` objects.
+#' @export
+govcan_dl_resources.ckan_package_stack <- function(resources, ...) {
+    out <- lapply(lapply(resources, `[[`, "id"), govcan_dl_resources, ...)
+    do.call(rbind, out)
+}
 
-# Helpers function for govcan_dl_resources
 
-get_resource_name <- function(resource_tmp){
-  name_extracted <- unlist(stringr::str_extract_all(resource_tmp$name,
-                                                    stringr::boundary("word")))
-  extension <- resource_tmp$format
 
-  if (extension == "SHP"){
-    extension <- "zip"
-  }
+# HELPERS
 
-  resource_name <- paste0(c(name_extracted, ".", extension),collapse = "")
+empty_entry <- function(store = NA_character_, 
+                        fmt = NA_character_, 
+                        data = NULL, 
+                        path = NA_character_) {
+  list(
+    store = store,
+    fmt = fmt,
+    data = data,
+    path = path
+  )
 }
 
-create_storing_path <- function(where, resource_name){
-  if (where == "wd"){
-    path <- paste0(getwd(), "/", resource_name)
-  } else {
-    path <- paste0(where, resource_name)
-  }
-  path
+extract_filename <- function(x, sep = "/") {
+  # extract the last part of the path or url 
+  pat <- paste0(".*", sep, "(.+)$")
+  tmp <- sub(pat, "\\1", x)
+  # check whether it contains file basename + file extension 
+  # the regex below should cover 99% of common file extensions
+  if (grepl('[[:graph:]]+\\.[[:alnum:]\\+-\\!]+$', tmp)) {
+    tmp
+  } else NULL
 }
 
-write_import_message <- function(resource_tmp){
-  cat("Dataset ", resource_tmp$name, " imported successfully to session")
+extract_extension <- function(x) {
+  sub('[[:graph:]]+\\.([[:alnum:]\\+-\\!]+)$', "\\1", x)
 }
 
-write_dl_message <- function(resource_tmp, path){
-  cli::cat_rule()
-  cat("",resource_tmp$format, "file named", resource_tmp$name, "downloaded successfully \n")
-  cat(" path to file is:", path, "\n")
-  cli::cat_rule()
-}
diff --git a/R/govcan_search.R b/R/govcan_search.R
@@ -9,15 +9,15 @@
 #' @param only_results (logical) Whether the function should return only the
 #' results without the query metadata (default is `TRUE`)
 #' @param format_results (logical) Whether the function should return a
-#' formatted output of the results as a tibble or an unformatted version under
+#' formatted output of the results as a `tibble` or an unformatted version under
 #' the form of a list of CKAN packages (default is `FALSE`)
 #' @param ... More arguments to be passed on to [ckanr::package_search()]
 #'
 #' @return If `only_results` is `TRUE` and `format_results` is `FALSE`
 #' (recommended), will return only the results of the search as a
 #' `CKAN_package_stack`. If `only_results` is `FALSE`, will return a list
-#' including also the query metadata. If format_results is `TRUE`, the function
-#' formats the output as a data.frame (not CKAN packages).
+#' including also the query metadata. If `format_results` is `TRUE`, the 
+#' function formats the output as a data.frame (not CKAN packages).
 #'
 #' @export
 govcan_search <- function(keywords,
@@ -31,16 +31,15 @@ govcan_search <- function(keywords,
   msgInfo("Searching the Open Portal for records matching:", kwds)
 
   # Collate all keywords
-  keywords_collated <- paste0(keywords, "+", collapse = "")
-  keywords_collated <- substr(keywords_collated, 1,
-    nchar(keywords_collated) - 1)
+  keywords_collated <- paste(keywords, collapse = "+")
 
   # Perform query
   as <- ifelse(format_results, "table", "list")
   query_results <- ckanr::package_search(q = keywords_collated,
                                          rows = records,
                                          as = as,
                                          ...)
+
   if (format_results) {
     query_results$results <- dplyr::as_tibble(query_results$results)
   }