
Commit

solving issue #13
ptaconet committed Nov 7, 2024
1 parent dde3bdd commit f1f9a26
Showing 29 changed files with 405 additions and 493 deletions.
3 changes: 2 additions & 1 deletion NAMESPACE
@@ -7,13 +7,14 @@ export(mf_import_data)
export(mf_list_collections)
export(mf_list_variables)
export(mf_login)
export(modisfast)
export(mf_modisfast)
import(cli)
import(dplyr)
import(httr)
import(parallel)
import(purrr)
import(sf)
importFrom(cli,cli_alert_info)
importFrom(cli,cli_alert_success)
importFrom(curl,has_internet)
importFrom(lubridate,floor_date)
16 changes: 9 additions & 7 deletions R/buildUrls.R
@@ -3,6 +3,7 @@
#' of interest
#'
#' @importFrom lubridate year yday hour minute second floor_date
#' @importFrom cli cli_alert_info
#' @noRd

.buildUrls <- function(collection,
@@ -13,8 +14,8 @@
single_netcdf = TRUE,
optionalsOpendap = NULL,
credentials = NULL,
verbose = FALSE) {
ideal_date <- date_closest_to_ideal_date <- index_opendap_closest_to_date <- dimensions_url <- hour_end <- date_character <- hour_start <- number_minutes_from_start_day <- year <- day <- product_name <- month <- x <- . <- url_product <- dayofyear <- Var1 <- Var2 <- lines <- fileSizeEstimated <- NULL
verbose = "inform") {
ideal_date <- date_closest_to_ideal_date <- index_opendap_closest_to_date <- dimensions_url <- hour_end <- date_character <- hour_start <- number_minutes_from_start_day <- year <- day <- product_name <- month <- x <- . <- url_product <- dayofyear <- Var1 <- Var2 <- lines <- maxFileSizeEstimated <- NULL

.testIfCollExists(collection)
.testRoi(roi)
@@ -96,14 +97,13 @@
lubridate::year(max(timeIndices_of_interest$date_closest_to_ideal_date)),
sprintf("%03d", lubridate::yday(max(timeIndices_of_interest$date_closest_to_ideal_date))), ".", modis_tile)

fileSizeEstimated <- ((roiSpatialIndexBound[2] - roiSpatialIndexBound[1]) * (roiSpatialIndexBound[4] - roiSpatialIndexBound[3]) * (timeIndex[2] - timeIndex[1]) * length(variables)) * 0.000000575 # ie. total number of cells / size of a cell in mb
fileSizeEstimated <- round(fileSizeEstimated, 1)
maxFileSizeEstimated <- ((roiSpatialIndexBound[2] - roiSpatialIndexBound[1]) * (roiSpatialIndexBound[4] - roiSpatialIndexBound[3]) * (timeIndex[2] - timeIndex[1]) * length(variables)) * 4 # i.e. total number of cells * size of one cell (4 bytes)

table_urls <- data.frame(date = min(timeIndices_of_interest$date_closest_to_ideal_date),
name = name,
url = url,
roi_id = roiId,
fileSizeEstimated = fileSizeEstimated,
maxFileSizeEstimated = maxFileSizeEstimated,
stringsAsFactors = FALSE)
} else { # download data in multiple netcdf files (1/each time frame)
table_urls <- timeIndices_of_interest %>%
@@ -126,7 +126,7 @@
)
} else if (odap_coll_info$provider == "NASA LAADS DAAC") {
# e.g. VNP46A1
if (verbose) {
if (verbose %in% c("inform","debug")) {
cat("Getting the URLs for this collection might take some time...\n")
}
time_range <- as.Date(time_range, origin = "1970-01-01")
@@ -166,6 +166,7 @@

############## GPM_3IMERGHH.06 and GPM_3IMERGHH.07 ######################
if (collection %in% c("GPM_3IMERGHH.06", "GPM_3IMERGHHL.06", "GPM_3IMERGHHE.06", "GPM_3IMERGHH.07")) {
cli::cli_alert_info("For this collection, please ensure that hours are provided are in GMT\n")
if (collection %in% c("GPM_3IMERGHHL.06")) {
indicatif <- "-L"
} else if (collection %in% c("GPM_3IMERGHHE.06")) {
@@ -244,7 +245,8 @@
th_table_urls <- urls %>%
dplyr::mutate(url = paste0(url_product, "?", dim[i])) %>%
dplyr::mutate(name = product_name) %>%
dplyr::mutate(roi_id = roi$id[i])
dplyr::mutate(roi_id = roi$id[i]) %>%
dplyr::mutate(maxFileSizeEstimated = (abs(roiSpatialIndexBound$'1'[1] - roiSpatialIndexBound$'1'[2]) * abs(roiSpatialIndexBound$'1'[4] - roiSpatialIndexBound$'1'[3]) * length(variables)) * 4) # i.e. total number of cells * size of one cell (4 bytes)
table_urls <- rbind(table_urls, th_table_urls)
}
} else if (odap_source == "CHIRPS") {
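Note on the size estimate introduced in this file: it is a deliberately coarse upper bound that assumes each requested cell is encoded as a 4-byte (32-bit) value. A minimal sketch of the arithmetic, with purely hypothetical request dimensions (none of these variable names come from the package):

n_lon <- 1200                                    # columns selected in the OPeNDAP grid
n_lat <- 1200                                    # rows selected
n_time <- 46                                     # time steps requested
n_vars <- 2                                      # variables requested
max_bytes <- n_lon * n_lat * n_time * n_vars * 4 # 4 bytes per cell
round(max_bytes / 1e6)                           # rough upper bound in MB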
33 changes: 20 additions & 13 deletions R/mf_download_data.R
@@ -17,7 +17,7 @@
#' \describe{
#' \item{fileDl}{Boolean (dataset downloaded or failure)}
#' \item{dlStatus}{Download status : 1 = download ok ; 2 = download error ; 3 = dataset was already present in the destination file }
#' \item{fileSize}{File size on disk}
#' \item{fileSize}{File size on disk (in bytes)}
#' }
#'
#' @details
@@ -72,14 +72,14 @@
#' ### Plot the data
#' terra::plot(modis_ts)
#' }
mf_download_data <- function(df_to_dl, path = tempfile("modisfast_"), parallel = FALSE, num_workers = parallel::detectCores() - 1, credentials = NULL, verbose = TRUE, min_filesize = 5000) {
fileSize <- destfile <- fileDl <- folders <- readme_files <- source <- NULL
mf_download_data <- function(df_to_dl, path = tempfile("modisfast_"), parallel = FALSE, num_workers = parallel::detectCores() - 1, credentials = NULL, verbose = "inform", min_filesize = 5000) {
fileSize <- destfile <- fileDl <- folders <- readme_files <- source <- maxFileSizeEstimated <- NULL

source <- "earthdata"

# tests
if (!inherits(verbose, "logical")) {
stop("verbose argument must be boolean\n")
if (!inherits(verbose, "character")) {
stop("verbose argument must be a character string ('quiet'', 'inform', or 'debug') \n")
}
if (!inherits(parallel, "logical")) {
stop("parallel argument must be boolean\n")
@@ -127,7 +127,7 @@ mf_download_data <- function(df_to_dl, path = tempfile("modisfast_"), parallel =
data_to_download <- data_dl %>%
dplyr::filter(fileDl == FALSE)

if (verbose) {
if (verbose %in% c("inform","debug")) {
cat(nrow(df_to_dl), "datasets in total:", nrow(data_already_exist), "already downloaded and", nrow(data_to_download), "to download\n")
}

@@ -158,22 +158,29 @@ mf_download_data <- function(df_to_dl, path = tempfile("modisfast_"), parallel =
# GET(u$url, httr::write_disk(output), httr::progress(), config(maxredirs=-1, netrc = TRUE, netrc_file = netrc), set_cookies("LC" = "cookies"))
}

if (verbose) {
cat("Downloading the data...\n")
if (verbose %in% c("inform","debug")) {
maxFileSizeEstimated <- sum(data_dl$maxFileSizeEstimated[which(data_dl$fileDl == FALSE)])
maxFileSizeEstimated <- dplyr::if_else(round(maxFileSizeEstimated / 1e6) > 1, round(maxFileSizeEstimated / 1e6), 1) # report at least 1 MB
cat("Downloading the data in", path, "...\nMaximum estimated data size to download is ~", maxFileSizeEstimated, "MB\n")
}
if (parallel) {
cl <- parallel::makeCluster(num_workers)
parallel::clusterMap(cl, dl_func,
url = data_to_download$url, output = data_to_download$destfile, username = username, password = password,
.scheduling = "dynamic"
)
)
parallel::stopCluster(cl)
} else {
for (i in seq_len(nrow(data_to_download))) {
if (verbose) {
if (verbose %in% c("inform","debug")) {
cat("[", i, " over ", nrow(data_to_download), "]\n")
}
dl_func(url = data_to_download$url[i], output = data_to_download$destfile[i], username = username, password = password)
if(verbose %in% c("quiet","inform")){
dl_func(url = data_to_download$url[i], output = data_to_download$destfile[i], username = username, password = password)
} else if (verbose == "debug"){
httr::with_verbose(dl_func(url = data_to_download$url[i], output = data_to_download$destfile[i], username = username, password = password))
}
}
}
}
@@ -187,15 +194,15 @@ mf_download_data <- function(df_to_dl, path = tempfile("modisfast_"), parallel =
data_downloaded <- dplyr::filter(data_dl, fileSize >= min_filesize)

if (!(identical(data_dl, data_downloaded))) {
if (verbose) {
if (verbose %in% c("inform","debug")) {
cli::cli_alert_warning("Only part of the data has been downloaded. Downloading the remaining datasets one by one...\n")
}
mf_download_data(df_to_dl = df_to_dl, path = path, parallel = FALSE, credentials = credentials) # ,source=source)
} else {
# 1 : download ok
# 2 : download error
# 3 : data already existing in output folder
if (verbose) {
if (verbose %in% c("inform","debug")) {
cli::cli_alert_success("\nData were all properly downloaded under the folder(s) ", paste(as.character(unique(dirname(df_to_dl$destfile))), collapse = " and "))
cli::cli_alert_info("\nTo import the data in R, use the function modisfast::mf_import_data() rather than terra::rast() or stars::read_stars(). More info at help(mf_import_data)\n")
}
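For context, a usage sketch of the reworked string-valued verbose argument. It assumes urls is the data.frame returned by mf_get_url() and that mf_login() has already been called:

res <- mf_download_data(
  df_to_dl = urls,   # output of mf_get_url()
  path = tempdir(),
  verbose = "debug"  # "quiet", "inform", or "debug" ("debug" adds HTTP tracing via httr)
)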
2 changes: 1 addition & 1 deletion R/mf_get_opt_param.R
@@ -60,7 +60,7 @@
#' opt_param = opt_param_mod11a1
#' )))
#' }
mf_get_opt_param <- function(collection, roi, credentials = NULL, verbose = TRUE) {
mf_get_opt_param <- function(collection, roi, credentials = NULL, verbose = "inform") {
. <- odap_coll_info <- odap_source <- odap_server <- odap_timeDimName <- odap_lonDimName <- odap_latDimName <- odap_crs <- odap_urlExample <- modis_tile <- OpendapURL <- OpenDAPtimeVector <- OpenDAPXVector <- OpenDAPYVector <- roi_bbox <- Opendap_minLat <- Opendap_maxLat <- Opendap_minLon <- Opendap_maxLon <- roiSpatialIndexBound <- minLat <- maxLat <- minLon <- maxLon <- roiSpatialBound <- availableDimensions <- null_elements <- NULL

OpenDAPtimeVector <- modis_tile <- NULL
25 changes: 14 additions & 11 deletions R/mf_get_url.R
@@ -12,7 +12,7 @@
#' @param single_netcdf boolean. optional. Get the URL either as a single file that encompasses the whole time frame (TRUE) or as multiple files (1 for each date) (FALSE). Default to TRUE. Currently enabled only for MODIS and VIIRS collections.
#' @param opt_param list of optional arguments. optional. (see details).
#' @param credentials vector string of length 2 with username and password. optional if the function \link{mf_login} was previously executed.
#' @param verbose boolean. optional. Verbose (default TRUE)
#' @param verbose string. Verbose mode ("quiet", "inform", or "debug"). Default "inform".
#'
#' @return a data.frame with one row for each dataset to download and 5 columns :
#' \describe{
@@ -21,6 +21,7 @@
#' \item{collection}{Name of the collection}
#' \item{name}{Indicative name for the dataset}
#' \item{url}{https OPeNDAP URL of the dataset}
#' \item{maxFileSizeEstimated}{Maximum estimated data size for the dataset (in bytes)}
#' }
#'
#' @details
@@ -29,7 +30,7 @@
#'
#' Argument \code{variables} : For each collection, variables available can be retrieved with the function \link{mf_list_variables}
#'
#' Argument \code{time_range} : Can be provided either as i) a single date (e.g. \code{as.Date("2017-01-01"))} or ii) a time frame provided as two bounding dates (starting and ending time) ( e.g. \code{as.Date(c("2010-01-01","2010-01-30"))}) or iii) a POSIXlt single time (e.g. \code{as.POSIXlt("2010-01-01 18:00:00")}) or iv) a POSIXlt time range (e.g. \code{as.POSIXlt(c("2010-01-01 18:00:00","2010-01-02 09:00:00"))}) for the half-hourly collection (GPM_3IMERGHH.06). If POSIXlt, times must be in UTC.
#' Argument \code{time_range} : Can be provided either as i) a single date (e.g. \code{as.Date("2017-01-01")}) or ii) a time frame provided as two bounding dates (starting and ending time) (e.g. \code{as.Date(c("2010-01-01","2010-01-30"))}) or iii) a POSIXlt single time (e.g. \code{as.POSIXlt("2010-01-01 18:00:00")}) or iv) a POSIXlt time range (e.g. \code{as.POSIXlt(c("2010-01-01 18:00:00","2010-01-02 09:00:00"))}) for the half-hourly collection (GPM_3IMERGHH.06). If POSIXlt, hours must be provided in GMT.
#'
#' Argument \code{single_netcdf} : for MODIS and VIIRS products from LP DAAC: download the data as a single file encompassing the whole time frame (TRUE) or as multiple files, one for each date (FALSE), which is the behaviour for the other collections (GPM and SMAP).
#'
@@ -97,8 +98,8 @@ mf_get_url <- function(collection,
single_netcdf = TRUE,
opt_param = NULL,
credentials = NULL,
verbose = TRUE) {
existing_variables <- odap_coll_info <- odap_timeDimName <- odap_lonDimName <- odap_latDimName <- . <- name <- destfile <- roi_id <- NULL
verbose = "inform") {
existing_variables <- odap_coll_info <- odap_timeDimName <- odap_lonDimName <- odap_latDimName <- . <- name <- destfile <- roi_id <- maxFileSizeEstimated <- NULL

## tests :
# collection
@@ -117,20 +118,19 @@
stop("single_netcdf argument must be boolean\n")
}
# verbose
if (!inherits(verbose, "logical")) {
stop("verbose argument must be boolean\n")
if (!inherits(verbose, "character")) {
stop("verbose argument must be a character string ('quiet'', 'inform', or 'debug') \n")
}
# Internet connection
.testInternetConnection()
# credentials
.testLogin(credentials)

if (verbose) {
if (verbose %in% c("inform","debug")) {
cat("Building the URLs...\n")
}

if (is.null(opt_param)) {
# if(verbose){cat("Retrieving opendap arguments for the collection specified...\n")}
opt_param <- mf_get_opt_param(collection, roi, verbose = verbose)
}

@@ -167,11 +167,14 @@
dplyr::mutate(name = paste0(name, ".", output_format)) %>%
dplyr::arrange(roi_id, date) %>%
dplyr::mutate(collection = collection) %>%
dplyr::select(roi_id, date, collection, name, url) %>% # ,fileSizeEstimated) %>%
dplyr::select(roi_id, date, collection, name, url, maxFileSizeEstimated) %>%
dplyr::rename(time_start = date, id_roi = roi_id)

if (verbose) {
cli::cli_alert_success("URL(s) built\n")
maxFileSizeEstimated <- dplyr::if_else(round(sum(table_urls$maxFileSizeEstimated) / 1e6) > 1, round(sum(table_urls$maxFileSizeEstimated) / 1e6), 1) # report at least 1 MB

if (verbose %in% c("inform","debug")) {
cli::cli_alert_success("URL(s) built.\n")
cat("Maximum estimated data size is ~", maxFileSizeEstimated, "MB\n")
}

return(table_urls)
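A sketch of how the new maxFileSizeEstimated column could be used to check the size of a request before downloading. The collection, variable, and dates are illustrative, and roi is assumed to be an sf polygon with an id column, as required elsewhere in the package:

urls <- mf_get_url(
  collection = "MOD11A1.061",
  variables  = c("LST_Day_1km"),
  roi        = roi,
  time_range = as.Date(c("2017-01-01", "2017-01-31"))
)
sum(urls$maxFileSizeEstimated) / 1e6  # total estimated size in MB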
8 changes: 4 additions & 4 deletions R/mf_import_data.R
@@ -8,7 +8,7 @@
#' @param proj_epsg numeric. EPSG of the desired projection for the output raster (default : source projection of the data).
#' @param roi_mask \code{SpatRaster} or \code{SpatVector} or \code{sf}. Area beyond which data will be masked. Typically, the input ROI of \link{mf_get_url} (default : NULL (no mask))
#' @param vrt boolean. Import virtual raster instead of SpatRaster. Useful for very large files. (default : FALSE)
#' @param verbose boolean. optional. Verbose (default TRUE)
#' @param verbose string. Verbose mode ("quiet", "inform", or "debug"). Default "inform".
#' @inheritParams mf_get_url
#' @param ... not used
#'
@@ -76,7 +76,7 @@ mf_import_data <- function(path,
proj_epsg = NULL,
roi_mask = NULL,
vrt = FALSE,
verbose = TRUE,
verbose = "inform",
...) {
rasts <- NULL

@@ -92,7 +92,7 @@
stop("paramater 'output_class' must be SpatRaster.")
}

if (verbose) {
if (verbose %in% c("inform","debug")) {
cat("Importing the dataset as a",output_class,"object...\n")
}

@@ -102,7 +102,7 @@
rasts <- .import_gpm(path, output_class, proj_epsg, roi_mask)
}

if (verbose) {
if (verbose %in% c("inform","debug")) {
cli_alert_success("Dataset imported")
}

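Usage sketch for the import step with the new verbose modes. The path and collection are hypothetical, and the collection argument is assumed to be passed as documented via @inheritParams mf_get_url:

r <- mf_import_data(
  path = "/tmp/modisfast/MOD11A1.061", # folder written by mf_download_data()
  collection = "MOD11A1.061",
  verbose = "quiet"                    # suppress progress messages
)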
19 changes: 16 additions & 3 deletions R/mf_list_variables.R
@@ -23,7 +23,7 @@
#' (df_varinfo <- mf_list_variables("MOD11A1.061"))
#' }
#'
mf_list_variables <- function(collection, credentials = NULL) { # for a given collection, get the available variables and associated information
mf_list_variables <- function(collection, credentials = NULL, verbose = "inform") { # for a given collection, get the available variables and associated information

.testIfCollExists(collection)
.testInternetConnection()
@@ -37,7 +37,14 @@ mf_list_variables <- function(collection, credentials = NULL) { # for a given co

InfoURL <- paste0(URL, ".info")
vector_response <- httr::GET(InfoURL)
vector_response <- httr::GET(vector_response$url)
f <- function() {
httr::GET(vector_response$url)
}
if(verbose %in% c("quiet","inform")){
vector_response <- f()
} else if (verbose == "debug"){
vector_response <- httr::with_verbose(f())
}
if (vector_response$status_code == 400) {
stop("Bad request\n")
}
@@ -55,7 +62,13 @@ mf_list_variables <- function(collection, credentials = NULL) { # for a given co

DdsURL <- paste0(URL, ".dds")
vector_response <- httr::GET(DdsURL)
vector_response <- httr::GET(vector_response$url)

if(verbose %in% c("quiet","inform")){
vector_response <- f()
} else if (verbose == "debug"){
vector_response <- httr::with_verbose(f())
}

vector <- httr::content(vector_response, "text", encoding = "UTF-8")
vector <- strsplit(vector, "\n")
vector <- vector[[1]][-length(vector[[1]])]
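Usage sketch for the verbose argument added to mf_list_variables() (collection name taken from the examples above). With "debug", the OPeNDAP requests are routed through httr::with_verbose(), which prints the libcurl exchange:

vars <- mf_list_variables("MOD11A1.061", verbose = "debug")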
22 changes: 14 additions & 8 deletions R/mf_login.R
@@ -4,6 +4,7 @@
#'
#' @inheritParams mf_get_url
#'
#' @import httr
#' @importFrom cli cli_alert_success
#'
#' @return None.
@@ -20,27 +21,32 @@
#' mf_login(credentials = c(username, password))
#' }
#'
mf_login <- function(credentials, verbose = TRUE) {
mf_login <- function(credentials, verbose = "inform") {
if (!inherits(credentials, "character") || length(credentials) != 2) {
stop("credentials must be a vector character string of length 2 (username and password)\n")
}
.testInternetConnection()
if (verbose) {
if (verbose %in% c("inform","debug")) {
cat("Checking credentials...\n")
}

# if(source=="earthdata"){
# x <- httr::GET(url = "https://urs.earthdata.nasa.gov/oauth/authorize?app_type=401&client_id=W8DRh2DCZP0iOacUCdwB1g&response_type=code&redirect_uri=https%3A%2F%2Fopendap.cr.usgs.gov%2Fopendap%2Fhyrax%2Foauth2&state=aHR0cHM6Ly9vcGVuZGFwLmNyLnVzZ3MuZ292L29wZW5kYXAvaHlyYXgvTU9EMTFBMi4wNjEvaDE3djA3Lm5jbWwuYXNjaWk%2FdGltZQ",httr::authenticate(user=credentials[1], credentials[2]),config = list(maxredirs=-1)) # testing credentials must be improved...
x <- httr::GET(url = "https://opendap.cr.usgs.gov/opendap/hyrax/MOD11A2.061/h17v07.ncml.ascii?time") # testing credentials must be improved...
x <- httr::GET(x$url, httr::authenticate(credentials[1], credentials[2]), config = list(maxredirs = -1))

f <- function() {
httr::GET(x$url, httr::authenticate(credentials[1], credentials[2]), config = list(maxredirs = -1))
}
if(verbose %in% c("quiet","inform")){
x <- f()
} else if (verbose == "debug"){
x <- httr::with_verbose(f())
}
httr::stop_for_status(x, "login to Earthdata. Check your username and password. The service might also be unavailable (error 503).")
httr::warn_for_status(x)
options(earthdata_user = credentials[1])
options(earthdata_pass = credentials[2])
options(earthdata_mf_login = TRUE)
# }
# if(verbose){cat("Successfull login to",source,"\n")}
if (verbose) {

if (verbose %in% c("inform","debug")) {
cli::cli_alert_success("Successfull login to Earthdata\n")
}
}
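Usage sketch for the new verbose modes of mf_login(), with placeholder credentials. "debug" wraps the credential check in httr::with_verbose() to help diagnose Earthdata connection problems:

mf_login(credentials = c("my_username", "my_password"), verbose = "debug")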
