Skip to content

Commit

Permalink
Merge first pass of GridFS support
Browse files Browse the repository at this point in the history
This is still experimental.
  • Loading branch information
jeroen authored Mar 22, 2018
1 parent 91ecd63 commit c0268f8
Show file tree
Hide file tree
Showing 16 changed files with 567 additions and 61 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@
^appveyor\.yml$
^tests/testthat/specifications/(?!source).*$
^tests/testthat/specifications/source/(?!bson-).*$
^test.R$

3 changes: 1 addition & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ matrix:
- os: linux
dist: trusty
env: R_CODECOV=true
r_check_args: '--use-valgrind'
r: devel
- os: osx
osx_image: xcode9.2
Expand Down Expand Up @@ -47,4 +46,4 @@ notifications:
on_failure: change

after_success:
- if [[ "${R_CODECOV}" ]]; then R -e 'covr::codecov()'; fi
- if [[ "${R_CODECOV}" ]]; then R -e 'covr::codecov(type = "all")'; fi
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ Roxygen: list(markdown = TRUE)
Suggests:
spelling
Language: en-GB
Encoding: UTF-8
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ S3method(print,miniprint)
S3method(print,mongo)
S3method(print,mongo_collection)
S3method(print,mongo_iter)
export(gridfs)
export(mongo)
export(mongo_options)
export(ssl_options)
Expand Down Expand Up @@ -49,6 +50,14 @@ useDynLib(mongolite,R_mongo_cursor_next_bsonlist)
useDynLib(mongolite,R_mongo_cursor_next_json)
useDynLib(mongolite,R_mongo_cursor_next_page)
useDynLib(mongolite,R_mongo_get_default_database)
useDynLib(mongolite,R_mongo_gridfs_download)
useDynLib(mongolite,R_mongo_gridfs_drop)
useDynLib(mongolite,R_mongo_gridfs_list)
useDynLib(mongolite,R_mongo_gridfs_new)
useDynLib(mongolite,R_mongo_gridfs_read)
useDynLib(mongolite,R_mongo_gridfs_remove)
useDynLib(mongolite,R_mongo_gridfs_upload)
useDynLib(mongolite,R_mongo_gridfs_write)
useDynLib(mongolite,R_mongo_log_level)
useDynLib(mongolite,R_mongo_restore)
useDynLib(mongolite,R_null_ptr)
Expand Down
2 changes: 2 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
1.6
- Update mongo-c-driver to upstream 1.9.2
- Expose a run() method to execute commands
- Collection and cursor pointers now protect parent from GC
- Make internal functions static

1.5
- Change default value of 'filters' to NULL for MongoDB < 3.4
Expand Down
137 changes: 137 additions & 0 deletions R/gridfs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#' GridFS API
#'
#' Connect to a GridFS database.
#'
#' @inheritParams mongo
#' @export
#' @param prefix string to prefix the collection name
#' @return An environment of class `gridfs` holding the methods `list`,
#' `upload`, `download`, `read`, `write`, `remove` and `drop`.
#' @examples # New GridFS
#' fs <- gridfs(url = "mongodb+srv://readwrite:[email protected]/test")
#' input <- R.home('doc/NEWS.pdf')
#' fs$upload(input)
#' fs$download('NEWS.pdf', 'output.pdf')
#' hashes <- tools::md5sum(c(input, 'output.pdf'))
#' unlink('output.pdf')
#' stopifnot(hashes[[1]] == hashes[[2]])
#'
#' # Insert Binary Data
#' fs$write('iris3', serialize(datasets::iris3, NULL))
#' out <- unserialize(fs$read('iris3'))
#' stopifnot(all.equal(out, datasets::iris3))
#'
#' # Show what we have
#' fs$list()
#' fs$drop()
gridfs <- function(db = "test", url = "mongodb://localhost", prefix = "fs", options = ssl_options()){
  # `options` is spliced into the call as additional named arguments
  client <- do.call(mongo_client_new, c(list(uri = url), options))

  # Get a database: when `db` was not supplied (or is NULL), use the name
  # returned by mongo_get_default_database() if it is non-empty; otherwise
  # `db` keeps its current value.
  if(missing(db) || is.null(db)){
    url_db <- mongo_get_default_database(client)
    if(length(url_db) && nchar(url_db))
      db <- url_db
  }

  fs <- mongo_gridfs_new(client, prefix, db)

  # Record the settings this object was created with
  orig <- list(
    prefix = prefix,
    db = db,
    url = url,
    options = options
  )

  # Keep the contents of the PEM file alongside the settings.
  # NOTE(review): presumably so the connection can be re-created even if the
  # file is no longer available -- confirm against the callers of `orig`.
  if(length(options$pem_file) && file.exists(options$pem_file))
    attr(orig, "pemdata") <- readLines(options$pem_file)
  fs_object(fs, client, orig)
}

# Build the user-facing GridFS object.
#
# The result is an environment whose bindings are the methods (drop, list,
# upload, download, read, write, remove). Each method is a closure that
# forwards to the matching mongo_gridfs_*() helper using the `fs` handle
# passed in here.
#
# `client` and `orig` are not referenced by any of the methods below.
# NOTE(review): presumably they are kept in this frame (the parent of the
# method environment) to keep the client alive and to remember the connection
# settings -- confirm.
fs_object <- function(fs, client, orig){
# local() evaluates the block in a fresh environment; environment() on the
# last line returns that environment, so `self` holds exactly the bindings
# defined here.
self <- local({
drop <- function(){
mongo_gridfs_drop(fs)
}
# Note: this binding is named `list`, so inside this environment the name
# `list` refers to this method rather than base::list.
list <- function(filter = '{}', options = '{}'){
mongo_gridfs_list(fs, filter, options)
}
# `name` defaults to the file name part of `path`
upload <- function(path, name = basename(path)){
mongo_gridfs_upload(fs, name, path)
}
# `path` defaults to `name`
download <- function(name, path = name){
mongo_gridfs_download(fs, name, path)
}
read <- function(name){
mongo_gridfs_read(fs, name)
}
write <- function(name, data){
mongo_gridfs_write(fs, name, data)
}
remove <- function(name){
mongo_gridfs_remove(fs, name)
}
environment()
})
# Lock the environment and its bindings so methods cannot be added or replaced
lockEnvironment(self, TRUE)
structure(self, class=c("gridfs", "jeroen", class(self)))
}

# Thin wrapper around the native routine R_mongo_gridfs_new: forwards the
# client, prefix and db arguments unchanged and returns the native result.
#' @useDynLib mongolite R_mongo_gridfs_new
mongo_gridfs_new <- function(client, prefix, db){
  handle <- .Call(R_mongo_gridfs_new, client, prefix, db)
  handle
}

# Thin wrapper around the native routine R_mongo_gridfs_drop: forwards the
# `fs` handle and returns whatever the native call returns.
#' @useDynLib mongolite R_mongo_gridfs_drop
mongo_gridfs_drop <- function(fs){
  dropped <- .Call(R_mongo_gridfs_drop, fs)
  dropped
}

# List files via the native routine R_mongo_gridfs_list and return them as a
# data frame with columns id, name, size and date.
#
# `filter` and `opts` are passed through bson_or_json() before the call.
#' @useDynLib mongolite R_mongo_gridfs_list
mongo_gridfs_list <- function(fs, filter, opts){
  query <- bson_or_json(filter)
  query_opts <- bson_or_json(opts)
  # Flatten each element of the native result into a plain vector
  columns <- lapply(.Call(R_mongo_gridfs_list, fs, query, query_opts), unlist, recursive = TRUE)
  names(columns) <- c('id', 'name', 'size', 'date')
  # NOTE(review): the division by 1000 suggests the native value is in
  # milliseconds since the epoch -- confirm against the C source.
  columns$date <- structure(columns$date / 1000, class = c("POSIXct", "POSIXt"))
  data.frame(columns, stringsAsFactors = FALSE)
}

# Upload local files, one native call per file.
#
# `name` and `path` are parallel vectors of equal length; every path must
# exist (normalizePath() is called with mustWork = TRUE). Returns the `id`
# field of each native result, named by `name`.
#' @useDynLib mongolite R_mongo_gridfs_upload
mongo_gridfs_upload <- function(fs, name, path){
  stopifnot(is.character(name))
  path <- normalizePath(path, mustWork = TRUE)
  stopifnot(length(name) == length(path))
  ids <- rep(NA, length(name))
  for(idx in seq_along(name)){
    uploaded <- .Call(R_mongo_gridfs_upload, fs, name[idx], path[idx])
    ids[idx] <- uploaded$id
  }
  names(ids) <- name
  ids
}

# Download files to local paths, one native call per file.
#
# `name` and `path` are parallel vectors of equal length. The target paths
# need not exist yet (normalizePath() is called with mustWork = FALSE).
# Returns the per-file results of the native call, named by `name`.
#' @useDynLib mongolite R_mongo_gridfs_download
mongo_gridfs_download <- function(fs, name, path){
  stopifnot(is.character(name))
  path <- normalizePath(path, mustWork = FALSE)
  stopifnot(length(name) == length(path))
  out <- rep(NA, length(name))
  for(i in seq_along(name)){
    # Store each result in its own slot. Assigning to `out` as a whole would
    # discard the preallocated vector, keep only the last result, and make
    # the names below mismatch when several files are requested.
    # NOTE(review): assumes the native call returns a length-one value --
    # confirm against the C source.
    out[i] <- .Call(R_mongo_gridfs_download, fs, name[i], path[i])
  }
  structure(out, names = name)
}

# Write a raw vector under the given name via the native routine
# R_mongo_gridfs_write.
#
# Fails early unless `data` is a raw vector and `name` is character; the
# checks run in that order.
#' @useDynLib mongolite R_mongo_gridfs_write
mongo_gridfs_write <- function(fs, name, data){
  stopifnot(is.raw(data), is.character(name))
  written <- .Call(R_mongo_gridfs_write, fs, name, data)
  written
}

# Thin wrapper around the native routine R_mongo_gridfs_read: forwards the
# `fs` handle and `name`, and returns whatever the native call returns.
#' @useDynLib mongolite R_mongo_gridfs_read
mongo_gridfs_read <- function(fs, name){
  contents <- .Call(R_mongo_gridfs_read, fs, name)
  contents
}

# Remove files by name, one native call per element of `name`.
#
# Returns a character vector holding the `id` field of each native result;
# vapply() enforces that every id is a single string.
#' @useDynLib mongolite R_mongo_gridfs_remove
mongo_gridfs_remove <- function(fs, name){
  remove_one <- function(filename){
    removed <- .Call(R_mongo_gridfs_remove, fs, filename)
    removed$id
  }
  vapply(name, remove_one, character(1))
}
109 changes: 109 additions & 0 deletions gridfs.rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
---
title: "Proposal: GridFS Design in mongolite"
output:
word_document: default
html_document:
df_print: paged
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

The GridFS API in mongolite will be a new top-level object class, consistent with the current API for instantiating regular MongoDB collection objects.

```r
fs <- mongolite::gridfs(db = "test", url = "mongodb://localhost")
```

The initial API will focus on basic read/write/delete operations.

## Error Handling

All methods will automatically translate `mongoc` exceptions into R errors.

## Listing Files

Returns a data frame with files and fixed metadata (size, date, content-type, etc.).

```r
list <- fs$list(filter = '{}', options = '{}')
```

__*References*__:

- [mongoc_gridfs_find_with_opts](http://mongoc.org/libmongoc/current/mongoc_gridfs_t.html) for listing
- [mongoc_gridfs_file_t](http://mongoc.org/libmongoc/current/mongoc_gridfs_file_t.html) for reading file properties

## Reading Files

A file can be read either into a buffer, or streamed to a file or connection. The default behavior is to read the entire file and return the data in a raw data vector:

```r
buf <- fs$read(name = "myfile.bin")
```

Alternatively the user can supply an R connection object that we can use to stream data to e.g. a file or network socket.

```r
fs$read(name = "myfile.bin", con = connection)
```

The latter will be a memory efficient way to incrementally read from the GridFS and write out the data. It is similar to the `export()` method for regular mongo collection objects.


__*References*__:

- [mongoc_gridfs_find_one_by_filename](http://mongoc.org/libmongoc/current/mongoc_gridfs_find_one_by_filename.html) to lookup the file
- [mongoc_stream_gridfs_new](http://mongoc.org/libmongoc/current/mongoc_stream_gridfs_new.html) to create a stream reader
- [mongoc_stream_t](http://mongoc.org/libmongoc/current/mongoc_stream_t.html) methods for reading the stream
- [What exactly is a connection in R](https://stackoverflow.com/questions/30445875/what-exactly-is-a-connection-in-r)


## Writing Files

Analogous to reading, a write operation can either write a raw data vector from memory or stream data from a local file or connection object.

```r
fs$write(name = "myfile.bin", data = buffer)
```

When the `data` argument is an R connection object, it will incrementally read from the connection and upload to GridFS.

```r
fs$write(name = "myfile.bin", data = connection)
```


__*References*__:

- [mongoc_gridfs_create_file_from_stream](http://mongoc.org/libmongoc/current/mongoc_gridfs_create_file_from_stream.html) create a new file using a stream
- [mongoc_stream_write](http://mongoc.org/libmongoc/current/mongoc_stream_write.html) write to the stream
- [What exactly is a connection in R](https://stackoverflow.com/questions/30445875/what-exactly-is-a-connection-in-r)

## Removing Files

Removes a single file from the GridFS collection:

```r
fs$remove(name = "myfile.bin")
```

Here the `name` argument can be vectorized in standard R fashion such that multiple files can be removed with a single call.

__*References*__:

- [mongoc_gridfs_remove_by_filename](http://mongoc.org/libmongoc/current/mongoc_gridfs_remove_by_filename.html) to delete the file

## Drop GridFS

Requests that an entire GridFS be dropped, including all files associated with it.

```r
fs$drop()
```

__*References*__:

- [mongoc_gridfs_drop](http://mongoc.org/libmongoc/current/mongoc_gridfs_drop.html)

41 changes: 41 additions & 0 deletions man/gridfs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions src/Makevars.in
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ LIBMONGOC=mongoc/mongoc-array.o mongoc/mongoc-b64.o mongoc/mongoc-buffer.o \
mongoc/mongoc-cursor-transform.o mongoc/mongoc-cursor.o mongoc/mongoc-database.o \
mongoc/mongoc-gridfs-file-list.o mongoc/mongoc-gridfs-file-page.o \
mongoc/mongoc-gridfs-file.o mongoc/mongoc-gridfs.o mongoc/mongoc-index.o \
mongoc/mongoc-init.o mongoc/mongoc-list.o mongoc/mongoc-log.o \
mongoc/mongoc-stream-gridfs.o mongoc/mongoc-init.o mongoc/mongoc-list.o mongoc/mongoc-log.o \
mongoc/mongoc-rand-openssl.o mongoc/mongoc-read-prefs.o mongoc/mongoc-rpc.o \
mongoc/mongoc-sasl.o mongoc/mongoc-scram.o mongoc/mongoc-socket.o mongoc/mongoc-ssl.o \
mongoc/mongoc-stream-buffered.o mongoc/mongoc-stream-socket.o mongoc/mongoc-stream-tls.o \
Expand All @@ -30,14 +30,14 @@ LIBMONGOC=mongoc/mongoc-array.o mongoc/mongoc-b64.o mongoc/mongoc-buffer.o \
mongoc/mongoc-cluster-sasl.o mongoc/mongoc-cmd.o \
mongoc/mongoc-change-stream.o mongoc/mongoc-client-session.o mongoc/mongoc-cluster-cyrus.o \
mongoc/mongoc-compression.o mongoc/mongoc-cyrus.o mongoc/mongoc-write-command-legacy.o \
mongoc/mongoc-linux-distro-scanner.o
mongoc/mongoc-linux-distro-scanner.o mongoc/mongoc-stream-file.o

PKG_LIBS=-Lmongoc -lstatmongoc -Lbson -lstatbson @libs@ -lz
STATLIB = mongoc/libstatmongoc.a bson/libstatbson.a

# For development only
# PKG_CFLAGS= -Wall -pedantic
all: clean
# all: clean

$(SHLIB): $(STATLIB)

Expand Down
4 changes: 2 additions & 2 deletions src/Makevars.win
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ LIBMONGOC=mongoc/mongoc-array.o mongoc/mongoc-b64.o mongoc/mongoc-buffer.o \
mongoc/mongoc-cursor-transform.o mongoc/mongoc-cursor.o mongoc/mongoc-database.o \
mongoc/mongoc-gridfs-file-list.o mongoc/mongoc-gridfs-file-page.o \
mongoc/mongoc-gridfs-file.o mongoc/mongoc-gridfs.o mongoc/mongoc-index.o \
mongoc/mongoc-init.o mongoc/mongoc-list.o mongoc/mongoc-log.o \
mongoc/mongoc-stream-gridfs.o mongoc/mongoc-init.o mongoc/mongoc-list.o mongoc/mongoc-log.o \
mongoc/mongoc-rand-openssl.o mongoc/mongoc-read-prefs.o mongoc/mongoc-rpc.o \
mongoc/mongoc-sasl.o mongoc/mongoc-scram.o mongoc/mongoc-socket.o mongoc/mongoc-ssl.o \
mongoc/mongoc-stream-buffered.o mongoc/mongoc-stream-socket.o mongoc/mongoc-stream-tls.o \
Expand All @@ -38,7 +38,7 @@ LIBMONGOC=mongoc/mongoc-array.o mongoc/mongoc-b64.o mongoc/mongoc-buffer.o \
mongoc/mongoc-read-concern.o mongoc/mongoc-openssl.o mongoc/mongoc-apm.o \
mongoc/mongoc-crypto.o mongoc/mongoc-crypto-openssl.o mongoc/mongoc-handshake.o \
mongoc/mongoc-stream-tls-openssl.o mongoc/mongoc-stream-tls-openssl-bio.o \
mongoc/mongoc-cluster-sasl.o mongoc/mongoc-cmd.o \
mongoc/mongoc-cluster-sasl.o mongoc/mongoc-cmd.o mongoc/mongoc-stream-file.o \
mongoc/mongoc-change-stream.o mongoc/mongoc-client-session.o \
mongoc/mongoc-compression.o mongoc/mongoc-write-command-legacy.o \
mongoc/mongoc-sspi.o mongoc/mongoc-cluster-sspi.o mongoc/mongoc-queue.o
Expand Down
Loading

0 comments on commit c0268f8

Please sign in to comment.