Skip to content

Commit

Permalink
add duckdb
Browse files Browse the repository at this point in the history
  • Loading branch information
kongdd committed Dec 15, 2023
1 parent c43003f commit 7f6197b
Show file tree
Hide file tree
Showing 11 changed files with 159 additions and 6 deletions.
1 change: 0 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,4 @@
^INPUT$
^Figures$
^OUTPUT$
^inst$
^tidydb2\.Rproj$
3 changes: 3 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@ Description: What the package does (one paragraph).
License: MIT + file LICENSE
Imports:
methods,
R6,
magrittr, purrr,
dbplyr, dplyr,
DBI,
duckdb,
RMySQL,
crayon
Suggests:
knitr,
RMariaDB,
usethis,
rmarkdown,
testthat (>= 3.0.0)
Config/testthat/edition: 3
Expand Down
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,22 @@
export(dbRemoveTables_like)
export(db_append)
export(db_info)
export(dbase)
export(edit_db_config)
export(get_dbInfo)
export(import_table_large)
export(open_mariadb)
export(open_mysql)
export(tbl_copy)
import(DBI)
import(R6)
import(crayon)
import(dplyr)
import(magrittr)
importFrom(duckdb,duckdb)
importFrom(duckdb,duckdb_shutdown)
importFrom(methods,new)
importFrom(purrr,`%||%`)
importFrom(utils,str)
importMethodsFrom(DBI,dbSendQuery)
importMethodsFrom(RMySQL,dbSendQuery)
57 changes: 57 additions & 0 deletions R/R6_dbase.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#' DuckDB-backed table handle
#'
#' R6 class that opens a read-only DuckDB connection on a database file and
#' keeps a lazy `tbl()` reference to one of its tables.
#'
#' @field db Path of the database file, passed as `dbdir` to `dbConnect()`.
#' @field table Name of the table referenced by `tbl`.
#' @field type Backend type, `"duckdb"` or `"sqlite"`.
#' @field con The DBI connection opened in `initialize()`.
#' @field tbl Lazy table created with `tbl(con, table)`.
#'
#' @import R6
#' @importFrom duckdb duckdb duckdb_shutdown
#' @export
dbase <- R6Class("duckdb_base", list(
  db = NULL,
  table = NULL,
  type = "duckdb",
  con = NULL,
  tbl = NULL,

  #' @description
  #' Open the database read-only and attach to one table.
  #' @param db Path of the database file. When `NULL`, the `db` entry of the
  #'   first `get_dbInfo("duckdb")` record is used.
  #' @param table Table name. When `NULL`, the first `table` entry of that
  #'   record is used, falling back to the first table listed by the
  #'   connection.
  #' @param type Backend type, one of `"duckdb"` or `"sqlite"`.
  initialize = function(db = NULL, table = NULL, type = c("duckdb", "sqlite")) {
    # Validate `type` before doing any I/O.
    # The type could be guessed from the file extension; that is not done here.
    type <- match.arg(type)
    self$type <- type

    # The config is only consulted for values the caller did not supply.
    dbinfo <- NULL
    if (is.null(db) || is.null(table)) {
      dbinfo <- get_dbInfo("duckdb")[[1]] # the first record is the default
    }
    if (is.null(db)) db <- dbinfo$db
    if (is.null(db)) {
      stop("No database path supplied and none found in the duckdb config.",
        call. = FALSE
      )
    }
    self$db <- db

    # NOTE(review): both "duckdb" and "sqlite" are opened with the duckdb
    # driver, exactly as before; a dedicated sqlite driver is not wired in.
    self$con <- dbConnect(duckdb(), dbdir = self$db, read_only = TRUE)

    if (is.null(table)) {
      table <- dbinfo$table[1] %||% DBI::dbListTables(self$con)[1]
    }
    self$table <- table
    self$tbl <- tbl(self$con, self$table)
  },

  #' @description
  #' Print the database path, the table name and the lazy table.
  #' @param ... Ignored.
  #' @return The object itself, invisibly.
  print = function(...) {
    cat(sprintf("db : %s\n", self$db))
    cat(sprintf("table: %s\n", self$table))
    print(self$tbl)
    invisible(self)
  },

  #' @description
  #' Finalizer: closes the connection and shuts the database down.
  # NOTE(review): recent R6 releases ask for `finalize` to be a private
  # method; it is kept public here so the class interface is unchanged.
  finalize = function() {
    message("close datadb ...")
    self$close(force = TRUE)
  },

  #' @description
  #' Disconnect from the database and shut down its DuckDB instance.
  #' @param force When `TRUE`, the driver behind the connection is also shut
  #'   down if it is still valid after disconnecting (for example when the
  #'   connection had already been closed by other code).
  #' @return `TRUE`, invisibly.
  close = function(force = FALSE) {
    con <- self$con
    # Nothing was opened (e.g. `initialize()` failed before connecting).
    if (is.null(con)) {
      return(invisible(TRUE))
    }
    # Guarded so that an explicit close() followed by the finalizer does not
    # disconnect twice.
    if (DBI::dbIsValid(con)) {
      DBI::dbDisconnect(con, shutdown = TRUE)
    }
    # Shut down the driver that belongs to THIS connection; a freshly created
    # `duckdb()` driver would be unrelated to it and release nothing.
    if (force && DBI::dbIsValid(con@driver)) {
      duckdb::duckdb_shutdown(con@driver)
    }
    invisible(TRUE)
  },

  #' @description
  #' Collect all rows of one site into memory.
  #' @param site_id Value compared against the `site` column.
  #' @param verbose When `TRUE`, print the elapsed time of the query.
  #' @return The collected rows.
  read_data = function(site_id = 50349L, verbose = TRUE) {
    elapsed <- system.time({
      d <- suppressWarnings(
        self$tbl |>
          # `!!` inlines the local value, so it can never be mistaken for a
          # column of the remote table
          filter(site == !!site_id) |>
          collect()
      )
    })
    if (verbose) print(elapsed)
    d
  }
  ## add a write table options
))
5 changes: 5 additions & 0 deletions R/edit_db_config.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#' Open the user-level database config file for editing
#'
#' Opens `~/.db.yml` with `usethis::edit_file()`.
#'
#' @return The value returned by `usethis::edit_file()`.
#' @export
edit_db_config <- function() {
  # usethis is only a suggested dependency, so it may not be installed.
  if (!requireNamespace("usethis", quietly = TRUE)) {
    stop("Package 'usethis' is required to edit the config file.",
      call. = FALSE
    )
  }
  # mustWork = FALSE: the file may not exist yet, and normalizePath() would
  # otherwise warn about the missing path.
  config_path <- normalizePath("~/.db.yml", mustWork = FALSE)
  usethis::edit_file(config_path)
}
4 changes: 3 additions & 1 deletion R/tool_dbase.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
# library(tidydb) # pak::pkg_install(c("rpkgs/tidydb", "rpkgs/tidymet"))

#' @import DBI dplyr crayon
#' @importMethodsFrom DBI dbSendQuery
#' @importMethodsFrom RMySQL dbSendQuery
setMethod("dbSendQuery", c("MySQLConnection", "character"),
# import S4 method from RMySQL
function(conn, statement, ...) {
RMySQL:::checkValid(conn)

Expand Down Expand Up @@ -116,7 +119,6 @@ db_append <- function(con, tbl, values) {
dbWriteTable(con, tbl, values, append = TRUE)
}


#' @export
dbRemoveTables_like <- function(con, pattern="dbplyr", del=TRUE) {
tbls_bad = dbListTables(con) %>% .[grep(pattern, .)]
Expand Down
14 changes: 12 additions & 2 deletions scripts/database/test-db_perm.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,21 @@ con_mariadb = open_mariadb(dbinfo, 1)
# con
```

## 测试读取数据

```{r}
table = tbl(con, "China_Mete2000_hourly_2020_2022")
con <- open_mariadb()
```

```{r}
table = tbl(con, "China_Mete2000_hourly_2020_2022")
system.time({
d <- table |>
filter(site == 50136) |>
collect()
})
```

## 测试基础命令

Expand All @@ -40,7 +51,6 @@ t1 = system.time({
t2 = system.time({
copy_to(con_mariadb, df, "temp02", overwrite=TRUE, temporary = FALSE)
})
```


Expand Down
8 changes: 8 additions & 0 deletions scripts/speed_duckdb.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
library(dplyr)
library(duckdb)
# dbinfo$hourly_2020_2022

# Timing notes for reading single sites through the `dbase` class.
db <- dbase$new()

# Default site: the first call takes about 20 s (16 s is enough to read in
# all of the data).
d <- db$read_data()

# Another site: about 0.6 s.
d <- db$read_data(50246)
# db$close(force=TRUE)
38 changes: 38 additions & 0 deletions scripts/test1_duckdb.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
```{r}
library(duckdb)
library(data.table)
library(dplyr)

# DuckDB database file
db <- "z:/DATA/China/ChinaMet_hourly_mete2000/data/China_Mete2000_hourly_full_2020-2022_tidy.duckdb"
# CSV file holding the same data set
f <- "z:/DATA/China/ChinaMet_hourly_mete2000/data/China_Mete2000_hourly_full_2020-2022_tidy.csv"
```

# 写入数据

```{r}
# Open a DuckDB connection on the database file
con <- dbConnect(duckdb::duckdb(db))

# Read the CSV into memory, then write it out as a DuckDB table
df <- fread(f)
duckdb::dbWriteTable(con, "China_Mete2000_hourly_2020_2022", df)

# Done writing: disconnect
dbDisconnect(con)
```

# 读取数据

```{r}
# Release the database instance still held by the earlier connection; a
# warning is expected when that connection has already been disconnected.
if (exists("con")) suppressWarnings(dbDisconnect(con, shutdown = TRUE))

# Reopen the database so the statement runs on a live connection, and index
# the table that was written above (not the file-name-like identifier).
con <- dbConnect(duckdb::duckdb(), dbdir = db)
dbExecute(con, 'CREATE INDEX IF NOT EXISTS idx_site ON "China_Mete2000_hourly_2020_2022" (site)')
dbDisconnect(con, shutdown = TRUE)
```

```{r}
```
2 changes: 0 additions & 2 deletions vignettes/database_config.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,3 @@ dbinfo = get_dbInfo() # see which db to read
con_mysql = open_mysql()
con_mariadb = open_mariadb(1, dbinfo)
```


26 changes: 26 additions & 0 deletions vignettes/database_tidydata.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
title: "database_tidydata"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{database_tidydata}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
```

## 测试读取和清洗数据

```{r setup}
library(tidydb2)
```

```{r}
```

0 comments on commit 7f6197b

Please sign in to comment.