-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
3,279 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
year,country,receipts | ||
1970,Sweden,46.9 | ||
1970,Netherlands,44 | ||
1970,Norway,43.5 | ||
1970,Britain,40.7 | ||
1970,France,39 | ||
1970,Germany,37.5 | ||
1970,Belgium,35.2 | ||
1970,Canada,35.2 | ||
1970,Finland,34.9 | ||
1970,Italy,30.4 | ||
1970,United States,30.3 | ||
1970,Greece,26.8 | ||
1970,Switzerland,26.5 | ||
1970,Spain,22.5 | ||
1970,Japan,20.7 | ||
1979,Sweden,57.4 | ||
1979,Netherlands,55.8 | ||
1979,Norway,52.2 | ||
1979,Britain,39 | ||
1979,France,43.4 | ||
1979,Germany,42.9 | ||
1979,Belgium,43.2 | ||
1979,Canada,35.8 | ||
1979,Finland,38.2 | ||
1979,Italy,35.7 | ||
1979,United States,32.5 | ||
1979,Greece,30.6 | ||
1979,Switzerland,33.2 | ||
1979,Spain,27.1 | ||
1979,Japan,26.6 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
market,segment,value | ||
"Auburn, AL",Almond lovers,3840 | ||
"Auburn, AL",Berry buyers,1920 | ||
"Auburn, AL",Carrots-n-more,960 | ||
"Auburn, AL",Delicious-n-new,400 | ||
"Birmingham, AL",Almond lovers,1600 | ||
"Birmingham, AL",Berry buyers,1440 | ||
"Birmingham, AL",Carrots-n-more,960 | ||
"Birmingham, AL",Delicious-n-new,400 | ||
"Gainesville, FL",Almond lovers,640 | ||
"Gainesville, FL",Berry buyers,960 | ||
"Gainesville, FL",Carrots-n-more,640 | ||
"Gainesville, FL",Delicious-n-new,400 | ||
"Durham, NC",Almond lovers,320 | ||
"Durham, NC",Berry buyers,480 | ||
"Durham, NC",Carrots-n-more,640 | ||
"Durham, NC",Delicious-n-new,400 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# Convert Arrow files to parquet | ||
|
||
A simple app that converts your Arrow files to the Parquet format, using DuckDB-Wasm under the hood. | ||
|
||
```js | ||
const file = view(Inputs.file()); | ||
``` | ||
|
||
```js | ||
// Derive the DuckDB table name from the uploaded file by stripping only a
// trailing ".arrow" extension. A plain string replace would instead remove the
// first ".arrow" found anywhere in the name (e.g. "a.arrow.v2.arrow").
const table = file?.name.replace(/\.arrow$/, "");
``` | ||
```js | ||
const db = file && DuckDBClient.of({[table]: file}); | ||
``` | ||
```js | ||
// Renders the export button. The button is disabled while a conversion runs;
// the finally clause re-enables it even when toParquet or download throws, so
// a failed export never leaves the button permanently disabled.
display(file
  ? html`<button onclick=${async function() {
      this.disabled = true;
      try {
        download(await toParquet(db, {table}));
      } finally {
        this.disabled = false;
      }
    }}>Download ${table}.parquet`
  : html`<button disabled>…`
);
``` | ||
```js | ||
/**
 * Exports a DuckDB table to parquet.
 *
 * Runs a COPY statement that writes the table to a randomly named temporary
 * file, reads that file back into memory, and wraps the bytes in a File.
 *
 * @param {object} duckDbClient - client exposing `query(sql)`, `escape(name)`
 *   and the underlying database as `_db` (presumably a DuckDB-Wasm
 *   AsyncDuckDB; this function only relies on `copyFileToBuffer` and
 *   `dropFile`).
 * @param {object} [options]
 * @param {string} [options.table="data"] - name of the table to export.
 * @param {string} [options.name] - file name of the result; defaults to
 *   `${table}.parquet`.
 * @returns {Promise<File>} the parquet file contents.
 */
async function toParquet(duckDbClient, {table = "data", name = `${table}.parquet`} = {}) {
  const tmp = (Math.random() * 1e16).toString(16);
  const db = duckDbClient._db;
  try {
    // https://duckdb.org/docs/sql/statements/copy
    console.log("start COPY", {table, name, tmp});
    await duckDbClient.query(`COPY ${duckDbClient.escape(table)} TO '${tmp}' (FORMAT PARQUET, COMPRESSION GZIP)`);
    console.log("start copyFileToBuffer");
    const buffer = await db.copyFileToBuffer(tmp);

    return new File([buffer], name, {
      // https://issues.apache.org/jira/browse/PARQUET-1889
      type: "application/vnd.apache.parquet"
    });
  } finally {
    // Remove the temporary file so repeated exports do not pile up in the
    // database's file system. Cleanup is best-effort: a failure here is
    // reported but must not mask the export result (or the original error).
    try {
      await db.dropFile(tmp);
    } catch (error) {
      console.warn("toParquet: could not drop temporary file", tmp, error);
    }
  }
}
|
||
/**
 * Triggers a download. Needs to be invoked via a user input.
 *
 * Creates a temporary anchor pointing at an object URL for the file and
 * clicks it. The object URL is revoked after a delay instead of right after
 * click(): revoking synchronously can cancel the download in browsers that
 * start reading the blob asynchronously.
 *
 * @param {File} file - the file to download; its `name` is the suggested
 *   file name.
 * @param {object} [options]
 * @param {number} [options.revokeDelay=40000] - milliseconds to wait before
 *   releasing the object URL.
 */
function download(file, {revokeDelay = 40000} = {}) {
  const a = document.createElement("a");
  const url = URL.createObjectURL(file);
  a.download = file.name;
  a.href = url;
  a.click();
  setTimeout(() => URL.revokeObjectURL(url), revokeDelay);
}
``` | ||
<div style="height: 40vh"></div> | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# DuckDB spatial | ||
|
||
```js echo | ||
display( | ||
Plot.plot({ | ||
projection: "identity", | ||
width: 975, | ||
height: 610, | ||
marks: [ | ||
Plot.geo(counties, { | ||
stroke: "var(--theme-background-alt)", | ||
strokeWidth: 0.25, | ||
fill: (d) => d.properties.area | ||
}) | ||
] | ||
}) | ||
); | ||
``` | ||
|
||
<p class=warning>This page only works if we upgrade DuckDBClient to import [email protected] or later, coupled with apache-arrow@14 or later (like I did here manually). See issues <a href=https://github.com/duckdb/duckdb-wasm/issues/1561>duckdb-wasm#1561</a>; <a href=https://github.com/observablehq/framework/issues/750>framework#750</a> and <a href=https://github.com/observablehq/framework/issues/733>framework#733</a>.</p> | ||
|
||
--- | ||
|
||
Start by creating an empty DuckDB database with its [spatial extension](https://duckdb.org/docs/extensions/spatial.html), and load the TopoJSON [US Atlas](https://github.com/topojson/us-atlas): | ||
|
||
```js echo | ||
const db = await DuckDBClient.of(); | ||
await db.sql`INSTALL spatial`; | ||
await db.sql`LOAD spatial`; | ||
await db.sql`CREATE TABLE us AS ( | ||
SELECT * FROM ST_Read('https://cdn.jsdelivr.net/npm/us-atlas/us/10m.json') | ||
)`; | ||
``` | ||
|
||
Now we can work on this file as a database: | ||
|
||
```js echo | ||
const features = await db.sql`SELECT * FROM us`; | ||
``` | ||
|
||
${Inputs.table(features)} | ||
|
||
The **geom** field is a ST_GEOMETRY, the internal format that DuckDB spatial uses to represent geometries. We can convert it to GeoJSON: | ||
|
||
```js echo | ||
const counties = Array.from( | ||
await db.sql`SELECT ST_AsGeoJSON(geom) AS county, ST_Area(geom) as area FROM us`, | ||
({county, area}) => Object.assign(JSON.parse(county), {properties: {area}}) | ||
); | ||
``` | ||
|
||
This is just the beginning. | ||
|
||
(Note: this exploration is all happening client-side.) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
# DuckDB | ||
|
||
DuckDB is “an in-process SQL OLAP Database Management System. [DuckDB-Wasm](https://github.com/duckdb/duckdb-wasm) brings DuckDB to every browser thanks to WebAssembly.” DuckDB-Wasm is available by default as `duckdb` in Markdown, but you can explicitly import it as: | ||
|
||
```js echo | ||
import * as duckdb from "npm:@duckdb/duckdb-wasm"; | ||
``` | ||
|
||
For convenience, we provide a [`DatabaseClient`](https://observablehq.com/@observablehq/database-client-specification) implementation on top of DuckDB-Wasm, `DuckDBClient`. This is also available by default in Markdown, but you can explicitly import it like so: | ||
|
||
```js echo | ||
import {DuckDBClient} from "npm:@observablehq/duckdb"; | ||
``` | ||
|
||
To get a DuckDB client, pass zero or more named tables to `DuckDBClient.of`. Each table can be expressed as a [`FileAttachment`](../javascript/files), [Arquero table](./arquero), [Arrow table](./arrow), an array of objects, or a promise to the same. For example, below we load a sample of 250,000 stars from the [Gaia Star Catalog](https://observablehq.com/@cmudig/peeking-into-the-gaia-star-catalog) as an [Apache Parquet](https://parquet.apache.org/) file: | ||
|
||
```js echo | ||
const db = DuckDBClient.of({gaia: FileAttachment("../data/gaia-sample.parquet")}); | ||
``` | ||
|
||
Now we can run a query using `db.sql` to bin the stars by [right ascension](https://en.wikipedia.org/wiki/Right_ascension) (`ra`) and [declination](https://en.wikipedia.org/wiki/Declination) (`dec`): | ||
|
||
```js echo | ||
const bins = db.sql`SELECT | ||
floor(ra / 2) * 2 + 1 AS ra, | ||
floor(dec / 2) * 2 + 1 AS dec, | ||
count() AS count | ||
FROM | ||
gaia | ||
GROUP BY | ||
1, | ||
2`; | ||
``` | ||
|
||
These bins can quickly be turned into a heatmap with [Plot’s raster mark](https://observablehq.com/plot/marks/raster), showing the Milky Way. | ||
|
||
```js echo | ||
display( | ||
Plot.plot({ | ||
aspectRatio: 1, | ||
x: {domain: [0, 360]}, | ||
y: {domain: [-90, 90]}, | ||
marks: [ | ||
Plot.frame({fill: 0}), | ||
Plot.raster(bins, { | ||
x: "ra", | ||
y: "dec", | ||
fill: "count", | ||
width: 360 / 2, | ||
height: 180 / 2, | ||
imageRendering: "pixelated" | ||
}) | ||
] | ||
}) | ||
); | ||
``` | ||
|
||
For externally-hosted data, you can create an empty `DuckDBClient` and load a table from a SQL query, say using [`read_parquet`](https://duckdb.org/docs/guides/import/parquet_import) or [`read_csv`](https://duckdb.org/docs/guides/import/csv_import). | ||
|
||
```js run=false | ||
const db = await DuckDBClient.of(); | ||
|
||
await db.sql`CREATE TABLE addresses | ||
AS SELECT * | ||
FROM read_parquet('https://static.data.gouv.fr/resources/bureaux-de-vote-et-adresses-de-leurs-electeurs/20230626-135723/table-adresses-reu.parquet') | ||
LIMIT 100`; | ||
``` | ||
|
||
As an alternative to `db.sql`, there’s also `db.query`: | ||
|
||
```js echo | ||
display(await db.query("SELECT * FROM gaia LIMIT 10")); | ||
``` | ||
|
||
And `db.queryRow`: | ||
|
||
```js echo | ||
display(await db.queryRow("SELECT count() AS count FROM gaia")); | ||
``` | ||
|
||
See the [DatabaseClient Specification](https://observablehq.com/@observablehq/database-client-specification) for more details. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
--- | ||
index: true | ||
--- | ||
|
||
# Mosaic Cross-Filter Flights 10M | ||
|
||
An example using [Mosaic vgplot](https://uwdata.github.io/mosaic/vgplot/) to interactively cross-filter 10 million flight records. | ||
You may need to wait a few seconds for the dataset to load. | ||
|
||
<div style="display: flex; flex-wrap: wrap"> | ||
${makePlot("delay")} | ||
${makePlot("time")} | ||
${makePlot("distance")} | ||
</div> | ||
|
||
_Try selecting delayed flights. How much more likely are they to leave later in the day?_ | ||
|
||
```js | ||
// load flights data from external parquet file | ||
const datasource = new URL(await FileAttachment("../data/flights-10m.parquet").url(), document.location).href; | ||
await vg.coordinator().exec(`CREATE TABLE IF NOT EXISTS flights10m AS | ||
SELECT | ||
GREATEST(-60, LEAST(ARR_DELAY, 180))::DOUBLE AS delay, | ||
DISTANCE AS distance, | ||
DEP_TIME AS time | ||
FROM '${datasource}'`); | ||
|
||
// create a selection with crossfilter resolution | ||
const brush = vg.Selection.crossfilter(); | ||
|
||
// Builds one binned histogram of `column` over the flights10m table.
// The rectY bars are filtered by the shared `brush` selection, and an
// intervalX interactor feeds the user's selection back into that brush.
const makePlot = (column) => {
  // data set and filter selection
  const filteredFlights = vg.from("flights10m", {filterBy: brush});
  const histogram = vg.rectY(filteredFlights, {
    x: vg.bin(column),
    y: vg.count(),
    fill: "steelblue",
    inset: 0.5
  });
  return vg.plot(
    histogram,
    vg.intervalX({as: brush}), // create an interval selection brush
    vg.xDomain(vg.Fixed), // don't change the x-axis domain across updates
    vg.marginLeft(75),
    vg.width(350),
    vg.height(220)
  );
};
|
||
// generate dashboard with three linked histograms | ||
// display(vg.vconcat(makePlot("delay"), makePlot("time"), makePlot("distance"))); | ||
``` | ||
|
||
```js | ||
// import vgplot and configure Mosaic to use DuckDB-WASM | ||
const vg = await import("npm:@uwdata/vgplot"); | ||
{ | ||
const wasm = await vg.wasmConnector(); | ||
await vg.coordinator().databaseConnector(wasm); | ||
} | ||
``` |