Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1132,6 +1132,39 @@ Plot.image(presidents, {x: "inauguration", y: "favorability", src: "portrait"})

Returns a new image with the given *data* and *options*. If neither the **x** nor **y** nor **frameAnchor** options are specified, *data* is assumed to be an array of pairs [[*x₀*, *y₀*], [*x₁*, *y₁*], [*x₂*, *y₂*], …] such that **x** = [*x₀*, *x₁*, *x₂*, …] and **y** = [*y₀*, *y₁*, *y₂*, …].


### Linear regression

[<img src="./img/linear-regression.png" width="600" alt="a scatterplot of penguin culmens, showing the length and depth of several species, with linear regression models by species and for the whole population, illustrating Simpson’s paradox">](https://observablehq.com/@observablehq/plot-linear-regression)

[Source](./src/marks/linearRegression.js) · [Examples](https://observablehq.com/@observablehq/plot-linear-regression) · Draws [linear regression](https://en.wikipedia.org/wiki/Linear_regression) lines with confidence bands, representing the estimated relation of a dependent variable (typically *y*) on an independent variable (typically *x*). The linear regression line is fit using the [least squares](https://en.wikipedia.org/wiki/Least_squares) approach. See Torben Jansen’s [“Linear regression with confidence bands”](https://observablehq.com/@toja/linear-regression-with-confidence-bands) and [this StatExchange question](https://stats.stackexchange.com/questions/101318/understanding-shape-and-calculation-of-confidence-bands-in-linear-regression) for details on the confidence interval calculation.

The given *options* are passed through to these underlying marks, with the exception of the following options:

* **stroke** - the stroke color of the regression line; defaults to *currentColor*
* **fill** - the fill color of the confidence band; defaults to the line’s *stroke*
* **fillOpacity** - the fill opacity of the confidence band; defaults to 0.1
* **ci** - the confidence interval in [0, 1), or 0 to hide bands; defaults to 0.95
* **precision** - the distance (in pixels) between samples of the confidence band; defaults to 4

Multiple regressions can be defined by specifying the *z*, *fill*, or *stroke* channel.

#### Plot.linearRegressionX(*data*, *options*)

```js
Plot.linearRegressionX(mtcars, {y: "wt", x: "hp"})
```

Returns a linear regression mark where *x* is the dependent variable and *y* is the independent variable.

#### Plot.linearRegressionY(*data*, *options*)

```js
Plot.linearRegressionY(mtcars, {x: "wt", y: "hp"})
```

Returns a linear regression mark where *y* is the dependent variable and *x* is the independent variable.

### Line

[<img src="./img/line.png" width="320" height="198" alt="a line chart">](https://observablehq.com/@observablehq/plot-line)
Expand Down
Binary file added img/linear-regression.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export {Frame, frame} from "./marks/frame.js";
export {Hexgrid, hexgrid} from "./marks/hexgrid.js";
export {Image, image} from "./marks/image.js";
export {Line, line, lineX, lineY} from "./marks/line.js";
export {linearRegressionX, linearRegressionY} from "./marks/linearRegression.js";
export {Link, link} from "./marks/link.js";
export {Rect, rect, rectX, rectY} from "./marks/rect.js";
export {RuleX, RuleY, ruleX, ruleY} from "./marks/rule.js";
Expand Down
147 changes: 147 additions & 0 deletions src/marks/linearRegression.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import {create, extent, range, sum, area as shapeArea, namespaces} from "d3";
import {identity, indexOf, isNone, isNoneish, maybeZ} from "../options.js";
import {Mark} from "../plot.js";
import {qt} from "../stats.js";
import {applyDirectStyles, applyGroupedChannelStyles, applyIndirectStyles, applyTransform, groupZ, offset} from "../style.js";
import {maybeDenseIntervalX, maybeDenseIntervalY} from "../transforms/bin.js";

const defaults = {
ariaLabel: "linear-regression",
fill: "currentColor",
fillOpacity: 0.1,
stroke: "currentColor",
strokeWidth: 1.5,
strokeLinecap: "round",
strokeLinejoin: "round",
strokeMiterlimit: 1
};

class LinearRegression extends Mark {
constructor(data, options = {}) {
const {x, y, z, ci = 0.95, precision = 4} = options;
super(
data,
[
{name: "x", value: x, scale: "x"},
{name: "y", value: y, scale: "y"},
{name: "z", value: maybeZ(options), optional: true}
],
options,
defaults
);
this.z = z;
this.ci = +ci;
this.precision = +precision;
if (!(0 <= this.ci && this.ci < 1)) throw new Error(`invalid ci; not in [0, 1): ${ci}`);
if (!(this.precision > 0)) throw new Error(`invalid precision: ${precision}`);
}
render(I, {x, y}, channels, dimensions) {
const {x: X, y: Y, z: Z} = channels;
const {dx, dy, ci} = this;
return create("svg:g")
.call(applyIndirectStyles, this, dimensions)
.call(applyTransform, x, y, offset + dx, offset + dy)
.call(g => g.selectAll()
.data(Z ? groupZ(I, Z, this.z) : [I])
.enter()
.call(enter => enter.append("path")
.attr("fill", "none")
.call(applyDirectStyles, this)
.call(applyGroupedChannelStyles, this, {...channels, fill: null, fillOpacity: null})
.attr("d", I => this._renderLine(I, X, Y))
.call(ci && !isNone(this.fill) ? path => path.select(pathBefore)
.attr("stroke", "none")
.call(applyDirectStyles, this)
.call(applyGroupedChannelStyles, this, {...channels, stroke: null, strokeOpacity: null, strokeWidth: null})
.attr("d", I => this._renderBand(I, X, Y)) : () => {})))
.node();
}
}

function pathBefore() {
return this.parentNode.insertBefore(this.ownerDocument.createElementNS(namespaces.svg, "path"), this);
}

class LinearRegressionX extends LinearRegression {
constructor(data, options) {
super(data, options);
}
_renderBand(I, X, Y) {
const {ci, precision} = this;
const [y1, y2] = extent(I, i => Y[i]);
const f = linearRegressionF(I, Y, X);
const g = confidenceIntervalF(I, Y, X, (1 - ci) / 2, f);
return shapeArea()
.y(y => y)
.x0(y => g(y, -1))
.x1(y => g(y, +1))
(range(y1, y2 - precision / 2, precision).concat(y2));
}
_renderLine(I, X, Y) {
const [y1, y2] = extent(I, i => Y[i]);
const f = linearRegressionF(I, Y, X);
return `M${f(y1)},${y1}L${f(y2)},${y2}`;
}
}

class LinearRegressionY extends LinearRegression {
constructor(data, options) {
super(data, options);
}
_renderBand(I, X, Y) {
const {ci, precision} = this;
const [x1, x2] = extent(I, i => X[i]);
const f = linearRegressionF(I, X, Y);
const g = confidenceIntervalF(I, X, Y, (1 - ci) / 2, f);
return shapeArea()
.x(x => x)
.y0(x => g(x, -1))
.y1(x => g(x, +1))
(range(x1, x2 - precision / 2, precision).concat(x2));
}
_renderLine(I, X, Y) {
const [x1, x2] = extent(I, i => X[i]);
const f = linearRegressionF(I, X, Y);
return `M${x1},${f(x1)}L${x2},${f(x2)}`;
}
}

export function linearRegressionX(data, {y = indexOf, x = identity, stroke, fill = isNoneish(stroke) ? "currentColor" : stroke, ...options} = {}) {
return new LinearRegressionX(data, maybeDenseIntervalY({...options, x, y, fill, stroke}));
}

export function linearRegressionY(data, {x = indexOf, y = identity, stroke, fill = isNoneish(stroke) ? "currentColor" : stroke, ...options} = {}) {
return new LinearRegressionY(data, maybeDenseIntervalX({...options, x, y, fill, stroke}));
}

function linearRegressionF(I, X, Y) {
let sumX = 0, sumY = 0, sumXY = 0, sumX2 = 0;
for (const i of I) {
const xi = X[i];
const yi = Y[i];
sumX += xi;
sumY += yi;
sumXY += xi * yi;
sumX2 += xi * xi;
}
const n = I.length;
const slope = (n * sumXY - sumX * sumY) / (n * sumX2 - sumX * sumX);
const intercept = (sumY - slope * sumX) / n;
return x => slope * x + intercept;
}

function confidenceIntervalF(I, X, Y, p, f) {
const mean = sum(I, i => X[i]) / I.length;
let a = 0, b = 0;
for (const i of I) {
a += (X[i] - mean) ** 2;
b += (Y[i] - f(X[i])) ** 2;
}
const sy = Math.sqrt(b / (I.length - 2));
const t = qt(p, I.length - 2);
return (x, k) => {
const Y = f(x);
const se = sy * Math.sqrt(1 / I.length + (x - mean) ** 2 / a);
return Y + k * t * se;
};
}
143 changes: 143 additions & 0 deletions src/stats.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
// https://github.com/jstat/jstat
//
// Copyright (c) 2013 jStat
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

export function ibetainv(p, a, b) {
var EPS = 1e-8;
var a1 = a - 1;
var b1 = b - 1;
var j = 0;
var lna, lnb, pp, t, u, err, x, al, h, w, afac;
if (p <= 0) return 0;
if (p >= 1) return 1;
if (a >= 1 && b >= 1) {
pp = p < 0.5 ? p : 1 - p;
t = Math.sqrt(-2 * Math.log(pp));
x = (2.30753 + t * 0.27061) / (1 + t * (0.99229 + t * 0.04481)) - t;
if (p < 0.5) x = -x;
al = (x * x - 3) / 6;
h = 2 / (1 / (2 * a - 1) + 1 / (2 * b - 1));
w =
(x * Math.sqrt(al + h)) / h -
(1 / (2 * b - 1) - 1 / (2 * a - 1)) * (al + 5 / 6 - 2 / (3 * h));
x = a / (a + b * Math.exp(2 * w));
} else {
lna = Math.log(a / (a + b));
lnb = Math.log(b / (a + b));
t = Math.exp(a * lna) / a;
u = Math.exp(b * lnb) / b;
w = t + u;
if (p < t / w) x = Math.pow(a * w * p, 1 / a);
else x = 1 - Math.pow(b * w * (1 - p), 1 / b);
}
afac = -gammaln(a) - gammaln(b) + gammaln(a + b);
for (; j < 10; j++) {
if (x === 0 || x === 1) return x;
err = ibeta(x, a, b) - p;
t = Math.exp(a1 * Math.log(x) + b1 * Math.log(1 - x) + afac);
u = err / t;
x -= t = u / (1 - 0.5 * Math.min(1, u * (a1 / x - b1 / (1 - x))));
if (x <= 0) x = 0.5 * (x + t);
if (x >= 1) x = 0.5 * (x + t + 1);
if (Math.abs(t) < EPS * x && j > 0) break;
}
return x;
}

export function ibeta(x, a, b) {
// Factors in front of the continued fraction.
var bt =
x === 0 || x === 1
? 0
: Math.exp(
gammaln(a + b) -
gammaln(a) -
gammaln(b) +
a * Math.log(x) +
b * Math.log(1 - x)
);
if (x < 0 || x > 1) return false;
if (x < (a + 1) / (a + b + 2))
// Use continued fraction directly.
return (bt * betacf(x, a, b)) / a;
// else use continued fraction after making the symmetry transformation.
return 1 - (bt * betacf(1 - x, b, a)) / b;
}

export function betacf(x, a, b) {
var fpmin = 1e-30;
var m = 1;
var qab = a + b;
var qap = a + 1;
var qam = a - 1;
var c = 1;
var d = 1 - (qab * x) / qap;
var m2, aa, del, h;

// These q's will be used in factors that occur in the coefficients
if (Math.abs(d) < fpmin) d = fpmin;
d = 1 / d;
h = d;

for (; m <= 100; m++) {
m2 = 2 * m;
aa = (m * (b - m) * x) / ((qam + m2) * (a + m2));
// One step (the even one) of the recurrence
d = 1 + aa * d;
if (Math.abs(d) < fpmin) d = fpmin;
c = 1 + aa / c;
if (Math.abs(c) < fpmin) c = fpmin;
d = 1 / d;
h *= d * c;
aa = (-(a + m) * (qab + m) * x) / ((a + m2) * (qap + m2));
// Next step of the recurrence (the odd one)
d = 1 + aa * d;
if (Math.abs(d) < fpmin) d = fpmin;
c = 1 + aa / c;
if (Math.abs(c) < fpmin) c = fpmin;
d = 1 / d;
del = d * c;
h *= del;
if (Math.abs(del - 1.0) < 3e-7) break;
}

return h;
}

export function gammaln(x) {
var j = 0;
var cof = [
76.18009172947146, -86.5053203294167, 24.01409824083091, -1.231739572450155,
0.1208650973866179e-2, -0.5395239384953e-5
];
var ser = 1.000000000190015;
var xx, y, tmp;
tmp = (y = xx = x) + 5.5;
tmp -= (xx + 0.5) * Math.log(tmp);
for (; j < 6; j++) ser += cof[j] / ++y;
return Math.log((2.506628274631 * ser) / xx) - tmp;
}

export function qt(p, dof) {
var x = ibetainv(2 * Math.min(p, 1 - p), 0.5 * dof, 0.5);
x = Math.sqrt((dof * (1 - x)) / x);
return p > 0.5 ? x : -x;
}
5 changes: 3 additions & 2 deletions src/style.js
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ function groupAesthetics({ariaLabel: AL, title: T, fill: F, fillOpacity: FO, str
return [AL, T, F, FO, S, SO, SW, O, H].filter(c => c !== undefined);
}

function groupZ(I, Z, z) {
export function groupZ(I, Z, z) {
const G = group(I, i => Z[i]);
if (z === undefined && G.size > I.length >> 1) {
warn(`Warning: the implicit z channel has high cardinality. This may occur when the fill or stroke channel is associated with quantitative data rather than ordinal or categorical data. You can suppress this warning by setting the z option explicitly; if this data represents a single series, set z to null.`);
Expand Down Expand Up @@ -247,7 +247,7 @@ export function maybeClip(clip) {
throw new Error(`invalid clip method: ${clip}`);
}

export function applyIndirectStyles(selection, mark, {width, height, marginLeft, marginRight, marginTop, marginBottom}) {
export function applyIndirectStyles(selection, mark, dimensions) {
applyAttr(selection, "aria-label", mark.ariaLabel);
applyAttr(selection, "aria-description", mark.ariaDescription);
applyAttr(selection, "aria-hidden", mark.ariaHidden);
Expand All @@ -265,6 +265,7 @@ export function applyIndirectStyles(selection, mark, {width, height, marginLeft,
applyAttr(selection, "paint-order", mark.paintOrder);
applyAttr(selection, "pointer-events", mark.pointerEvents);
if (mark.clip === "frame") {
const {width, height, marginLeft, marginRight, marginTop, marginBottom} = dimensions;
const id = `plot-clip-${++nextClipId}`;
selection
.attr("clip-path", `url(#${id})`)
Expand Down
4 changes: 4 additions & 0 deletions test/data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ https://www.metoffice.gov.uk/hadobs/hadcrut4/data/current/series_format.html
The New York Times
https://www.nytimes.com/2019/12/02/upshot/wealth-poverty-divide-american-cities.html

## mtcars.csv
1974 *Motor Trend* US magazine
https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/mtcars

## moby-dick-chapter-1.txt
*Moby Dick; or The Whale* by Herman Melville
https://www.gutenberg.org/files/2701/2701-h/2701-h.htm
Expand Down
Loading