observablehq
diff --git a/‎README.md‎
Lines changed: 33 additions & 0 deletions b/‎README.md‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎img/linear-regression.png‎
37.1 KB b/‎img/linear-regression.png‎
37.1 KB
diff --git a/‎src/index.js‎
Lines changed: 1 addition & 0 deletions b/‎src/index.js‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/marks/linearRegression.js‎
Lines changed: 147 additions & 0 deletions b/‎src/marks/linearRegression.js‎
Lines changed: 147 additions & 0 deletions
diff --git a/‎src/stats.js‎
Lines changed: 143 additions & 0 deletions b/‎src/stats.js‎
Lines changed: 143 additions & 0 deletions
diff --git a/‎src/style.js‎
Lines changed: 3 additions & 2 deletions b/‎src/style.js‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎test/data/README.md‎
Lines changed: 4 additions & 0 deletions b/‎test/data/README.md‎
Lines changed: 4 additions & 0 deletions
@@ -1132,6 +1132,39 @@ Plot.image(presidents, {x: "inauguration", y: "favorability", src: "portrait"})
 
 Returns a new image with the given *data* and *options*. If neither the **x** nor **y** nor **frameAnchor** options are specified, *data* is assumed to be an array of pairs [[*x₀*, *y₀*], [*x₁*, *y₁*], [*x₂*, *y₂*], …] such that **x** = [*x₀*, *x₁*, *x₂*, …] and **y** = [*y₀*, *y₁*, *y₂*, …].
 
+
+### Linear regression
+
+[<img src="./img/linear-regression.png" width="600" alt="a scatterplot of penguin culmens, showing the length and depth of several species, with linear regression models by species and for the whole population, illustrating Simpson’s paradox">](https://observablehq.com/@observablehq/plot-linear-regression)
+
+[Source](./src/marks/linearRegression.js) · [Examples](https://observablehq.com/@observablehq/plot-linear-regression) · Draws [linear regression](https://en.wikipedia.org/wiki/Linear_regression) lines with confidence bands, representing the estimated relation of a dependent variable (typically *y*) on an independent variable (typically *x*). The linear regression line is fit using the [least squares](https://en.wikipedia.org/wiki/Least_squares) approach. See Torben Jansen’s [“Linear regression with confidence bands”](https://observablehq.com/@toja/linear-regression-with-confidence-bands) and [this StatExchange question](https://stats.stackexchange.com/questions/101318/understanding-shape-and-calculation-of-confidence-bands-in-linear-regression) for details on the confidence interval calculation.
+
+The given *options* are passed through to these underlying marks, with the exception of the following options:
+
+* **stroke** - the stroke color of the regression line; defaults to *currentColor*
+* **fill** - the fill color of the confidence band; defaults to the line’s *stroke*
+* **fillOpacity** - the fill opacity of the confidence band; defaults to 0.1
+* **ci** - the confidence interval in [0, 1), or 0 to hide bands; defaults to 0.95
+* **precision** - the distance (in pixels) between samples of the confidence band; defaults to 4
+
+Multiple regressions can be defined by specifying the *z*, *fill*, or *stroke* channel.
+
+#### Plot.linearRegressionX(*data*, *options*)
+
+```js
+Plot.linearRegressionX(mtcars, {y: "wt", x: "hp"})
+```
+
+Returns a linear regression mark where *x* is the dependent variable and *y* is the independent variable.
+
+#### Plot.linearRegressionY(*data*, *options*)
+
+```js
+Plot.linearRegressionY(mtcars, {x: "wt", y: "hp"})
+```
+
+Returns a linear regression mark where *y* is the dependent variable and *x* is the independent variable.
+
 ### Line
 
 [<img src="./img/line.png" width="320" height="198" alt="a line chart">](https://observablehq.com/@observablehq/plot-line)
 
@@ -10,6 +10,7 @@ export {Frame, frame} from "./marks/frame.js";
 export {Hexgrid, hexgrid} from "./marks/hexgrid.js";
 export {Image, image} from "./marks/image.js";
 export {Line, line, lineX, lineY} from "./marks/line.js";
+export {linearRegressionX, linearRegressionY} from "./marks/linearRegression.js";
 export {Link, link} from "./marks/link.js";
 export {Rect, rect, rectX, rectY} from "./marks/rect.js";
 export {RuleX, RuleY, ruleX, ruleY} from "./marks/rule.js";
 
@@ -0,0 +1,147 @@
+import {create, extent, range, sum, area as shapeArea, namespaces} from "d3";
+import {identity, indexOf, isNone, isNoneish, maybeZ} from "../options.js";
+import {Mark} from "../plot.js";
+import {qt} from "../stats.js";
+import {applyDirectStyles, applyGroupedChannelStyles, applyIndirectStyles, applyTransform, groupZ, offset} from "../style.js";
+import {maybeDenseIntervalX, maybeDenseIntervalY} from "../transforms/bin.js";
+
+const defaults = {
+  ariaLabel: "linear-regression",
+  fill: "currentColor",
+  fillOpacity: 0.1,
+  stroke: "currentColor",
+  strokeWidth: 1.5,
+  strokeLinecap: "round",
+  strokeLinejoin: "round",
+  strokeMiterlimit: 1
+};
+
+class LinearRegression extends Mark {
+  constructor(data, options = {}) {
+    const {x, y, z, ci = 0.95, precision = 4} = options;
+    super(
+      data,
+      [
+        {name: "x", value: x, scale: "x"},
+        {name: "y", value: y, scale: "y"},
+        {name: "z", value: maybeZ(options), optional: true}
+      ],
+      options,
+      defaults
+    );
+    this.z = z;
+    this.ci = +ci;
+    this.precision = +precision;
+    if (!(0 <= this.ci && this.ci < 1)) throw new Error(`invalid ci; not in [0, 1): ${ci}`);
+    if (!(this.precision > 0)) throw new Error(`invalid precision: ${precision}`);
+  }
+  render(I, {x, y}, channels, dimensions) {
+    const {x: X, y: Y, z: Z} = channels;
+    const {dx, dy, ci} = this;
+    return create("svg:g")
+        .call(applyIndirectStyles, this, dimensions)
+        .call(applyTransform, x, y, offset + dx, offset + dy)
+        .call(g => g.selectAll()
+          .data(Z ? groupZ(I, Z, this.z) : [I])
+          .enter()
+          .call(enter => enter.append("path")
+            .attr("fill", "none")
+            .call(applyDirectStyles, this)
+            .call(applyGroupedChannelStyles, this, {...channels, fill: null, fillOpacity: null})
+            .attr("d", I => this._renderLine(I, X, Y))
+          .call(ci && !isNone(this.fill) ? path => path.select(pathBefore)
+            .attr("stroke", "none")
+            .call(applyDirectStyles, this)
+            .call(applyGroupedChannelStyles, this, {...channels, stroke: null, strokeOpacity: null, strokeWidth: null})
+            .attr("d", I => this._renderBand(I, X, Y)) : () => {})))
+      .node();
+  }
+}
+
+function pathBefore() {
+  return this.parentNode.insertBefore(this.ownerDocument.createElementNS(namespaces.svg, "path"), this);
+}
+
+class LinearRegressionX extends LinearRegression {
+  constructor(data, options) {
+    super(data, options);
+  }
+  _renderBand(I, X, Y) {
+    const {ci, precision} = this;
+    const [y1, y2] = extent(I, i => Y[i]);
+    const f = linearRegressionF(I, Y, X);
+    const g = confidenceIntervalF(I, Y, X, (1 - ci) / 2, f);
+    return shapeArea()
+        .y(y => y)
+        .x0(y => g(y, -1))
+        .x1(y => g(y, +1))
+      (range(y1, y2 - precision / 2, precision).concat(y2));
+  }
+  _renderLine(I, X, Y) {
+    const [y1, y2] = extent(I, i => Y[i]);
+    const f = linearRegressionF(I, Y, X);
+    return `M${f(y1)},${y1}L${f(y2)},${y2}`;
+  }
+}
+
+class LinearRegressionY extends LinearRegression {
+  constructor(data, options) {
+    super(data, options);
+  }
+  _renderBand(I, X, Y) {
+    const {ci, precision} = this;
+    const [x1, x2] = extent(I, i => X[i]);
+    const f = linearRegressionF(I, X, Y);
+    const g = confidenceIntervalF(I, X, Y, (1 - ci) / 2, f);
+    return shapeArea()
+        .x(x => x)
+        .y0(x => g(x, -1))
+        .y1(x => g(x, +1))
+      (range(x1, x2 - precision / 2, precision).concat(x2));
+  }
+  _renderLine(I, X, Y) {
+    const [x1, x2] = extent(I, i => X[i]);
+    const f = linearRegressionF(I, X, Y);
+    return `M${x1},${f(x1)}L${x2},${f(x2)}`;
+  }
+}
+
+export function linearRegressionX(data, {y = indexOf, x = identity, stroke, fill = isNoneish(stroke) ? "currentColor" : stroke, ...options} = {}) {
+  return new LinearRegressionX(data, maybeDenseIntervalY({...options, x, y, fill, stroke}));
+}
+
+export function linearRegressionY(data, {x = indexOf, y = identity, stroke, fill = isNoneish(stroke) ? "currentColor" : stroke, ...options} = {}) {
+  return new LinearRegressionY(data, maybeDenseIntervalX({...options, x, y, fill, stroke}));
+}
+
+function linearRegressionF(I, X, Y) {
+  let sumX = 0, sumY = 0, sumXY = 0, sumX2 = 0;
+  for (const i of I) {
+    const xi = X[i];
+    const yi = Y[i];
+    sumX += xi;
+    sumY += yi;
+    sumXY += xi * yi;
+    sumX2 += xi * xi;
+  }
+  const n = I.length;
+  const slope = (n * sumXY - sumX * sumY) / (n * sumX2 - sumX * sumX);
+  const intercept = (sumY - slope * sumX) / n;
+  return x => slope * x + intercept;
+}
+
+function confidenceIntervalF(I, X, Y, p, f) {
+  const mean = sum(I, i => X[i]) / I.length;
+  let a = 0, b = 0;
+  for (const i of I) {
+    a += (X[i] - mean) ** 2;
+    b += (Y[i] - f(X[i])) ** 2;
+  }
+  const sy = Math.sqrt(b / (I.length - 2));
+  const t = qt(p, I.length - 2);
+  return (x, k) => {
+    const Y = f(x);
+    const se = sy * Math.sqrt(1 / I.length + (x - mean) ** 2 / a);
+    return Y + k * t * se;
+  };
+}
@@ -0,0 +1,143 @@
+// https://github.com/jstat/jstat
+//
+// Copyright (c) 2013 jStat
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+export function ibetainv(p, a, b) {
+  var EPS = 1e-8;
+  var a1 = a - 1;
+  var b1 = b - 1;
+  var j = 0;
+  var lna, lnb, pp, t, u, err, x, al, h, w, afac;
+  if (p <= 0) return 0;
+  if (p >= 1) return 1;
+  if (a >= 1 && b >= 1) {
+    pp = p < 0.5 ? p : 1 - p;
+    t = Math.sqrt(-2 * Math.log(pp));
+    x = (2.30753 + t * 0.27061) / (1 + t * (0.99229 + t * 0.04481)) - t;
+    if (p < 0.5) x = -x;
+    al = (x * x - 3) / 6;
+    h = 2 / (1 / (2 * a - 1) + 1 / (2 * b - 1));
+    w =
+      (x * Math.sqrt(al + h)) / h -
+      (1 / (2 * b - 1) - 1 / (2 * a - 1)) * (al + 5 / 6 - 2 / (3 * h));
+    x = a / (a + b * Math.exp(2 * w));
+  } else {
+    lna = Math.log(a / (a + b));
+    lnb = Math.log(b / (a + b));
+    t = Math.exp(a * lna) / a;
+    u = Math.exp(b * lnb) / b;
+    w = t + u;
+    if (p < t / w) x = Math.pow(a * w * p, 1 / a);
+    else x = 1 - Math.pow(b * w * (1 - p), 1 / b);
+  }
+  afac = -gammaln(a) - gammaln(b) + gammaln(a + b);
+  for (; j < 10; j++) {
+    if (x === 0 || x === 1) return x;
+    err = ibeta(x, a, b) - p;
+    t = Math.exp(a1 * Math.log(x) + b1 * Math.log(1 - x) + afac);
+    u = err / t;
+    x -= t = u / (1 - 0.5 * Math.min(1, u * (a1 / x - b1 / (1 - x))));
+    if (x <= 0) x = 0.5 * (x + t);
+    if (x >= 1) x = 0.5 * (x + t + 1);
+    if (Math.abs(t) < EPS * x && j > 0) break;
+  }
+  return x;
+}
+
+export function ibeta(x, a, b) {
+  // Factors in front of the continued fraction.
+  var bt =
+    x === 0 || x === 1
+      ? 0
+      : Math.exp(
+          gammaln(a + b) -
+            gammaln(a) -
+            gammaln(b) +
+            a * Math.log(x) +
+            b * Math.log(1 - x)
+        );
+  if (x < 0 || x > 1) return false;
+  if (x < (a + 1) / (a + b + 2))
+    // Use continued fraction directly.
+    return (bt * betacf(x, a, b)) / a;
+  // else use continued fraction after making the symmetry transformation.
+  return 1 - (bt * betacf(1 - x, b, a)) / b;
+}
+
+export function betacf(x, a, b) {
+  var fpmin = 1e-30;
+  var m = 1;
+  var qab = a + b;
+  var qap = a + 1;
+  var qam = a - 1;
+  var c = 1;
+  var d = 1 - (qab * x) / qap;
+  var m2, aa, del, h;
+
+  // These q's will be used in factors that occur in the coefficients
+  if (Math.abs(d) < fpmin) d = fpmin;
+  d = 1 / d;
+  h = d;
+
+  for (; m <= 100; m++) {
+    m2 = 2 * m;
+    aa = (m * (b - m) * x) / ((qam + m2) * (a + m2));
+    // One step (the even one) of the recurrence
+    d = 1 + aa * d;
+    if (Math.abs(d) < fpmin) d = fpmin;
+    c = 1 + aa / c;
+    if (Math.abs(c) < fpmin) c = fpmin;
+    d = 1 / d;
+    h *= d * c;
+    aa = (-(a + m) * (qab + m) * x) / ((a + m2) * (qap + m2));
+    // Next step of the recurrence (the odd one)
+    d = 1 + aa * d;
+    if (Math.abs(d) < fpmin) d = fpmin;
+    c = 1 + aa / c;
+    if (Math.abs(c) < fpmin) c = fpmin;
+    d = 1 / d;
+    del = d * c;
+    h *= del;
+    if (Math.abs(del - 1.0) < 3e-7) break;
+  }
+
+  return h;
+}
+
+export function gammaln(x) {
+  var j = 0;
+  var cof = [
+    76.18009172947146, -86.5053203294167, 24.01409824083091, -1.231739572450155,
+    0.1208650973866179e-2, -0.5395239384953e-5
+  ];
+  var ser = 1.000000000190015;
+  var xx, y, tmp;
+  tmp = (y = xx = x) + 5.5;
+  tmp -= (xx + 0.5) * Math.log(tmp);
+  for (; j < 6; j++) ser += cof[j] / ++y;
+  return Math.log((2.506628274631 * ser) / xx) - tmp;
+}
+
+export function qt(p, dof) {
+  var x = ibetainv(2 * Math.min(p, 1 - p), 0.5 * dof, 0.5);
+  x = Math.sqrt((dof * (1 - x)) / x);
+  return p > 0.5 ? x : -x;
+}
@@ -184,7 +184,7 @@ function groupAesthetics({ariaLabel: AL, title: T, fill: F, fillOpacity: FO, str
   return [AL, T, F, FO, S, SO, SW, O, H].filter(c => c !== undefined);
 }
 
-function groupZ(I, Z, z) {
+export function groupZ(I, Z, z) {
   const G = group(I, i => Z[i]);
   if (z === undefined && G.size > I.length >> 1) {
     warn(`Warning: the implicit z channel has high cardinality. This may occur when the fill or stroke channel is associated with quantitative data rather than ordinal or categorical data. You can suppress this warning by setting the z option explicitly; if this data represents a single series, set z to null.`);
@@ -247,7 +247,7 @@ export function maybeClip(clip) {
   throw new Error(`invalid clip method: ${clip}`);
 }
 
-export function applyIndirectStyles(selection, mark, {width, height, marginLeft, marginRight, marginTop, marginBottom}) {
+export function applyIndirectStyles(selection, mark, dimensions) {
   applyAttr(selection, "aria-label", mark.ariaLabel);
   applyAttr(selection, "aria-description", mark.ariaDescription);
   applyAttr(selection, "aria-hidden", mark.ariaHidden);
@@ -265,6 +265,7 @@ export function applyIndirectStyles(selection, mark, {width, height, marginLeft,
   applyAttr(selection, "paint-order", mark.paintOrder);
   applyAttr(selection, "pointer-events", mark.pointerEvents);
   if (mark.clip === "frame") {
+    const {width, height, marginLeft, marginRight, marginTop, marginBottom} = dimensions;
     const id = `plot-clip-${++nextClipId}`;
     selection
         .attr("clip-path", `url(#${id})`)
 
@@ -71,6 +71,10 @@ https://www.metoffice.gov.uk/hadobs/hadcrut4/data/current/series_format.html
 The New York Times
 https://www.nytimes.com/2019/12/02/upshot/wealth-poverty-divide-american-cities.html
 
+## mtcars.csv
+1974 *Motor Trend* US magazine
+https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/mtcars
+
 ## moby-dick-chapter-1.txt
 *Moby Dick; or The Whale* by Herman Melville
 https://www.gutenberg.org/files/2701/2701-h/2701-h.htm