diff --git a/README.md b/README.md index 3df647095f..345ceb7ae3 100644 --- a/README.md +++ b/README.md @@ -1354,9 +1354,10 @@ The following aggregation methods are supported: * *max-index* - the zero-based index of the maximum value * *mean* - the mean value (average) * *median* - the median value +* *mode* - the value with the most occurrences +* *pXX* - the percentile value, where XX is a number in [00,99] * *deviation* - the standard deviation * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) -* *mode* - the value with the most occurrences * *x* - the middle the bin’s *x*-extent (when binning on *x*) * *x1* - the lower bound of the bin’s *x*-extent (when binning on *x*) * *x2* - the upper bound of the bin’s *x*-extent (when binning on *x*) @@ -1492,6 +1493,7 @@ The following aggregation methods are supported: * *max-index* - the zero-based index of the maximum value * *mean* - the mean value (average) * *median* - the median value +* *pXX* - the percentile value, where XX is a number in [00,99] * *deviation* - the standard deviation * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) * a function - passed the array of values for each group @@ -1579,10 +1581,11 @@ The Plot.normalizeX and Plot.normalizeY transforms normalize series values relat * *first* - the first value, as in an index chart; the default * *last* - the last value +* *min* - the minimum value * *max* - the maximum value * *mean* - the mean value (average) * *median* - the median value -* *min* - the minimum value +* *pXX* - the percentile value, where XX is a number in [00,99] * *sum* - the sum of values * *extent* - the minimum is mapped to zero, and the maximum to one * *deviation* - each value is transformed by subtracting the mean and then dividing by the standard deviation @@ -1601,6 +1604,7 @@ The following window reducers are supported: * *mean* - the mean (average) * *median* - the median * *mode* - the mode (most common occurrence) +* *pXX* - the percentile value, where XX is a number in [00,99] * *sum* - the sum of values * *deviation* - the standard deviation * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) diff --git a/src/options.js b/src/options.js index 142461e3d3..78be2825c4 100644 --- a/src/options.js +++ b/src/options.js @@ -1,5 +1,5 @@ import {parse as isoParse} from "isoformat"; -import {color, descending} from "d3"; +import {color, descending, quantile} from "d3"; import {symbolAsterisk, symbolDiamond2, symbolPlus, symbolSquare2, symbolTriangle2, symbolX as symbolTimes} from "d3"; import {symbolCircle, symbolCross, symbolDiamond, symbolSquare, symbolStar, symbolTriangle, symbolWye} from "d3"; @@ -29,6 +29,13 @@ export const first = x => x ? x[0] : undefined; export const second = x => x ? x[1] : undefined; export const constant = x => () => x; +// Converts a string like “p25” into a function that takes an index I and an +// accessor function f, returning the corresponding percentile value. +export function percentile(reduce) { + const p = +`${reduce}`.slice(1) / 100; + return (I, f) => quantile(I, p, f); +} + // Some channels may allow a string constant to be specified; to differentiate // string constants (e.g., "red") from named fields (e.g., "date"), this // function tests whether the given value is a CSS color string and returns a diff --git a/src/transforms/group.js b/src/transforms/group.js index 8ce6811355..677ad646e0 100644 --- a/src/transforms/group.js +++ b/src/transforms/group.js @@ -1,6 +1,6 @@ import {group as grouper, sort, sum, deviation, min, max, mean, median, mode, variance, InternSet, minIndex, maxIndex, rollup} from "d3"; import {ascendingDefined, firstof} from "../defined.js"; -import {valueof, maybeColorChannel, maybeInput, maybeTuple, maybeLazyChannel, lazyChannel, first, identity, take, labelof, range, second} from "../options.js"; +import {valueof, maybeColorChannel, maybeInput, maybeTuple, maybeLazyChannel, lazyChannel, first, identity, take, labelof, range, second, percentile} from "../options.js"; import {basic} from "./basic.js"; // Group on {z, fill, stroke}. @@ -198,6 +198,7 @@ export function maybeGroup(I, X) { export function maybeReduce(reduce, value) { if (reduce && typeof reduce.reduce === "function") return reduce; if (typeof reduce === "function") return reduceFunction(reduce); + if (/^p\d{2}$/i.test(reduce)) return reduceAccessor(percentile(reduce)); switch (`${reduce}`.toLowerCase()) { case "first": return reduceFirst; case "last": return reduceLast; diff --git a/src/transforms/normalize.js b/src/transforms/normalize.js index ee302c77d5..935f5d804b 100644 --- a/src/transforms/normalize.js +++ b/src/transforms/normalize.js @@ -1,6 +1,6 @@ import {extent, deviation, max, mean, median, min, sum} from "d3"; import {defined} from "../defined.js"; -import {take} from "../options.js"; +import {percentile, take} from "../options.js"; import {mapX, mapY} from "./map.js"; export function normalizeX(basis, options) { @@ -16,6 +16,7 @@ export function normalizeY(basis, options) { export function normalize(basis) { if (basis === undefined) return normalizeFirst; if (typeof basis === "function") return normalizeBasis((I, S) => basis(take(S, I))); + if (/^p\d{2}$/i.test(basis)) return normalizeAccessor(percentile(basis)); switch (`${basis}`.toLowerCase()) { case "deviation": return normalizeDeviation; case "first": return normalizeFirst; @@ -41,6 +42,10 @@ function normalizeBasis(basis) { }; } +function normalizeAccessor(f) { + return normalizeBasis((I, S) => f(I, i => S[i])); +} + const normalizeExtent = { map(I, S, T) { const [s1, s2] = extent(I, i => S[i]), d = s2 - s1; @@ -74,8 +79,8 @@ const normalizeDeviation = { } }; -const normalizeMax = normalizeBasis((I, S) => max(I, i => S[i])); -const normalizeMean = normalizeBasis((I, S) => mean(I, i => S[i])); -const normalizeMedian = normalizeBasis((I, S) => median(I, i => S[i])); -const normalizeMin = normalizeBasis((I, S) => min(I, i => S[i])); -const normalizeSum = normalizeBasis((I, S) => sum(I, i => S[i])); +const normalizeMax = normalizeAccessor(max); +const normalizeMean = normalizeAccessor(mean); +const normalizeMedian = normalizeAccessor(median); +const normalizeMin = normalizeAccessor(min); +const normalizeSum = normalizeAccessor(sum); diff --git a/src/transforms/window.js b/src/transforms/window.js index e386dbb91f..37f52ecdfc 100644 --- a/src/transforms/window.js +++ b/src/transforms/window.js @@ -1,6 +1,7 @@ import {mapX, mapY} from "./map.js"; import {deviation, max, min, median, mode, variance} from "d3"; import {warn} from "../warnings.js"; +import {percentile} from "../options.js"; export function windowX(windowOptions = {}, options) { if (arguments.length === 1) options = windowOptions; @@ -43,6 +44,7 @@ function maybeShift(shift) { function maybeReduce(reduce = "mean") { if (typeof reduce === "string") { + if (/^p\d{2}$/i.test(reduce)) return reduceSubarray(percentile(reduce)); switch (reduce.toLowerCase()) { case "deviation": return reduceSubarray(deviation); case "max": return reduceSubarray(max); diff --git a/test/plots/aapl-monthly.js b/test/plots/aapl-monthly.js index 47f3e633cf..47fb65fd11 100644 --- a/test/plots/aapl-monthly.js +++ b/test/plots/aapl-monthly.js @@ -4,8 +4,6 @@ import * as d3 from "d3"; export default async function() { const data = await d3.csv("data/aapl.csv", d3.autoType); const bin = {x: "Date", y: "Volume", thresholds: 40}; - const q1 = data => d3.quantile(data, 0.25); - const q3 = data => d3.quantile(data, 0.75); return Plot.plot({ y: { transform: d => d / 1e6, @@ -15,8 +13,8 @@ export default async function() { marks: [ Plot.ruleY([0]), Plot.ruleX(data, Plot.binX({y1: "min", y2: "max"}, {...bin, stroke: "#999"})), - Plot.rect(data, Plot.binX({y1: q1, y2: q3}, {...bin, fill: "#bbb"})), - Plot.ruleY(data, Plot.binX({y: "median"}, {...bin, strokeWidth: 2})) + Plot.rect(data, Plot.binX({y1: "p25", y2: "p75"}, {...bin, fill: "#bbb"})), + Plot.ruleY(data, Plot.binX({y: "p50"}, {...bin, strokeWidth: 2})) ] }); } diff --git a/test/plots/morley-boxplot.js b/test/plots/morley-boxplot.js index e581e8fe5c..8aaeb7d58b 100644 --- a/test/plots/morley-boxplot.js +++ b/test/plots/morley-boxplot.js @@ -17,8 +17,8 @@ function boxX(data, { } = {}) { return Plot.marks( Plot.ruleY(data, Plot.groupY({x1: iqr1, x2: iqr2}, {x, y, stroke, ...options})), - Plot.barX(data, Plot.groupY({x1: quartile1, x2: quartile3}, {x, y, fill, ...options})), - Plot.tickX(data, Plot.groupY({x: median}, {x, y, stroke, strokeWidth: 2, ...options})), + Plot.barX(data, Plot.groupY({x1: "p25", x2: "p75"}, {x, y, fill, ...options})), + Plot.tickX(data, Plot.groupY({x: "p50"}, {x, y, stroke, strokeWidth: 2, ...options})), Plot.dot(data, Plot.map({x: outliers}, {x, y, z: y, stroke, ...options})) ); } @@ -39,10 +39,6 @@ function iqr2(values, value) { return Math.min(d3.max(values, value), quartile3(values, value) * 2.5 - quartile1(values, value) * 1.5); } -function median(values, value) { - return d3.median(values, value); -} - function quartile1(values, value) { return d3.quantile(values, 0.25, value); } diff --git a/test/plots/movies-profit-by-genre.js b/test/plots/movies-profit-by-genre.js index 327780804d..142ab20b43 100644 --- a/test/plots/movies-profit-by-genre.js +++ b/test/plots/movies-profit-by-genre.js @@ -15,7 +15,7 @@ export default async function() { }, marks: [ Plot.ruleX([0]), - Plot.barX(movies, Plot.groupY({x1: quartile1, x2: quartile3}, { + Plot.barX(movies, Plot.groupY({x1: "p25", x2: "p75"}, { y: Genre, x: Profit, fillOpacity: 0.2 @@ -35,11 +35,3 @@ export default async function() { ] }); } - -function quartile1(values, value) { - return d3.quantile(values, 0.25, value); -} - -function quartile3(values, value) { - return d3.quantile(values, 0.75, value); -}