diff --git a/CHANGES.md b/CHANGES.md
index 5d8013a..351882d 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,7 +1,7 @@
 ## v0.2
 
-* Move distributions into a separate library (`from squigglepy.distributions import *` will import all the distribution functions)
-* Move `sample` into the base library (`import squigglepy` will import `sample`)
+* Change `distributed_log` to `mixture` (to follow Squiggle) and allow it to mix any sub-distributions.
+* Change library to a single import (`import squigglepy` now brings in distributions, numbers, sampling, and utils).
 * Remove `weighted_log` as a distribution.
diff --git a/README.md b/README.md
index 8ef7f5c..cdfc492 100644
--- a/README.md
+++ b/README.md
@@ -8,63 +8,57 @@
 Here's the Squigglepy implementation of [the example from Squiggle Docs](https://www.squiggle-language.com/docs/Overview):
 
 ```Python
-import squigglepy
-from squigglepy.distributions import *
-from squigglepy.numbers import *
-from squigglepy.utils import *
+import squigglepy as sq
 
-populationOfNewYork2022 = to(8.1*million(), 8.4*million()) # This means that you're 90% confident the value is between 8.1 and 8.4 Million.
+populationOfNewYork2022 = sq.to(8.1*sq.million(), 8.4*sq.million()) # This means that you're 90% confident the value is between 8.1 and 8.4 Million.
 
 def proportionOfPopulationWithPianos():
-    percentage = to(.2, 1)
-    return sample(percentage) * 0.01 # We assume there are almost no people with multiple pianos
+    percentage = sq.to(.2, 1)
+    return sq.sample(percentage) * 0.01 # We assume there are almost no people with multiple pianos
 
 def pianoTunersPerPiano():
-    pianosPerPianoTuner = to(2*thousand(), 50*thousand())
-    return 1 / sample(pianosPerPianoTuner)
+    pianosPerPianoTuner = sq.to(2*sq.thousand(), 50*sq.thousand())
+    return 1 / sq.sample(pianosPerPianoTuner)
 
 def totalTunersIn2022():
-    return (sample(populationOfNewYork2022) *
+    return (sq.sample(populationOfNewYork2022) *
             proportionOfPopulationWithPianos() *
             pianoTunersPerPiano())
 
-get_percentiles(sample(totalTunersIn2022, n=1000))
+sq.get_percentiles(sq.sample(totalTunersIn2022, n=1000))
 ```
 
 And the version from the Squiggle doc that incorporates time:
 
 ```Python
-import squigglepy
-from squigglepy.distributions import *
-from squigglepy.numbers import *
-from squigglepy.utils import *
+import squigglepy as sq
+K = sq.thousand(); M = sq.million()
 
-populationOfNewYork2022 = to(8.1*million(), 8.4*million())
+populationOfNewYork2022 = sq.to(8.1*M, 8.4*M)
 
 def proportionOfPopulationWithPianos():
-    percentage = to(.2, 1)
-    return sample(percentage) * 0.01
+    percentage = sq.to(.2, 1)
+    return sq.sample(percentage) * 0.01
 
 def proportionOfPopulationWithPianos():
-    percentage = to(.2, 1)
-    return sample(percentage) * 0.01
+    percentage = sq.to(.2, 1)
+    return sq.sample(percentage) * 0.01
 
 def pianoTunersPerPiano():
-    pianosPerPianoTuner = to(2*thousand(), 50*thousand())
-    return 1 / sample(pianosPerPianoTuner)
+    pianosPerPianoTuner = sq.to(2*K, 50*K)
+    return 1 / sq.sample(pianosPerPianoTuner)
 
 # Time in years after 2022
 def populationAtTime(t):
-    averageYearlyPercentageChange = to(-0.01, 0.05) # We're expecting NYC to continuously grow with an mean of roughly between -1% and +4% per year
-    return sample(populationOfNewYork2022) * ((sample(averageYearlyPercentageChange) + 1) ** t)
-}
+    averageYearlyPercentageChange = sq.to(-0.01, 0.05) # We're expecting NYC to continuously grow with a mean of roughly between -1% and +4% per year
+    return sq.sample(populationOfNewYork2022) * ((sq.sample(averageYearlyPercentageChange) + 1) ** t)
 
 def totalTunersAtTime(t):
     return (populationAtTime(t) *
             proportionOfPopulationWithPianos() *
             pianoTunersPerPiano())
 
-get_percentiles(sample(lambda: totalTunersAtTime(2030-2022), n=1000))
+sq.get_percentiles(sq.sample(lambda: totalTunersAtTime(2030-2022), n=1000))
 ```
 
 ## Additional Features
@@ -72,9 +66,16 @@ get_percentiles(sample(lambda: totalTunersAtTime(2030-2022), n=1000))
 Additional distributions:
 
 ```Python
-sample(norm(1, 3))
-sample(lognorm(1, 10))
-sample(tdist(1, 10, t=5))
+import squigglepy as sq
+
+sq.sample(sq.norm(1, 3))
+sq.sample(sq.lognorm(1, 10))
+sq.sample(sq.tdist(1, 10, t=5))
+
+sq.sample(sq.mixture([sq.norm(1, 3),
+                      sq.norm(4, 10),
+                      sq.lognorm(1, 10)],
+                     [0.3, 0.3, 0.4]))
 ```
 
 ## Installation
diff --git a/setup.py b/setup.py
index e923a51..9554c9d 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 setuptools.setup(
     name='squigglepy',
-    version='0.1',
+    version='0.2',
     author='Peter Hurford',
     author_email='peter@peterhurford.com',
     description='Squiggle programming language for intuitive probabilistic estimation features in Python',
diff --git a/squigglepy/__init__.py b/squigglepy/__init__.py
index ab733f4..9e22b86 100644
--- a/squigglepy/__init__.py
+++ b/squigglepy/__init__.py
@@ -1 +1,4 @@
-from sample import sample
+from .distributions import *
+from .numbers import *
+from .sample import *
+from .utils import *
diff --git a/squigglepy/distributions.py b/squigglepy/distributions.py
index 816ecf6..51da72a 100644
--- a/squigglepy/distributions.py
+++ b/squigglepy/distributions.py
@@ -10,11 +10,12 @@ def const(x):
 def lognorm(x, y, lclip=None, rclip=None):
     return [x, y, 'log', lclip, rclip]
 
-def distributed_lognorm(logs, weights, lclip=None, rclip=None):
-    return [logs, weights, 'distributed_log', lclip, rclip]
-
 def tdist(x, y, t, lclip=None, rclip=None):
     return [x, y, 'tdist', t, lclip, rclip]
 
 def log_tdist(x, y, t, lclip=None, rclip=None):
     return [x, y, 'log-tdist', t, lclip, rclip]
+
+def mixture(dists, weights, lclip=None, rclip=None):
+    return [dists, weights, 'mixture', lclip, rclip]
+
diff --git a/squigglepy/sample.py b/squigglepy/sample.py
index a20eac5..a7e07bd 100644
--- a/squigglepy/sample.py
+++ b/squigglepy/sample.py
@@ -82,29 +82,29 @@ def sample(var, credibility=0.9, n=1):
     elif var[2] == 'log':
         out = lognormal_sample(var[0], var[1], credibility)
 
-    elif var[2] == 'distributed_log':
+    elif var[2] == 'tdist':
+        out = t_sample(var[0], var[1], var[3], credibility)
+
+    elif var[2] == 'log-tdist':
+        out = log_t_sample(var[0], var[1], var[3], credibility)
+
+    elif var[2] == 'mixture':
         weights = var[1]
         sum_weights = sum(weights)
         if sum_weights <= 0.99 or sum_weights >= 1.01:
-            raise ValueError('distributed_log weights don\'t sum to 1 - they sum to {}'.format(sum_weights))
+            raise ValueError('mixture weights don\'t sum to 1 - they sum to {}'.format(sum_weights))
         if len(weights) != len(var[0]):
-            raise ValueError('distributed_log weights and distributions not same length')
+            raise ValueError('mixture weights and distributions not same length')
         r_ = random.random()
         weights = np.cumsum(weights)
         done = False
-        for i, log_data in enumerate(var[0]):
+        for i, dist in enumerate(var[0]):
             if not done:
                 weight = weights[i]
                 if r_ <= weight:
-                    out = lognormal_sample(log_data[0], log_data[1], credibility)
+                    out = sample(dist, credibility=credibility)
                     done = True
 
-    elif var[2] == 'tdist':
-        out = t_sample(var[0], var[1], var[3], credibility)
-
-    elif var[2] == 'log-tdist':
-        out = log_t_sample(var[0], var[1], var[3], credibility)
-
     else:
         raise ValueError('{} sampler not found'.format(var[2]))
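
For reference, a minimal usage sketch of the `mixture` distribution added in this change, pieced together from the README example and the weight checks in `sample.py` above; the variable name `p` is illustrative only.

```Python
import squigglepy as sq

# mixture() now accepts any sub-distributions, not just lognormals.
# The weights must sum to (approximately) 1 and match the number of
# distributions, or sample() raises a ValueError.
p = sq.mixture([sq.norm(1, 3),
                sq.lognorm(1, 10)],
               [0.5, 0.5])

sq.sample(p)  # picks one component at random, then samples from it
```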