Skip to content

Commit

Permalink
Change library to single import; change distributed_log to mixture
Browse files Browse the repository at this point in the history
  • Loading branch information
peterhurford committed Sep 5, 2022
1 parent 2dbf7c9 commit c7886b6
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 47 deletions.
4 changes: 2 additions & 2 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## v0.2

* Move distributions into a separate library (`from squigglepy.distributions import *` will import all the distribution functions)
* Move `sample` into the base library (`import squigglepy` will import `sample`)
* Change `distributed_log` to `mixture` (to follow Squiggle) and allow it to implement any sub-distribution.
* Change library to a single import (`import squigglepy as sq` now exposes the distributions, numbers, `sample`, and utils).
* Remove `weighted_log` as a distribution.


Expand Down
59 changes: 30 additions & 29 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,73 +8,74 @@
Here's the Squigglepy implementation of [the example from Squiggle Docs](https://www.squiggle-language.com/docs/Overview):

```Python
import squigglepy
from squigglepy.distributions import *
from squigglepy.numbers import *
from squigglepy.utils import *
import squigglepy as sq

populationOfNewYork2022 = to(8.1*million(), 8.4*million()) # This means that you're 90% confident the value is between 8.1 and 8.4 Million.
populationOfNewYork2022 = sq.to(8.1*million(), 8.4*million()) # This means that you're 90% confident the value is between 8.1 and 8.4 Million.

def proportionOfPopulationWithPianos():
percentage = to(.2, 1)
return sample(percentage) * 0.01 # We assume there are almost no people with multiple pianos
percentage = sq.to(.2, 1)
return sq.sample(percentage) * 0.01 # We assume there are almost no people with multiple pianos

def pianoTunersPerPiano():
pianosPerPianoTuner = to(2*thousand(), 50*thousand())
return 1 / sample(pianosPerPianoTuner)
pianosPerPianoTuner = sq.to(2*thousand(), 50*thousand())
return 1 / sq.sample(pianosPerPianoTuner)

def totalTunersIn2022():
return (sample(populationOfNewYork2022) *
return (sq.sample(populationOfNewYork2022) *
proportionOfPopulationWithPianos() *
pianoTunersPerPiano())

get_percentiles(sample(totalTunersIn2022, n=1000))
sq.get_percentiles(sq.sample(totalTunersIn2022, n=1000))
```

And the version from the Squiggle doc that incorporates time:

```Python
import squigglepy
from squigglepy.distributions import *
from squigglepy.numbers import *
from squigglepy.utils import *
import squigglepy as sq
K = sq.thousand(); M = sq.million()

populationOfNewYork2022 = to(8.1*million(), 8.4*million())
populationOfNewYork2022 = sq.to(8.1*M, 8.4*M)

def proportionOfPopulationWithPianos():
percentage = to(.2, 1)
return sample(percentage) * 0.01
percentage = sq.to(.2, 1)
return sq.sample(percentage) * 0.01

def proportionOfPopulationWithPianos():
percentage = to(.2, 1)
return sample(percentage) * 0.01
percentage = sq.to(.2, 1)
return sq.sample(percentage) * 0.01

def pianoTunersPerPiano():
pianosPerPianoTuner = to(2*thousand(), 50*thousand())
return 1 / sample(pianosPerPianoTuner)
pianosPerPianoTuner = sq.to(2*K, 50*K)
return 1 / sq.sample(pianosPerPianoTuner)

# Time in years after 2022
def populationAtTime(t):
averageYearlyPercentageChange = to(-0.01, 0.05) # We're expecting NYC to continuously grow with a mean of roughly between -1% and +4% per year
return sample(populationOfNewYork2022) * ((sample(averageYearlyPercentageChange) + 1) ** t)
}
averageYearlyPercentageChange = sq.to(-0.01, 0.05) # We're expecting NYC to continuously grow with a mean of roughly between -1% and +4% per year
return sq.sample(populationOfNewYork2022) * ((sq.sample(averageYearlyPercentageChange) + 1) ** t)

def totalTunersAtTime(t):
return (populationAtTime(t) *
proportionOfPopulationWithPianos() *
pianoTunersPerPiano())

get_percentiles(sample(lambda: totalTunersAtTime(2030-2022), n=1000))
sq.get_percentiles(sq.sample(lambda: totalTunersAtTime(2030-2022), n=1000))
```

## Additional Features

Additional distributions:

```Python
sample(norm(1, 3))
sample(lognorm(1, 10))
sample(tdist(1, 10, t=5))
import squigglepy as sq

sq.sample(sq.norm(1, 3))
sq.sample(sq.lognorm(1, 10))
sq.sample(sq.tdist(1, 10, t=5))

sq.sample(sq.mixture([sq.norm(1, 3),
sq.norm(4, 10),
sq.lognorm(1, 10)],
[0.3, 0.3, 0.4]))
```

## Installation
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name='squigglepy',
version='0.1',
version='0.2',
author='Peter Hurford',
author_email='[email protected]',
description='Squiggle programming language for intuitive probabilistic estimation features in Python',
Expand Down
5 changes: 4 additions & 1 deletion squigglepy/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
from sample import sample
from .distributions import *
from .numbers import *
from .sample import *
from .utils import *
7 changes: 4 additions & 3 deletions squigglepy/distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@ def const(x):
def lognorm(x, y, lclip=None, rclip=None):
    """Build the specification of a lognormal distribution.

    Nothing is sampled here; the function only packs its arguments into
    the list format that ``sample`` dispatches on (the tag at index 2).

    Args:
        x: First distribution parameter. ``sample`` forwards it, with
            ``y`` and its own ``credibility``, to the lognormal sampler.
            Presumably the lower bound of the credible interval -- TODO
            confirm against ``lognormal_sample``.
        y: Second distribution parameter (presumably the upper bound).
        lclip: Stored at index 3. Presumably a lower clipping bound; how
            it is applied is not visible from this function.
        rclip: Stored at index 4. Presumably an upper clipping bound.

    Returns:
        list: ``[x, y, 'log', lclip, rclip]``.
    """
    return [x, y, 'log', lclip, rclip]

def distributed_lognorm(logs, weights, lclip=None, rclip=None):
    """Build the specification of a weighted collection of lognormals.

    Nothing is sampled here; the function only packs its arguments into
    the list format that ``sample`` dispatches on (the tag at index 2).

    Args:
        logs: Sequence of lognormal parameter pairs; the sampler reads
            element ``[0]`` and ``[1]`` of the entry it picks.
        weights: One weight per entry of ``logs``.
        lclip: Stored at index 3. Presumably a lower clipping bound; how
            it is applied is not visible from this function.
        rclip: Stored at index 4. Presumably an upper clipping bound.

    Returns:
        list: ``[logs, weights, 'distributed_log', lclip, rclip]``.
    """
    return [logs, weights, 'distributed_log', lclip, rclip]

def tdist(x, y, t, lclip=None, rclip=None):
    """Build the specification of a t-distribution.

    Nothing is sampled here; the function only packs its arguments into
    the list format that ``sample`` dispatches on (the tag at index 2).

    Args:
        x: First distribution parameter, forwarded by ``sample`` to the
            t sampler. Presumably the lower bound of the credible
            interval -- TODO confirm against ``t_sample``.
        y: Second distribution parameter (presumably the upper bound).
        t: Extra parameter forwarded to the t sampler. Presumably the
            degrees of freedom -- TODO confirm.
        lclip: Presumably a lower clipping bound; how it is applied is
            not visible from this function.
        rclip: Presumably an upper clipping bound.

    Returns:
        list: ``[x, y, 'tdist', t, lclip, rclip]``.
    """
    # NOTE: ``t`` occupies index 3, so the clip values sit at indices 4
    # and 5 here, one slot later than in the 5-element specs.
    return [x, y, 'tdist', t, lclip, rclip]

def log_tdist(x, y, t, lclip=None, rclip=None):
    """Build the specification of a log-t distribution.

    Nothing is sampled here; the function only packs its arguments into
    the list format that ``sample`` dispatches on (the tag at index 2).

    Args:
        x: First distribution parameter, forwarded by ``sample`` to the
            log-t sampler. Presumably the lower bound of the credible
            interval -- TODO confirm against ``log_t_sample``.
        y: Second distribution parameter (presumably the upper bound).
        t: Extra parameter forwarded to the log-t sampler. Presumably the
            degrees of freedom -- TODO confirm.
        lclip: Presumably a lower clipping bound; how it is applied is
            not visible from this function.
        rclip: Presumably an upper clipping bound.

    Returns:
        list: ``[x, y, 'log-tdist', t, lclip, rclip]``.
    """
    # NOTE: ``t`` occupies index 3, so the clip values sit at indices 4
    # and 5 here, one slot later than in the 5-element specs.
    return [x, y, 'log-tdist', t, lclip, rclip]

def mixture(dists, weights, lclip=None, rclip=None):
    """Build the specification of a weighted mixture of distributions.

    Nothing is sampled or validated here; the function only packs its
    arguments into the list format that ``sample`` dispatches on (the
    tag at index 2). When the spec is sampled, ``sample`` picks one
    component according to the cumulative weights and samples it
    recursively, so components may be any spec ``sample`` understands.

    Args:
        dists: Sequence of component distribution specs.
        weights: One weight per component. ``sample`` raises
            ``ValueError`` if the weights do not sum to roughly 1
            (outside the open interval 0.99-1.01) or if their count
            differs from the number of components.
        lclip: Stored at index 3. Presumably a lower clipping bound; how
            it is applied is not visible from this function.
        rclip: Stored at index 4. Presumably an upper clipping bound.

    Returns:
        list: ``[dists, weights, 'mixture', lclip, rclip]``.
    """
    return [dists, weights, 'mixture', lclip, rclip]

22 changes: 11 additions & 11 deletions squigglepy/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,29 +82,29 @@ def sample(var, credibility=0.9, n=1):
elif var[2] == 'log':
out = lognormal_sample(var[0], var[1], credibility)

elif var[2] == 'distributed_log':
elif var[2] == 'tdist':
out = t_sample(var[0], var[1], var[3], credibility)

elif var[2] == 'log-tdist':
out = log_t_sample(var[0], var[1], var[3], credibility)

elif var[2] == 'mixture':
weights = var[1]
sum_weights = sum(weights)
if sum_weights <= 0.99 or sum_weights >= 1.01:
raise ValueError('distributed_log weights don\'t sum to 1 - they sum to {}'.format(sum_weights))
raise ValueError('mixture weights don\'t sum to 1 - they sum to {}'.format(sum_weights))
if len(weights) != len(var[0]):
raise ValueError('distributed_log weights and distributions not same length')
raise ValueError('mixture weights and distributions not same length')
r_ = random.random()
weights = np.cumsum(weights)
done = False
for i, log_data in enumerate(var[0]):
for i, dist in enumerate(var[0]):
if not done:
weight = weights[i]
if r_ <= weight:
out = lognormal_sample(log_data[0], log_data[1], credibility)
out = sample(dist, credibility=credibility)
done = True

elif var[2] == 'tdist':
out = t_sample(var[0], var[1], var[3], credibility)

elif var[2] == 'log-tdist':
out = log_t_sample(var[0], var[1], var[3], credibility)

else:
raise ValueError('{} sampler not found'.format(var[2]))

Expand Down

0 comments on commit c7886b6

Please sign in to comment.