Change library to single import; change distributed_log to mixture

rethinkpriorities · Sep 5, 2022 · c7886b6 · c7886b6
1 parent 2dbf7c9
commit c7886b6
Show file tree

Hide file tree

Showing 6 changed files with 52 additions and 47 deletions.
diff --git a/CHANGES.md b/CHANGES.md
@@ -1,7 +1,7 @@
 ## v0.2
 
-* Move distributions into a separate library (`from squigglepy.distributions import *` will import all the distribution functions)
-* Move `sample` into the base library (`import squigglepy` will import `sample`)
+* Change `distributed_log` to `mixture` (to follow Squiggle) and allow it to mix any sub-distributions, not only lognormals.
+* Change library to a single import (`import squigglepy as sq`).
 * Remove `weighted_log` as a distribution.
 
 

diff --git a/README.md b/README.md
@@ -8,73 +8,74 @@
 Here's the Squigglepy implementation of [the example from Squiggle Docs](https://www.squiggle-language.com/docs/Overview):
 
 ```Python
-import squigglepy
-from squigglepy.distributions import *
-from squigglepy.numbers import *
-from squigglepy.utils import *
+import squigglepy as sq
 
-populationOfNewYork2022 = to(8.1*million(), 8.4*million()) # This means that you're 90% confident the value is between 8.1 and 8.4 Million.
+populationOfNewYork2022 = sq.to(8.1*sq.million(), 8.4*sq.million()) # This means that you're 90% confident the value is between 8.1 and 8.4 million.
 
 def proportionOfPopulationWithPianos():
-    percentage = to(.2, 1)
-    return sample(percentage) * 0.01 # We assume there are almost no people with multiple pianos
+    percentage = sq.to(.2, 1)
+    return sq.sample(percentage) * 0.01 # We assume there are almost no people with multiple pianos
 
 def pianoTunersPerPiano():
-    pianosPerPianoTuner = to(2*thousand(), 50*thousand())
-    return 1 / sample(pianosPerPianoTuner)
+    pianosPerPianoTuner = sq.to(2*sq.thousand(), 50*sq.thousand())
+    return 1 / sq.sample(pianosPerPianoTuner)
 
 def totalTunersIn2022():
-    return (sample(populationOfNewYork2022) *
+    return (sq.sample(populationOfNewYork2022) *
             proportionOfPopulationWithPianos() *
             pianoTunersPerPiano())
 
-get_percentiles(sample(totalTunersIn2022, n=1000))
+sq.get_percentiles(sq.sample(totalTunersIn2022, n=1000))
 ```
 
 And the version from the Squiggle doc that incorporates time:
 
 ```Python
-import squigglepy
-from squigglepy.distributions import *
-from squigglepy.numbers import *
-from squigglepy.utils import *
+import squigglepy as sq
+K = sq.thousand(); M = sq.million()
 
-populationOfNewYork2022 = to(8.1*million(), 8.4*million())
+populationOfNewYork2022 = sq.to(8.1*M, 8.4*M)
 
 def proportionOfPopulationWithPianos():
-    percentage = to(.2, 1)
-    return sample(percentage) * 0.01
+    percentage = sq.to(.2, 1)
+    return sq.sample(percentage) * 0.01
 
 def proportionOfPopulationWithPianos():
-    percentage = to(.2, 1)
-    return sample(percentage) * 0.01
+    percentage = sq.to(.2, 1)
+    return sq.sample(percentage) * 0.01
 
 def pianoTunersPerPiano():
-    pianosPerPianoTuner = to(2*thousand(), 50*thousand())
-    return 1 / sample(pianosPerPianoTuner)
+    pianosPerPianoTuner = sq.to(2*K, 50*K)
+    return 1 / sq.sample(pianosPerPianoTuner)
 
 # Time in years after 2022
 def populationAtTime(t):
-    averageYearlyPercentageChange = to(-0.01, 0.05) # We're expecting NYC to continuously grow with an mean of roughly between -1% and +4% per year
-    return sample(populationOfNewYork2022) * ((sample(averageYearlyPercentageChange) + 1) ** t)
-}
+    averageYearlyPercentageChange = sq.to(-0.01, 0.05) # We're expecting NYC to grow continuously, with a mean yearly change roughly between -1% and +5%
+    return sq.sample(populationOfNewYork2022) * ((sq.sample(averageYearlyPercentageChange) + 1) ** t)
 
 def totalTunersAtTime(t):
     return (populationAtTime(t) *
             proportionOfPopulationWithPianos() *
             pianoTunersPerPiano())
 
-get_percentiles(sample(lambda: totalTunersAtTime(2030-2022), n=1000))
+sq.get_percentiles(sq.sample(lambda: totalTunersAtTime(2030-2022), n=1000))
 ```
 
 ## Additional Features
 
 Additional distributions:
 
 ```Python
-sample(norm(1, 3))
-sample(lognorm(1, 10))
-sample(tdist(1, 10, t=5))
+import squigglepy as sq
+
+sq.sample(sq.norm(1, 3))
+sq.sample(sq.lognorm(1, 10))
+sq.sample(sq.tdist(1, 10, t=5))
+
+sq.sample(sq.mixture([sq.norm(1, 3),
+                      sq.norm(4, 10),
+                      sq.lognorm(1, 10)],
+                      [0.3, 0.3, 0.4]))
 ```
 
 ## Installation

diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
      name='squigglepy',  
-     version='0.1',
+     version='0.2',
      author='Peter Hurford',
      author_email='[email protected]',
      description='Squiggle programming language for intuitive probabilistic estimation features in Python',

diff --git a/squigglepy/__init__.py b/squigglepy/__init__.py
@@ -1 +1,4 @@
-from sample import sample 
+from .distributions import *
+from .numbers import *
+from .sample import *
+from .utils import *
diff --git a/squigglepy/distributions.py b/squigglepy/distributions.py
@@ -10,11 +10,12 @@ def const(x):
 def lognorm(x, y, lclip=None, rclip=None):
     return [x, y, 'log', lclip, rclip]
 
-def distributed_lognorm(logs, weights, lclip=None, rclip=None):
-    return [logs, weights, 'distributed_log', lclip, rclip]
-
 def tdist(x, y, t, lclip=None, rclip=None):
     return [x, y, 'tdist', t, lclip, rclip]
 
 def log_tdist(x, y, t, lclip=None, rclip=None):
     return [x, y, 'log-tdist', t, lclip, rclip]
+
+def mixture(dists, weights, lclip=None, rclip=None):
+    return [dists, weights, 'mixture', lclip, rclip]
+
diff --git a/squigglepy/sample.py b/squigglepy/sample.py
@@ -82,29 +82,29 @@ def sample(var, credibility=0.9, n=1):
     elif var[2] == 'log':
         out = lognormal_sample(var[0], var[1], credibility)
 
-    elif var[2] == 'distributed_log':
+    elif var[2] == 'tdist':
+        out = t_sample(var[0], var[1], var[3], credibility)
+
+    elif var[2] == 'log-tdist':
+        out = log_t_sample(var[0], var[1], var[3], credibility)
+
+    elif var[2] == 'mixture':
         weights = var[1]
         sum_weights = sum(weights)
         if sum_weights <= 0.99 or sum_weights >= 1.01:
-            raise ValueError('distributed_log weights don\'t sum to 1 - they sum to {}'.format(sum_weights))
+            raise ValueError('mixture weights don\'t sum to 1 - they sum to {}'.format(sum_weights))
         if len(weights) != len(var[0]):
-            raise ValueError('distributed_log weights and distributions not same length')
+            raise ValueError('mixture weights and distributions not same length')
         r_ = random.random()
         weights = np.cumsum(weights)
         done = False
-        for i, log_data in enumerate(var[0]):
+        for i, dist in enumerate(var[0]):
             if not done:
                 weight = weights[i]
                 if r_ <= weight:
-                    out = lognormal_sample(log_data[0], log_data[1], credibility)
+                    out = sample(dist, credibility=credibility)
                     done = True
 
-    elif var[2] == 'tdist':
-        out = t_sample(var[0], var[1], var[3], credibility)
-
-    elif var[2] == 'log-tdist':
-        out = log_t_sample(var[0], var[1], var[3], credibility)
-
     else:
         raise ValueError('{} sampler not found'.format(var[2]))