UDST · smmaurer · Sep 6, 2018 · Aug 6, 2018 · Aug 7, 2018 · Aug 8, 2018
diff --git a/choicemodels/__init__.py b/choicemodels/__init__.py
@@ -4,4 +4,4 @@
 from .choicemodels import *
 from .mnl import MultinomialLogit, MultinomialLogitResults
 
-version = __version__ = '0.1.1'
+version = __version__ = '0.2.dev1'
diff --git a/choicemodels/mnl.py b/choicemodels/mnl.py
@@ -8,6 +8,7 @@
 import scipy.optimize
 import scipy.stats
 import datetime
+from .tools import MergedChoiceTable
 from .tools import pmat
 from .tools.pmat import PMAT
 from collections import OrderedDict
@@ -45,7 +46,8 @@ class MultinomialLogit(object):
        location choice.
 
        The following parameters are required: 'data', 'observation_id_col', 'choice_col',
-       'model_expression' in Patsy format.
+       'model_expression' in Patsy format. If data is provided as a MergedChoiceTable,
+       the observation id and choice column names can be read directly from its metadata.
 
        To fit this type of model, ChoiceModels will use its own estimation engine adapted
        from the UrbanSim MNL codebase.
@@ -81,19 +83,11 @@ class MultinomialLogit(object):
     Parameters
     ----------
 
-    data : pandas.DataFrame
+    data : pandas.DataFrame or choicemodels.tools.MergedChoiceTable
         A table of estimation data in "long" format, with one row for each combination of
         chooser and alternative. Column labeling must be consistent with the
         'model_expression'. May include extra columns.
 
-    observation_id_col : str
-        Name of column containing the observation id. This should uniquely identify each
-        distinct choice scenario.
-
-    choice_col : str
-        Name of column containing an indication of which alternative has been chosen in
-        each scenario. Values should evaluate as binary: 1/0, True/False, etc.
-
     model_expression : Patsy 'formula-like' or PyLogit 'specification'
         For the simpler use case where each choice scenario has the same number of
         alternatives and each alternative has the same model expression, this should be a
@@ -106,14 +100,25 @@ class MultinomialLogit(object):
         model specification. See here:
         https://github.com/timothyb0912/pylogit/blob/master/pylogit/pylogit.py#L116-L130
 
+    observation_id_col : str, optional
+        Name of column or index containing the observation id. This should uniquely 
+        identify each distinct choice scenario. Not required if data is passed as a 
+        MergedChoiceTable.
+
+    choice_col : str, optional
+        Name of column containing an indication of which alternative has been chosen in
+        each scenario. Values should evaluate as binary: 1/0, True/False, etc. Not
+        required if data is passed as a MergedChoiceTable.
+
     model_labels : PyLogit 'names', optional
         If the model expression is a PyLogit OrderedDict, you can provide a corresponding
         OrderedDict of labels. See here:
         https://github.com/timothyb0912/pylogit/blob/master/pylogit/pylogit.py#L151-L165
 
     alternative_id_col : str, optional
-        Name of column containing the alternative id. This is only required if the model
-        expression varies for different alternatives.
+        Name of column or index containing the alternative id. This is only required if 
+        the model expression varies for different alternatives. Not required if data is 
+        passed as a MergedChoiceTable.
 
     initial_coefs : float, list, or 1D array, optional
         Initial coefficients (beta values) to begin the optimization process with. Provide
@@ -124,18 +129,27 @@ class MultinomialLogit(object):
         NOT YET IMPLEMENTED - Estimation weights.
 
     """
-    def __init__(self, data, observation_id_col, choice_col, model_expression,
+    def __init__(self, data, model_expression, observation_id_col=None, choice_col=None,
                  model_labels=None, alternative_id_col=None, initial_coefs=None,
                  weights=None):
         self._data = data
+        self._model_expression = model_expression
         self._observation_id_col = observation_id_col
         self._alternative_id_col = alternative_id_col
         self._choice_col = choice_col
-        self._model_expression = model_expression
         self._model_labels = model_labels
         self._initial_coefs = initial_coefs
         self._weights = weights
 
+        if isinstance(self._data, MergedChoiceTable):
+            self._df = self._data.to_frame()
+            self._observation_id_col = self._data.observation_id_col
+            self._alternative_id_col = self._data.alternative_id_col
+            self._choice_col = self._data.choice_col
+
+        else:
+            self._df = self._data
+
         if isinstance(self._model_expression, OrderedDict):
             self._estimation_engine = 'PyLogit'
 
@@ -152,9 +166,9 @@ def __init__(self, data, observation_id_col, choice_col, model_expression,
 
         else:
             self._estimation_engine = 'ChoiceModels'
-            self._numobs = self._data[[self._observation_id_col]].\
+            self._numobs = self._df.reset_index()[[self._observation_id_col]].\
                                     drop_duplicates().shape[0]
-            self._numalts = self._data.shape[0] // self._numobs
+            self._numalts = self._df.shape[0] // self._numobs
 
             # TO DO: parse initial coefs
 
@@ -169,9 +183,9 @@ def _validate_input_data(self):
          - verify chosen alternative listed first]
 
         """
-        self._data = self._data.sort_values(by = [self._observation_id_col,
-                                                  self._choice_col],
-                                            ascending = [True, False])
+        self._data = self._data.reset_index().sort_values(by = [self._observation_id_col,
+                                                                self._choice_col],
+                                                          ascending = [True, False])
         return
 
     def fit(self):
@@ -198,7 +212,7 @@ def fit(self):
         """
         if (self._estimation_engine == 'PyLogit'):
 
-            m = pylogit.create_choice_model(data = self._data,
+            m = pylogit.create_choice_model(data = self._df,
                                             obs_id_col = self._observation_id_col,
                                             alt_id_col = self._alternative_id_col,
                                             choice_col = self._choice_col,
@@ -212,11 +226,11 @@ def fit(self):
 
         elif (self._estimation_engine == 'ChoiceModels'):
 
-            model_design = dmatrix(self._model_expression, data=self._data,
+            model_design = dmatrix(self._model_expression, data=self._df,
                                    return_type='dataframe')
 
             # generate 2D array from choice column, for mnl_estimate()
-            chosen = np.reshape(self._data[[self._choice_col]].as_matrix(),
+            chosen = np.reshape(self._df[[self._choice_col]].as_matrix(),
                                 (self._numobs, self._numalts))
 
             log_lik, fit = mnl_estimate(model_design.as_matrix(), chosen, self._numalts)

diff --git a/choicemodels/tools/__init__.py b/choicemodels/tools/__init__.py
@@ -1,4 +1,4 @@
 # ChoiceModels
 # See full license in LICENSE
 
-from .interaction import *
+from .mergedchoicetable import *
diff --git a/choicemodels/tools/interaction.py b/choicemodels/tools/interaction.py