You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Hi there. When I use ModelBuilder, I can fit and save a model, then load it and predict successfully within the same process, but loading and predicting fails in a different process.
The code is below:
`
from typing import Dict, List, Optional, Tuple, Union
import numpy as np
import pandas as pd
import pymc as pm
import xarray as xr
from pymc_experimental.model_builder import ModelBuilder
from numpy.random import RandomState
def makeData(trainNum=1000, testNum=200):
    """Generate a random toy dataset for a binary-classification example.

    Args:
        trainNum: Number of training rows to generate.
        testNum: Number of test rows to generate.

    Returns:
        A tuple ``(x_train, x_test, y_train)``: two DataFrames of uniform
        random values with columns ``f1``..``f5`` (``trainNum`` and
        ``testNum`` rows respectively) and a Series of ``trainNum`` random
        0/1 labels.
    """
    cols = ['f1', 'f2', 'f3', 'f4', 'f5']
    # Draw order (train features, test features, labels) is kept so a seeded
    # global NumPy RNG produces the same data as before.
    x_train = pd.DataFrame(np.random.random((trainNum, 5)), columns=cols)
    x_test = pd.DataFrame(np.random.random((testNum, 5)), columns=cols)
    y_train = pd.Series(np.random.randint(0, 2, trainNum))
    return x_train, x_test, y_train
# Generate the toy data once at module level with the default sizes
# (1000 training rows, 200 test rows).
x_train, x_test, y_train = makeData()
class BartModel(ModelBuilder):
    """``ModelBuilder`` subclass for a binary classifier with a BART mean.

    The model places a Bernoulli likelihood on the target, with success
    probability given by the inverse-logit of a ``pymc_bart.BART`` term
    computed from the feature matrix.
    """

    # Give the model a name
    _model_type = "BartModel"
    # And a version
    version = "0.1"

    def build_model(self, X: pd.DataFrame, y: pd.Series, **kwargs):
        """Build the PyMC model and store it on ``self.model``.

        Args:
            X: Feature matrix; a DataFrame or anything ``np.asarray`` accepts.
            y: Binary target values, one per row of ``X``.
            **kwargs: Accepted for interface compatibility; unused here.
        """
        # Imported locally because the file does not import pymc_bart at the top.
        import pymc_bart as pmb

        # Accept both DataFrames and plain arrays for X.
        X_values = X.values if isinstance(X, pd.DataFrame) else np.asarray(X)
        y_values = y
        self._generate_and_preprocess_model_data(X_values, y_values)

        with pm.Model(coords=self.model_coords) as self.model:
            # Only the features live in a mutable data container, so they can
            # be swapped for new data in `_data_setter`.
            x_data = pm.MutableData("x_data", X_values)
            # NOTE(review): y_values is passed to BART as a fixed array, so its
            # length stays at the training size after x_data is replaced. This
            # may be related to the reported shape mismatch (200 vs 1000) when
            # predicting after loading in a fresh process; confirm against
            # pymc-bart before relying on save/load across processes.
            mu = pmb.BART("mu", x_data, y_values)
            p = pm.Deterministic("p", pm.math.invlogit(mu))
            # shape follows mu so the likelihood resizes with x_data.
            pm.Bernoulli("y", p=p, shape=mu.shape, observed=y_values)

    def _data_setter(
        self,
        X: Union[pd.DataFrame, np.ndarray],
        y: Optional[Union[pd.Series, np.ndarray]] = None,
    ):
        """Replace the ``x_data`` container of the built model with ``X``.

        Args:
            X: New feature matrix with the same columns as the training data.
            y: Accepted for interface compatibility; unused here.
        """
        # Keep every feature column. The model's "x_data" container is built
        # from the full 2-D feature matrix, so slicing out only the first
        # column for array input would hand the model 1-D data.
        x_values = X.values if isinstance(X, pd.DataFrame) else np.asarray(X)
        with self.model:
            pm.set_data({"x_data": x_values})

    @staticmethod
    def get_default_model_config() -> Dict:
        """Return the default model config used when none is given at init.

        The model config dict is generally used to specify the prior values
        the model is built with. This model defines no configurable priors,
        so the default is an empty dict.
        """
        model_config: Dict = {}
        return model_config

    @staticmethod
    def get_default_sampler_config() -> Dict:
        """Return the default sampler config used when none is given at init.

        The sampler config dict holds the parameters sent to the sampler
        during fitting.
        """
        sampler_config: Dict = {
            "draws": 100,
            "tune": 100,
            "chains": 1,
        }
        return sampler_config

    @property
    def output_var(self):
        """Name of the observed (target) variable in the model."""
        return "y"

    @property
    def _serializable_model_config(self) -> Dict[str, Union[int, float, Dict]]:
        """Return the model parameters to save, in JSON-serializable form.

        Some models must convert non-serializable structures here; this one
        returns ``self.model_config`` unchanged.
        """
        return self.model_config

    def _save_input_params(self, idata) -> None:
        """Save extra model parameters (other than the dataset) to ``idata``.

        Such parameters are stored within ``idata.attrs`` under keys matching
        the parameter names. This model has none to store, so the method
        does nothing.

        Example:
            For saving customer IDs provided as a 'customer_ids' input:
                self.customer_ids = customer_ids.values  # done outside this method, preferably at initialization
                idata.attrs["customer_ids"] = json.dumps(self.customer_ids.tolist())
        """

    def _generate_and_preprocess_model_data(
        self, X: Union[pd.DataFrame, pd.Series], y: Union[pd.Series, np.ndarray]
    ) -> None:
        """Store the model inputs on the instance.

        Any preprocessing or coordinate extraction a model needs before
        fitting belongs here. This basic model does neither: it stores ``X``
        and ``y`` as given and sets ``model_coords`` to ``None``.
        """
        # No coords are used by this model; if they were, they would be
        # defined here, possibly extracted from the data.
        self.model_coords = None
        self.X = X
        self.y = y
My question is:
If I call both `fitNsave()` and `loadTest()` in the same process, it works well.
But if I call `fitNsave()` in one process and `loadTest()` in another process, a shape error is raised:
raise ValueError("size does not match the broadcast shape of "
ValueError: size does not match the broadcast shape of the parameters. (200,), (200,), (1000,)
The text was updated successfully, but these errors were encountered:
I'm not familiar with pymc-BART, but I guess it builds some trees that produce a mapping f(X) -> y. I don't know what those would be, but I don't see anything that looks like BART parameters being added to the trace (idata) by pm.sample(). The trace is what gets saved and restored when saving and loading the model. I suspect this could be related to this pymc-bart issue: pymc-devs/pymc-bart#123
Hi there. When I use ModelBuilder, I can fit and save a model, then load it and predict successfully within the same process, but loading and predicting fails in a different process.
The code is below:
`
from typing import Dict, List, Optional, Tuple, Union
import numpy as np
import pandas as pd
import pymc as pm
import xarray as xr
from pymc_experimental.model_builder import ModelBuilder
from numpy.random import RandomState
def makeData(trainNum=1000, testNum=200):
    """Generate a random toy dataset for a binary-classification example.

    Args:
        trainNum: Number of training rows to generate.
        testNum: Number of test rows to generate.

    Returns:
        A tuple ``(x_train, x_test, y_train)``: two DataFrames of uniform
        random values with columns ``f1``..``f5`` (``trainNum`` and
        ``testNum`` rows respectively) and a Series of ``trainNum`` random
        0/1 labels.
    """
    cols = ['f1', 'f2', 'f3', 'f4', 'f5']
    # Draw order (train features, test features, labels) is kept so a seeded
    # global NumPy RNG produces the same data as before.
    x_train = pd.DataFrame(np.random.random((trainNum, 5)), columns=cols)
    x_test = pd.DataFrame(np.random.random((testNum, 5)), columns=cols)
    y_train = pd.Series(np.random.randint(0, 2, trainNum))
    return x_train, x_test, y_train
# Generate the toy data once at module level with the default sizes;
# fitNsave() and loadTest() below read these globals.
x_train, x_test, y_train = makeData()
class BartModel(ModelBuilder):
    # NOTE(review): only the class header and model name are shown in this
    # copy of the snippet; the remaining methods appear to be elided.
    # Give the model a name
    _model_type = "BartModel"
# File path the fitted model is saved to by fitNsave() and loaded from by loadTest().
modelPath = "./t.model"
def fitNsave():
    """Fit a ``BartModel`` on the training data, predict once, and save it.

    Reads the module-level ``x_train``, ``y_train`` and ``x_test`` and writes
    the fitted model to ``modelPath``.
    """
    model = BartModel()
    model.fit(x_train, y_train)
    # The prediction result is not used; predict is called before saving.
    model.predict(x_test)
    model.save(modelPath)
def loadTest():
    """Load the model saved at ``modelPath`` and predict on ``x_test``.

    Prints a confirmation message once the prediction call returns.
    """
    model2 = BartModel.load(modelPath)
    # The prediction result is not needed; the call is what is being tested.
    model2.predict(x_test)
    print("load test done!")
`
My question is:
If I call both in the same process:
fitNsave() loadTest()
it works well.
But if I call `fitNsave()` in one process and `loadTest()` in another process, a shape error is raised:
raise ValueError("size does not match the broadcast shape of "
ValueError: size does not match the broadcast shape of the parameters. (200,), (200,), (1000,)
The text was updated successfully, but these errors were encountered: