@@ -23,15 +23,14 @@ class KernelPCA(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, n_components: int = 10,
                  kernel: str = 'rbf', degree: int = 3,
                  gamma: float = 0.01, coef0: float = 0.0,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None
+                 random_state: Optional[np.random.RandomState] = None
                  ) -> None:
         self.n_components = n_components
         self.kernel = kernel
         self.degree = degree
         self.gamma = gamma
         self.coef0 = coef0
-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

         self.add_fit_requirements([
             FitRequirement('issparse', (bool,), user_defined=True, dataset_property=True)])
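
The five wrappers below receive the same two-line treatment: the annotation stops advertising a bare int seed, and ownership of self.random_state moves into the base class. The narrowing only affects the autoPyTorch-side signature; the wrapped scikit-learn estimators still accept an int, a RandomState instance, or None. A standalone sketch against scikit-learn alone (toy data and hyperparameters chosen for illustration):

    import numpy as np
    from sklearn.decomposition import KernelPCA as SkKernelPCA

    X = np.random.RandomState(0).rand(20, 4)
    # scikit-learn takes a RandomState instance directly, so the wrapper can
    # forward whatever its base class stored in self.random_state.
    transformer = SkKernelPCA(n_components=2, kernel='rbf', gamma=0.01,
                              random_state=np.random.RandomState(3))
    print(transformer.fit_transform(X).shape)  # (20, 2)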

@@ -23,15 +23,14 @@ class Nystroem(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, n_components: int = 10,
                  kernel: str = 'rbf', degree: int = 3,
                  gamma: float = 0.01, coef0: float = 0.0,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None
+                 random_state: Optional[np.random.RandomState] = None
                  ) -> None:
         self.n_components = n_components
         self.kernel = kernel
         self.degree = degree
         self.gamma = gamma
         self.coef0 = coef0
-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:

@@ -19,13 +19,12 @@
 class PolynomialFeatures(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, degree: int = 2, interaction_only: bool = False,
                  include_bias: bool = False,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None):
+                 random_state: Optional[np.random.RandomState] = None):
         self.degree = degree
         self.interaction_only = interaction_only
         self.include_bias = include_bias

-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
         self.preprocessor['numerical'] = sklearn.preprocessing.PolynomialFeatures(
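
scikit-learn's PolynomialFeatures is deterministic, so the random_state parameter on this wrapper seems to exist purely to keep the component signatures uniform. For reference, a quick illustrative sketch of what the wrapped estimator produces with the defaults above (degree=2, no bias column):

    import numpy as np
    from sklearn.preprocessing import PolynomialFeatures as SkPolynomialFeatures

    X = np.arange(6, dtype=float).reshape(3, 2)
    # With degree=2 and include_bias=False the output columns are
    # x0, x1, x0^2, x0*x1, x1^2; e.g. row [2, 3] -> [2, 3, 4, 6, 9].
    print(SkPolynomialFeatures(degree=2, include_bias=False).fit_transform(X))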

@@ -17,11 +17,10 @@

 class PowerTransformer(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, standardize: bool = True,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None):
+                 random_state: Optional[np.random.RandomState] = None):
         self.standardize = standardize

-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
         self.preprocessor['numerical'] = sklearn.preprocessing.PowerTransformer(method="yeo-johnson",

@@ -20,12 +20,11 @@
 class RandomKitchenSinks(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, n_components: int = 100,
                  gamma: float = 1.0,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None
+                 random_state: Optional[np.random.RandomState] = None
                  ) -> None:
         self.n_components = n_components
         self.gamma = gamma
-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:

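RandomKitchenSinks is a component where the random state genuinely matters: the estimator it wraps (presumably scikit-learn's RBFSampler, i.e. random Fourier features for the RBF kernel) draws its projection weights from it. A minimal sketch with assumed toy data:

    import numpy as np
    from sklearn.kernel_approximation import RBFSampler

    X = np.random.RandomState(0).rand(10, 4)
    # The fitted random weights depend entirely on random_state, so two
    # samplers seeded identically produce identical feature maps.
    features = RBFSampler(n_components=100, gamma=1.0,
                          random_state=np.random.RandomState(1)).fit_transform(X)
    print(features.shape)  # (10, 100)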

@@ -18,11 +18,10 @@

 class TruncatedSVD(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, target_dim: int = 128,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None):
+                 random_state: Optional[np.random.RandomState] = None):
         self.target_dim = target_dim

-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:

@@ -1,4 +1,8 @@
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional

+import numpy as np
+
+from sklearn.utils import check_random_state
+
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import (
     autoPyTorchTabularPreprocessingComponent
@@ -8,7 +12,13 @@
 class autoPyTorchFeaturePreprocessingComponent(autoPyTorchTabularPreprocessingComponent):
     _required_properties: List[str] = ['handles_sparse']

-    def __init__(self) -> None:
+    def __init__(self, random_state: Optional[np.random.RandomState] = None) -> None:
+        if random_state is None:
+            # Trainer components need a random state for
+            # sampling -- for example in MixUp training
+            self.random_state = check_random_state(1)
+        else:
+            self.random_state = random_state
         super().__init__()

     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
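
This base-class change is what lets every subclass above drop its local self.random_state assignment. Below is a self-contained sketch of the pattern (illustrative class names, not the actual autoPyTorch code). scikit-learn's check_random_state passes a RandomState through unchanged, wraps an int seed in a fresh RandomState, and returns the global NumPy state for None; here it is only called on the None branch, with a fixed seed of 1:

    from typing import Optional

    import numpy as np
    from sklearn.utils import check_random_state

    class Base:
        # Single owner of random_state, as in autoPyTorchFeaturePreprocessingComponent.
        def __init__(self, random_state: Optional[np.random.RandomState] = None) -> None:
            # None falls back to a fixed seed, keeping runs reproducible by default.
            self.random_state = check_random_state(1) if random_state is None else random_state

    class Component(Base):
        # Mirrors the preprocessors above: no local random_state bookkeeping.
        def __init__(self, n_components: int = 10,
                     random_state: Optional[np.random.RandomState] = None) -> None:
            self.n_components = n_components
            super().__init__(random_state=random_state)

    c = Component()  # no seed supplied
    assert isinstance(c.random_state, np.random.RandomState)
    print(c.random_state.randint(100))  # same value on every run: the seed is fixed

Defaulting to check_random_state(1) rather than leaving None in place trades flexibility for reproducibility: any two components constructed without an explicit state will draw identical random streams.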

test/conftest.py (22 additions, 14 deletions)

@@ -25,6 +25,9 @@
 from autoPyTorch.utils.pipeline import get_dataset_requirements


+N_SAMPLES = 200
+
+
 @pytest.fixture(scope="session")
 def callattr_ahead_of_alltests(request):
     """
@@ -191,7 +194,7 @@ def session_run_at_end():
 def get_tabular_data(task):
     if task == "classification_numerical_only":
         X, y = make_classification(
-            n_samples=200,
+            n_samples=N_SAMPLES,
             n_features=4,
             n_informative=3,
             n_redundant=1,
@@ -207,18 +210,18 @@ def get_tabular_data(task):
         X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
         categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category']
         X = X[categorical_columns]
-        X = X.iloc[0:200]
-        y = y.iloc[0:200]
+        X = X.iloc[0:N_SAMPLES]
+        y = y.iloc[0:N_SAMPLES]
         validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy())

     elif task == "classification_numerical_and_categorical":
         X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
-        X = X.iloc[0:200]
-        y = y.iloc[0:200]
+        X = X.iloc[0:N_SAMPLES]
+        y = y.iloc[0:N_SAMPLES]
         validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy())

     elif task == "regression_numerical_only":
-        X, y = make_regression(n_samples=200,
+        X, y = make_regression(n_samples=N_SAMPLES,
                                n_features=4,
                                n_informative=3,
                                n_targets=1,
@@ -240,8 +243,8 @@ def get_tabular_data(task):
         else:
             X[column] = X[column].fillna(0)

-        X = X.iloc[0:200]
-        y = y.iloc[0:200]
+        X = X.iloc[0:N_SAMPLES]
+        y = y.iloc[0:N_SAMPLES]
         y = (y - y.mean()) / y.std()
         validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy())

@@ -256,8 +259,8 @@ def get_tabular_data(task):
         else:
             X[column] = X[column].fillna(0)

-        X = X.iloc[0:200]
-        y = y.iloc[0:200]
+        X = X.iloc[0:N_SAMPLES]
+        y = y.iloc[0:N_SAMPLES]
         y = (y - y.mean()) / y.std()
         validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy())
     elif task == 'iris':
@@ -288,7 +291,7 @@ def get_fit_dictionary(X, y, validator, backend):
         'num_run': np.random.randint(50),
         'device': 'cpu',
         'budget_type': 'epochs',
-        'epochs': 100,
+        'epochs': 5,
         'torch_num_threads': 1,
         'early_stopping': 10,
         'working_dir': '/tmp',
@@ -326,7 +329,7 @@ def dataset(request):
 @pytest.fixture
 def dataset_traditional_classifier_num_only():
     X, y = make_classification(
-        n_samples=200,
+        n_samples=N_SAMPLES,
         n_features=4,
         n_informative=3,
         n_redundant=1,
@@ -344,15 +347,15 @@ def dataset_traditional_classifier_categorical_only():
     X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
     categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category']
     X = X[categorical_columns]
-    X, y = X[:200].to_numpy(), y[:200].to_numpy().astype(np.int)
+    X, y = X[:N_SAMPLES].to_numpy(), y[:N_SAMPLES].to_numpy().astype(np.int)
     return X, y


 @pytest.fixture
 def dataset_traditional_classifier_num_categorical():
     X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
     y = y.astype(np.int)
-    X, y = X[:200].to_numpy(), y[:200].to_numpy().astype(np.int)
+    X, y = X[:N_SAMPLES].to_numpy(), y[:N_SAMPLES].to_numpy().astype(np.int)
     return X, y

@@ -456,3 +459,8 @@ def loss_mse():
 @pytest.fixture
 def loss_details(request):
     return request.getfixturevalue(request.param)
+
+
+@pytest.fixture
+def n_samples():
+    return N_SAMPLES
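
With the constant also exposed as a fixture, tests can reference the dataset size by name rather than hard-coding 200. A hypothetical consumer (not part of this diff; pytest injects fixtures by matching parameter names):

    # test_sizes.py -- hypothetical example module
    def test_n_samples_matches_conftest_constant(n_samples):
        # pytest resolves n_samples from the conftest.py fixture above.
        assert n_samples == 200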