Merged
9d50cb6
removed old supported_tasks dictionary from heads, added some docstri…
bastiscode Feb 1, 2021
b7c8773
removed old supported_tasks attribute and updated doc strings in base…
bastiscode Feb 1, 2021
725faf2
removed old supported_tasks attribute from network backbones
bastiscode Feb 1, 2021
740a604
put time series backbones in separate files, add doc strings and refa…
bastiscode Feb 1, 2021
b727016
split image networks into separate files, add doc strings and refacto…
bastiscode Feb 1, 2021
bc77ca3
fix typo
bastiscode Feb 1, 2021
f8de549
add an initial simple backbone test similar to the network head test
bastiscode Feb 1, 2021
480b8ea
fix flake8
bastiscode Feb 1, 2021
f461c7e
fixed imports in backbones and heads
bastiscode Feb 2, 2021
cab8f83
added new network backbone and head tests
bastiscode Feb 2, 2021
ab2f5e9
enabled tests for adding custom backbones and heads, added required p…
bastiscode Feb 2, 2021
41e5974
adding tabular regression pipeline
bastiscode Feb 4, 2021
d7037ac
upstream changes
bastiscode Feb 4, 2021
1b5fc46
fix flake8
bastiscode Feb 1, 2021
7987c86
adding tabular regression pipeline
bastiscode Feb 4, 2021
75f49b1
merged remote
bastiscode Feb 4, 2021
1dbf53f
fix flake8
bastiscode Feb 4, 2021
1726105
fix regression test
bastiscode Feb 4, 2021
eb02feb
fix indentation and comments, undo change in base network
bastiscode Feb 9, 2021
34e6bf4
pipeline fitting tests now check the expected output shape dynamicall…
bastiscode Feb 9, 2021
1f0444f
refactored trainer tests, added trainer test for regression
bastiscode Feb 9, 2021
7efd048
remove regression from mixup unitest
bastiscode Feb 9, 2021
2aebcee
use pandas unique instead of numpy
bastiscode Feb 10, 2021
6668509
[IMPORTANT] added proper target casting based on task type to base tr…
bastiscode Feb 10, 2021
29bbdef
adding tabular regression task to api
bastiscode Feb 10, 2021
fb9e175
adding tabular regression example, some small fixes
bastiscode Feb 10, 2021
04521f8
new/more tests for tabular regression
bastiscode Feb 10, 2021
071f3b8
Merge branch 'refactor_development' into refactor_development
bastiscode Feb 10, 2021
8833bc6
fix mypy and flake8 errors from merge
bastiscode Feb 10, 2021
73ccc7c
fix issues with new weighted loss and regression tasks
bastiscode Feb 10, 2021
760296e
change tabular column transformer to use net fit_dictionary_tabular f…
bastiscode Feb 10, 2021
506e55d
fixing tests, replaced num_classes with output_shape
bastiscode Feb 10, 2021
5c46da7
Merge branch 'refactor_development' of github.com:automl/Auto-PyTorch…
bastiscode Feb 15, 2021
85f9995
fixes after merge
bastiscode Feb 15, 2021
1a507e6
adding voting regressor wrapper
bastiscode Feb 16, 2021
44f1980
fix mypy and flake
bastiscode Feb 16, 2021
5a19140
updated example
bastiscode Feb 16, 2021
7d7da2e
lower r2 target
bastiscode Feb 16, 2021
17c2086
address comments
bastiscode Feb 17, 2021
a29dbee
increasing timeout
bastiscode Feb 17, 2021
927fe87
increase number of labels in test_losses because it occasionally fail…
bastiscode Feb 18, 2021
5d582dc
lower regression lr in score test until seeding properly works
bastiscode Feb 18, 2021
07e75f6
fix randomization in feature validator test
bastiscode Feb 18, 2021
@@ -91,4 +91,9 @@ def __call__(self, X: Union[np.ndarray, torch.tensor]) -> Union[np.ndarray, torch.tensor]:
         if self.preprocessor is None:
             raise ValueError("cant call {} without fitting the column transformer first."
                              .format(self.__class__.__name__))
+
+        if len(X.shape) == 1:
+            # expand batch dimension when called on a single record
+            X = X[np.newaxis, ...]
+
         return self.preprocessor.transform(X)
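
The new guard matters because the fitted sklearn ColumnTransformer expects 2-D input, so a lone record would otherwise fail inside transform. A minimal standalone sketch of the reshape (plain numpy, not Auto-PyTorch code):

```python
import numpy as np

record = np.array([1.0, 2.0, 3.0])   # a single record, shape (3,)
batch = record[np.newaxis, ...]      # prepend a batch axis -> shape (1, 3)
assert batch.shape == (1, 3)
```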
24 changes: 13 additions & 11 deletions autoPyTorch/pipeline/components/setup/network/base_network.py
@@ -9,7 +9,7 @@

 from autoPyTorch.constants import CLASSIFICATION_TASKS, STRING_TO_TASK_TYPES
 from autoPyTorch.pipeline.components.training.base_training import autoPyTorchTrainingComponent
-from autoPyTorch.utils.common import FitRequirement
+from autoPyTorch.utils.common import FitRequirement, get_device_from_fit_dictionary


 class NetworkComponent(autoPyTorchTrainingComponent):
@@ -21,14 +21,11 @@ class NetworkComponent(autoPyTorchTrainingComponent):
     def __init__(
         self,
         network: Optional[torch.nn.Module] = None,
-        random_state: Optional[np.random.RandomState] = None,
-        device: Optional[torch.device] = None
+        random_state: Optional[np.random.RandomState] = None
     ) -> None:
         super(NetworkComponent, self).__init__()
         self.network = network
         self.random_state = random_state
-        self.device = torch.device(
-            "cuda" if torch.cuda.is_available() else "cpu") if device is None else device
+        self.device = None
         self.add_fit_requirements([
             FitRequirement("network_head", (torch.nn.Module,), user_defined=False, dataset_property=False),
             FitRequirement("network_backbone", (torch.nn.Module,), user_defined=False, dataset_property=False),
@@ -53,6 +50,9 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> autoPyTorchTrainingComponent:
         self.network = torch.nn.Sequential(X['network_backbone'], X['network_head'])

         # Properly set the network training device
+        if self.device is None:
+            self.device = get_device_from_fit_dictionary(X)
+
         self.to(self.device)

         if STRING_TO_TASK_TYPES[X['dataset_properties']['task_type']] in CLASSIFICATION_TASKS:
@@ -113,12 +113,14 @@ def predict(self, loader: torch.utils.data.DataLoader) -> torch.Tensor:

         for i, (X_batch, Y_batch) in enumerate(loader):
             # Predict on batch
-            X_batch = torch.autograd.Variable(X_batch).float().to(self.device)
+            X_batch = X_batch.float().to(self.device)

-            Y_batch_pred = self.network(X_batch).detach().cpu()
-            if self.final_activation is not None:
-                Y_batch_pred = self.final_activation(Y_batch_pred)
-            Y_batch_preds.append(Y_batch_pred)
+            with torch.no_grad():
+                Y_batch_pred = self.network(X_batch)
+                if self.final_activation is not None:
+                    Y_batch_pred = self.final_activation(Y_batch_pred)
+
+            Y_batch_preds.append(Y_batch_pred.cpu())

         return torch.cat(Y_batch_preds, 0).cpu().numpy()
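
The changed files now lean on get_device_from_fit_dictionary, whose body is not part of these hunks. Judging from the TrainerChoice.get_device method it replaces (removed further down), a sketch of the expected behaviour might look like this; the .get fallback is an assumption, not the PR's actual code:

```python
from typing import Any, Dict

import torch


def get_device_from_fit_dictionary(X: Dict[str, Any]) -> torch.device:
    """Resolve the torch device from a fit dictionary (hypothetical sketch)."""
    if not torch.cuda.is_available():
        # no GPU available: always compute on CPU
        return torch.device("cpu")
    # assume the fit dictionary may carry a 'device' entry such as 'cuda' or 'cpu'
    return torch.device(X.get("device", "cpu"))
```

Deferring the device lookup to fit() is what lets the device constructor argument disappear above, and wrapping predict in torch.no_grad() while moving each batch prediction to the CPU keeps autograd state and GPU memory from accumulating across batches.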
20 changes: 11 additions & 9 deletions autoPyTorch/pipeline/components/training/trainer/base_trainer.py
@@ -4,11 +4,11 @@

 import numpy as np

 import torch
-from torch.autograd import Variable
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler
 from torch.utils.tensorboard.writer import SummaryWriter

+from autoPyTorch.constants import REGRESSION_TASKS
 from autoPyTorch.pipeline.components.training.base_training import autoPyTorchTrainingComponent
 from autoPyTorch.pipeline.components.training.metrics.utils import calculate_score
 from autoPyTorch.utils.logging_ import PicklableClientLogger
@@ -253,8 +253,8 @@ def train_epoch(self, train_loader: torch.utils.data.DataLoader, epoch: int,
                 loss, outputs = self.train_step(data, targets)

                 # save for metric evaluation
-                outputs_data.append(outputs.detach())
-                targets_data.append(targets.detach())
+                outputs_data.append(outputs.detach().cpu())
+                targets_data.append(targets.detach().cpu())

                 batch_size = data.size(0)
                 loss_sum += loss * batch_size
@@ -286,10 +286,12 @@ def train_step(self, data: np.ndarray, targets: np.ndarray) -> Tuple[float, torch.Tensor]:
         """
         # prepare
         data = data.float().to(self.device)
-        targets = targets.long().to(self.device)
+        if self.task_type in REGRESSION_TASKS:
+            targets = targets.float().to(self.device)
+        else:
+            targets = targets.long().to(self.device)

         data, criterion_kwargs = self.data_preparation(data, targets)
-        data = Variable(data)

         # training
         self.optimizer.zero_grad()
@@ -338,8 +340,8 @@ def evaluate(self, test_loader: torch.utils.data.DataLoader, epoch: int,
                 loss_sum += loss.item() * batch_size
                 N += batch_size

-                outputs_data.append(outputs.detach())
-                targets_data.append(targets.detach())
+                outputs_data.append(outputs.detach().cpu())
+                targets_data.append(targets.detach().cpu())

                 if writer:
                     writer.add_scalar(
@@ … @@
     def compute_metrics(self, outputs_data: np.ndarray, targets_data: np.ndarray
                         ) -> Dict[str, float]:
         # TODO: change once Ravin Provides the PR
-        outputs_data = torch.cat(outputs_data, dim=0)
-        targets_data = torch.cat(targets_data, dim=0)
+        outputs_data = torch.cat(outputs_data, dim=0).numpy()
+        targets_data = torch.cat(targets_data, dim=0).numpy()
         return calculate_score(targets_data, outputs_data, self.task_type, self.metrics)

     def data_preparation(self, X: np.ndarray, y: np.ndarray,
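
The REGRESSION_TASKS branch in train_step exists because PyTorch losses are strict about target dtypes: CrossEntropyLoss wants int64 class indices, while MSELoss wants float targets shaped like the outputs. A standalone illustration (not PR code):

```python
import torch

# regression: float targets, matching the float outputs
outputs = torch.randn(4, 1)
targets = torch.tensor([0.5, 1.2, -0.3, 2.0])
mse = torch.nn.MSELoss()(outputs.squeeze(1), targets.float())

# classification: int64 class indices as targets
logits = torch.randn(4, 3)
classes = torch.tensor([0, 2, 1, 2])
ce = torch.nn.CrossEntropyLoss()(logits, classes.long())
```

Moving each detached batch to the CPU as it is collected also lets compute_metrics call .numpy() directly, since .numpy() is only valid on CPU tensors.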
@@ -33,7 +33,7 @@
     BudgetTracker,
     RunSummary,
 )
-from autoPyTorch.utils.common import FitRequirement
+from autoPyTorch.utils.common import FitRequirement, get_device_from_fit_dictionary
 from autoPyTorch.utils.logging_ import get_named_client_logger

 trainer_directory = os.path.split(__file__)[0]
@@ -56,6 +56,7 @@ class TrainerChoice(autoPyTorchChoice):
     epoch happens, that is, how batches of data are fed and used to train the network.

     """
+
     def __init__(self,
                  dataset_properties: Dict[str, Any],
                  random_state: Optional[np.random.RandomState] = None
@@ -96,13 +97,11 @@ def get_components(self) -> Dict[str, autoPyTorchComponent]:
             components.update(_addons.components)
         return components

-    def get_hyperparameter_search_space(
-        self,
-        dataset_properties: Optional[Dict[str, str]] = None,
-        default: Optional[str] = None,
-        include: Optional[List[str]] = None,
-        exclude: Optional[List[str]] = None,
-    ) -> ConfigurationSpace:
+    def get_hyperparameter_search_space(self,
+                                        dataset_properties: Optional[Dict[str, str]] = None,
+                                        default: Optional[str] = None,
+                                        include: Optional[List[str]] = None,
+                                        exclude: Optional[List[str]] = None) -> ConfigurationSpace:
         """Returns the configuration space of the current chosen components

         Args:
@@ -189,8 +188,7 @@ def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> autoPyTorchComponent:
             self.logger = get_named_client_logger(
                 name=X['num_run'],
                 # Log to a user provided port else to the default logging port
-                port=X['logger_port'
-                       ] if 'logger_port' in X else logging.handlers.DEFAULT_TCP_LOGGING_PORT,
+                port=X['logger_port'] if 'logger_port' in X else logging.handlers.DEFAULT_TCP_LOGGING_PORT,
             )

             fit_function = self._fit
@@ -267,7 +265,7 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> torch.nn.Module:
                                            name=additional_losses),
             budget_tracker=budget_tracker,
             optimizer=X['optimizer'],
-            device=self.get_device(X),
+            device=get_device_from_fit_dictionary(X),
             metrics_during_training=X['metrics_during_training'],
             scheduler=X['lr_scheduler'],
             task_type=STRING_TO_TASK_TYPES[X['dataset_properties']['task_type']]
@@ -490,21 +488,6 @@ def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None:
                     config_option
                 ))

-    def get_device(self, X: Dict[str, Any]) -> torch.device:
-        """
-        Returns the device to do torch operations
-
-        Args:
-            X (Dict[str, Any]): A fit dictionary to control how the pipeline
-                is fitted
-
-        Returns:
-            torch.device: the device in which to compute operations. Cuda/cpu
-        """
-        if not torch.cuda.is_available():
-            return torch.device('cpu')
-        return torch.device(X['device'])
-
     @staticmethod
     def count_parameters(model: torch.nn.Module) -> Tuple[int, int]:
         """
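
The reflowed port= expression is a plain conditional lookup; dict.get would express the same fallback, as this standalone sketch shows (the fit dictionary here is hypothetical):

```python
import logging.handlers
from typing import Any, Dict

X: Dict[str, Any] = {"num_run": 1}  # hypothetical fit dictionary without 'logger_port'
port = X["logger_port"] if "logger_port" in X else logging.handlers.DEFAULT_TCP_LOGGING_PORT
assert port == logging.handlers.DEFAULT_TCP_LOGGING_PORT

# dict.get expresses the same fallback more compactly
assert X.get("logger_port", logging.handlers.DEFAULT_TCP_LOGGING_PORT) == port
```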
45 changes: 21 additions & 24 deletions autoPyTorch/pipeline/tabular_classification.py
@@ -58,27 +58,25 @@ class TabularClassificationPipeline(ClassifierMixin, BasePipeline):

     Examples
     """

-    def __init__(
-        self,
-        config: Optional[Configuration] = None,
-        steps: Optional[List[Tuple[str, autoPyTorchChoice]]] = None,
-        dataset_properties: Optional[Dict[str, Any]] = None,
-        include: Optional[Dict[str, Any]] = None,
-        exclude: Optional[Dict[str, Any]] = None,
-        random_state: Optional[np.random.RandomState] = None,
-        init_params: Optional[Dict[str, Any]] = None,
-        search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
-    ):
+    def __init__(self,
+                 config: Optional[Configuration] = None,
+                 steps: Optional[List[Tuple[str, autoPyTorchChoice]]] = None,
+                 dataset_properties: Optional[Dict[str, Any]] = None,
+                 include: Optional[Dict[str, Any]] = None,
+                 exclude: Optional[Dict[str, Any]] = None,
+                 random_state: Optional[np.random.RandomState] = None,
+                 init_params: Optional[Dict[str, Any]] = None,
+                 search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
+                 ):
         super().__init__(
             config, steps, dataset_properties, include, exclude,
             random_state, init_params, search_space_updates)

-    def fit_transformer(
-        self,
-        X: np.ndarray,
-        y: np.ndarray,
-        fit_params: Optional[Dict[str, Any]] = None
-    ) -> Tuple[np.ndarray, Optional[Dict[str, Any]]]:
+    def fit_transformer(self,
+                        X: np.ndarray,
+                        y: np.ndarray,
+                        fit_params: Optional[Dict[str, Any]] = None
+                        ) -> Tuple[np.ndarray, Optional[Dict[str, Any]]]:
         """Fits the pipeline given a training (X,y) pair

         Args:
@@ -167,12 +165,11 @@ def predict_proba(self, X: np.ndarray, batch_size: Optional[int] = None) -> np.ndarray:

         return y

-    def _get_hyperparameter_search_space(
-        self,
-        dataset_properties: Dict[str, Any],
-        include: Optional[Dict[str, Any]] = None,
-        exclude: Optional[Dict[str, Any]] = None,
-    ) -> ConfigurationSpace:
+    def _get_hyperparameter_search_space(self,
+                                         dataset_properties: Dict[str, Any],
+                                         include: Optional[Dict[str, Any]] = None,
+                                         exclude: Optional[Dict[str, Any]] = None,
+                                         ) -> ConfigurationSpace:
         """Create the hyperparameter configuration space.

         For the given steps, and the Choices within that steps,
@@ -243,7 +240,7 @@ def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]],
             ("preprocessing", EarlyPreprocessing()),
             ("network_backbone", NetworkBackboneChoice(default_dataset_properties)),
             ("network_head", NetworkHeadChoice(default_dataset_properties)),
-            ("network", NetworkComponent(default_dataset_properties)),
+            ("network", NetworkComponent()),
             ("network_init", NetworkInitializerChoice(default_dataset_properties)),
             ("optimizer", OptimizerChoice(default_dataset_properties)),
             ("lr_scheduler", SchedulerChoice(default_dataset_properties)),
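
NetworkComponent() losing its constructor argument follows from the lazy device handling above: the component only composes the backbone and head, and resolves its device at fit time. A rough standalone sketch of what fit now does with these pieces (shapes and the fit-dictionary layout are assumptions):

```python
import torch

# stand-ins for what the backbone/head pipeline steps would produce
backbone = torch.nn.Sequential(torch.nn.Linear(8, 16), torch.nn.ReLU())
head = torch.nn.Linear(16, 2)
network = torch.nn.Sequential(backbone, head)

fit_dictionary = {"device": "cuda"}  # hypothetical fit dictionary entry
device = torch.device(fit_dictionary["device"] if torch.cuda.is_available() else "cpu")
network = network.to(device)
print(network(torch.randn(4, 8, device=device)).shape)  # torch.Size([4, 2])
```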