From a408f51604d31919a8377bb6acb6e46fc0462939 Mon Sep 17 00:00:00 2001 From: AmrMKayid Date: Thu, 24 Sep 2020 00:02:31 +0200 Subject: [PATCH 1/8] Use ray WandbLogger for oatomobile logger --- oatomobile/utils/loggers/__init__.py | 1 + oatomobile/utils/loggers/wandb.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/oatomobile/utils/loggers/__init__.py b/oatomobile/utils/loggers/__init__.py index 6d8018d..ba7104d 100644 --- a/oatomobile/utils/loggers/__init__.py +++ b/oatomobile/utils/loggers/__init__.py @@ -18,3 +18,4 @@ from oatomobile.utils.loggers.base import LoggingData from oatomobile.utils.loggers.csv import CSVLogger from oatomobile.utils.loggers.terminal import TerminalLogger +from oatomobile.utils.loggers.wandb import WandBLogger diff --git a/oatomobile/utils/loggers/wandb.py b/oatomobile/utils/loggers/wandb.py index 1905b9b..8c05707 100644 --- a/oatomobile/utils/loggers/wandb.py +++ b/oatomobile/utils/loggers/wandb.py @@ -16,13 +16,14 @@ import wandb from absl import flags +from ray.tune.integration.wandb import WandbLogger from oatomobile.utils.loggers import base wandb.init(project="oatomobile", config=flags.FLAGS) -class WandBLogger(base.Logger): +class WandBLogger(base.Logger, WandbLogger): """Logs to a `wandb` dashboard.""" def write(self, values: base.LoggingData) -> None: From 6ddcb63c944de059fe93dd66ae9ccbe54e2c7a5d Mon Sep 17 00:00:00 2001 From: AmrMKayid Date: Thu, 24 Sep 2020 00:07:22 +0200 Subject: [PATCH 2/8] Create tune script for DIM Agent --- oatomobile/baselines/torch/dim/train.py | 2 +- oatomobile/baselines/torch/dim/tune.py | 364 ++++++++++++++++++++++++ 2 files changed, 365 insertions(+), 1 deletion(-) create mode 100644 oatomobile/baselines/torch/dim/tune.py diff --git a/oatomobile/baselines/torch/dim/train.py b/oatomobile/baselines/torch/dim/train.py index 07b5400..057087f 100644 --- a/oatomobile/baselines/torch/dim/train.py +++ b/oatomobile/baselines/torch/dim/train.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Trains the deep imitative model on expert demostrations.""" +"""Trains the deep imitative model on expert demonstrations.""" import os from typing import Mapping diff --git a/oatomobile/baselines/torch/dim/tune.py b/oatomobile/baselines/torch/dim/tune.py new file mode 100644 index 0000000..72a80fe --- /dev/null +++ b/oatomobile/baselines/torch/dim/tune.py @@ -0,0 +1,364 @@ +# Copyright 2020 The OATomobile Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tune the deep imitative model on expert demonstrations.""" + +import os +from typing import Mapping + +import torch +import torch.distributions as D +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +import tqdm +from absl import app +from absl import flags +from absl import logging +from ray import tune + +from oatomobile.baselines.torch.dim.model import ImitativeModel +from oatomobile.datasets.carla import CARLADataset +from oatomobile.torch import types +from oatomobile.torch.loggers import TensorBoardLogger +from oatomobile.torch.savers import Checkpointer +from oatomobile.utils.loggers import WandBLogger + +logging.set_verbosity(logging.DEBUG) +FLAGS = flags.FLAGS +flags.DEFINE_string( + name="dataset_dir", + default=None, + help="The full path to the processed dataset.", +) +flags.DEFINE_string( + name="output_dir", + default=None, + help="The full path to the output directory (for logs, ckpts).", +) +flags.DEFINE_integer( + name="batch_size", + default=[32, 64, 128, 256, 512, 1024], + help="The batch size used for training the neural network.", +) +flags.DEFINE_integer( + name="num_epochs", + default=None, + help="The number of training epochs for the neural network.", +) +flags.DEFINE_integer( + name="save_model_frequency", + default=4, + help="The number epochs between saves of the model.", +) +flags.DEFINE_float( + name="learning_rate", + default=[1e-1, 1e-2, 1e-3, 1e-4, 1e-5], + help="The ADAM learning rate.", +) +flags.DEFINE_integer( + name="num_timesteps_to_keep", + default=4, + help="The numbers of time-steps to keep from the target, with downsampling.", +) +flags.DEFINE_float( + name="weight_decay", + default=[0.0, 1e-2, 1e-3], + help="The L2 penalty (regularization) coefficient.", +) +flags.DEFINE_bool( + name="clip_gradients", + default=False, + help="If True it clips the gradients norm to 1.0.", +) + + +def main(config): + + # Parses command line arguments. + dataset_dir = config["dataset_dir"] + output_dir = config["output_dir"] + save_model_frequency = config["save_model_frequency"] + num_timesteps_to_keep = config["num_timesteps_to_keep"] + clip_gradients = config["clip_gradients"] + + ## Parse Ray Config + batch_size = config["batch_size"] + num_epochs = config["num_epochs"] + learning_rate = config["learning_rate"] + weight_decay = config["weight_decay"] + noise_level = config["noise_level"] + + # Determines device, accelerator. + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # pylint: disable=no-member + + # Creates the necessary output directory. + os.makedirs(output_dir, exist_ok=True) + log_dir = os.path.join(output_dir, "logs") + os.makedirs(log_dir, exist_ok=True) + ckpt_dir = os.path.join(output_dir, "ckpts") + os.makedirs(ckpt_dir, exist_ok=True) + + # Initializes the model and its optimizer. + output_shape = [num_timesteps_to_keep, 2] + model = ImitativeModel(output_shape=output_shape).to(device) + optimizer = optim.Adam( + model.parameters(), + lr=learning_rate, + weight_decay=weight_decay, + ) + writer = TensorBoardLogger(log_dir=log_dir) + checkpointer = Checkpointer(model=model, ckpt_dir=ckpt_dir) + + def transform(batch: Mapping[str, types.Array]) -> Mapping[str, torch.Tensor]: + """Preprocesses a batch for the model. + + Args: + batch: (keyword arguments) The raw batch variables. + + Returns: + The processed batch. + """ + # Sends tensors to `device`. + batch = {key: tensor.to(device) for (key, tensor) in batch.items()} + # Preprocesses batch for the model. + batch = model.transform(batch) + return batch + + # Setups the dataset and the dataloader. + modalities = ( + "lidar", + "is_at_traffic_light", + "traffic_light_state", + "player_future", + "velocity", + ) + dataset_train = CARLADataset.as_torch( + dataset_dir=os.path.join(dataset_dir, "train"), + modalities=modalities, + ) + dataloader_train = torch.utils.data.DataLoader( + dataset_train, + batch_size=batch_size, + shuffle=True, + num_workers=2, + ) + dataset_val = CARLADataset.as_torch( + dataset_dir=os.path.join(dataset_dir, "val"), + modalities=modalities, + ) + dataloader_val = torch.utils.data.DataLoader( + dataset_val, + batch_size=batch_size * 5, + shuffle=True, + num_workers=2, + ) + + # Theoretical limit of NLL. + nll_limit = -torch.sum( # pylint: disable=no-member + D.MultivariateNormal( + loc=torch.zeros(output_shape[-2] * output_shape[-1]), # pylint: disable=no-member + scale_tril=torch.eye(output_shape[-2] * output_shape[-1]) * # pylint: disable=no-member + noise_level, # pylint: disable=no-member + ).log_prob(torch.zeros(output_shape[-2] * output_shape[-1]))) # pylint: disable=no-member + + def train_step( + model: ImitativeModel, + optimizer: optim.Optimizer, + batch: Mapping[str, torch.Tensor], + clip: bool = False, + ) -> torch.Tensor: + """Performs a single gradient-descent optimisation step.""" + # Resets optimizer's gradients. + optimizer.zero_grad() + + # Perturb target. + y = torch.normal( # pylint: disable=no-member + mean=batch["player_future"][..., :2], + std=torch.ones_like(batch["player_future"][..., :2]) * noise_level, # pylint: disable=no-member + ) + + # Forward pass from the model. + z = model._params( + velocity=batch["velocity"], + visual_features=batch["visual_features"], + is_at_traffic_light=batch["is_at_traffic_light"], + traffic_light_state=batch["traffic_light_state"], + ) + _, log_prob, logabsdet = model._decoder._inverse(y=y, z=z) + + # Calculates loss (NLL). + loss = -torch.mean(log_prob - logabsdet, dim=0) # pylint: disable=no-member + + # Backward pass. + loss.backward() + + # Clips gradients norm. + if clip: + torch.nn.utils.clip_grad_norm(model.parameters(), 1.0) + + # Performs a gradient descent step. + optimizer.step() + + return loss + + def train_epoch( + model: ImitativeModel, + optimizer: optim.Optimizer, + dataloader: torch.utils.data.DataLoader, + ) -> torch.Tensor: + """Performs an epoch of gradient descent optimization on `dataloader`.""" + model.train() + loss = 0.0 + with tqdm.tqdm(dataloader) as pbar: + for batch in pbar: + # Prepares the batch. + batch = transform(batch) + # Performs a gradien-descent step. + loss += train_step(model, optimizer, batch, clip=clip_gradients) + # Reporting loss to ray tune + tune.report(loss=loss) + + return loss / len(dataloader) + + def evaluate_step( + model: ImitativeModel, + batch: Mapping[str, torch.Tensor], + ) -> torch.Tensor: + """Evaluates `model` on a `batch`.""" + # Forward pass from the model. + z = model._params( + velocity=batch["velocity"], + visual_features=batch["visual_features"], + is_at_traffic_light=batch["is_at_traffic_light"], + traffic_light_state=batch["traffic_light_state"], + ) + _, log_prob, logabsdet = model._decoder._inverse( + y=batch["player_future"][..., :2], + z=z, + ) + + # Calculates loss (NLL). + loss = -torch.mean(log_prob - logabsdet, dim=0) # pylint: disable=no-member + + return loss + + def evaluate_epoch( + model: ImitativeModel, + dataloader: torch.utils.data.DataLoader, + ) -> torch.Tensor: + """Performs an evaluation of the `model` on the `dataloader.""" + model.eval() + loss = 0.0 + with tqdm.tqdm(dataloader) as pbar: + for batch in pbar: + # Prepares the batch. + batch = transform(batch) + # Accumulates loss in dataset. + with torch.no_grad(): + loss += evaluate_step(model, batch) + # Reporting loss to ray tune + tune.report(eval_loss=loss) + + return loss / len(dataloader) + + def write( + model: ImitativeModel, + dataloader: torch.utils.data.DataLoader, + writer: TensorBoardLogger, + split: str, + loss: torch.Tensor, + epoch: int, + ) -> None: + """Visualises model performance on `TensorBoard`.""" + # Gets a sample from the dataset. + batch = next(iter(dataloader)) + # Prepares the batch. + batch = transform(batch) + # Turns off gradients for model parameters. + for params in model.parameters(): + params.requires_grad = False + # Generates predictions. + predictions = model(num_steps=20, **batch) + # Turns on gradients for model parameters. + for params in model.parameters(): + params.requires_grad = True + # Logs on `TensorBoard`. + writer.log( + split=split, + loss=loss.detach().cpu().numpy().item(), + overhead_features=batch["visual_features"].detach().cpu().numpy()[:8], + predictions=predictions.detach().cpu().numpy()[:8], + ground_truth=batch["player_future"].detach().cpu().numpy()[:8], + global_step=epoch, + ) + + with tqdm.tqdm(range(num_epochs)) as pbar_epoch: + for epoch in pbar_epoch: + # Trains model on whole training dataset, and writes on `TensorBoard`. + loss_train = train_epoch(model, optimizer, dataloader_train) + write(model, dataloader_train, writer, "train", loss_train, epoch) + + # Evaluates model on whole validation dataset, and writes on `TensorBoard`. + loss_val = evaluate_epoch(model, dataloader_val) + write(model, dataloader_val, writer, "val", loss_val, epoch) + + # Checkpoints model weights. + if epoch % save_model_frequency == 0: + checkpointer.save(epoch) + + # Updates progress bar description. + pbar_epoch.set_description( + "TL: {:.2f} | VL: {:.2f} | THEORYMIN: {:.2f}".format( + loss_train.detach().cpu().numpy().item(), + loss_val.detach().cpu().numpy().item(), + nll_limit, + )) + + +def run_experiments(argv): + # Debugging purposes. + logging.debug(argv) + logging.debug(FLAGS) + + analysis = tune.run( + main, + loggers=[WandBLogger], + num_samples=1, + config={ + "monitor": True, + "wandb": { + "project": "oatomobile", + "monitor_gym": True, + }, + "dataset_dir": FLAGS.dataset_dir, + "output_dir": FLAGS.output_dir, + "save_model_frequency": FLAGS.save_model_frequency, + "num_timesteps_to_keep": FLAGS.num_timesteps_to_keep, + "clip_gradients": FLAGS.clip_gradients, + "batch_size": tune.grid_search(FLAGS.batch_size), + "num_epochs": tune.grid_search(FLAGS.num_epochs), + "learning_rate": tune.grid_search(FLAGS.learning_rate), + "weight_decay": tune.grid_search(FLAGS.weight_decay), + "noise_level": tune.grid_search([1e-1, 1e-2, 1e-3]), + }) + + print("Best config: ", analysis.get_best_config(metric="loss")) + + +if __name__ == "__main__": + flags.mark_flag_as_required("dataset_dir") + flags.mark_flag_as_required("output_dir") + flags.mark_flag_as_required("num_epochs") + app.run(run_experiments) From 227e82d705c9c6409bf65e86a6a40122606df367 Mon Sep 17 00:00:00 2001 From: AmrMKayid Date: Thu, 24 Sep 2020 00:07:34 +0200 Subject: [PATCH 3/8] Update setup.py --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 48d2153..04e037a 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,8 @@ "tabulate==0.8.7", "scikit-image==0.15.0", "dm-tree==0.1.5", + "ray==0.8.7", + "wandb==0.10.2", ], tests_require=[ "pytest", From a108e89d7110b5cc9edff31d30103a7b0236c5e6 Mon Sep 17 00:00:00 2001 From: AmrMKayid Date: Thu, 24 Sep 2020 10:30:34 +0200 Subject: [PATCH 4/8] Create tune script for CIL Agent --- oatomobile/baselines/torch/cil/tune.py | 325 +++++++++++++++++++++++++ oatomobile/baselines/torch/dim/tune.py | 2 +- 2 files changed, 326 insertions(+), 1 deletion(-) create mode 100644 oatomobile/baselines/torch/cil/tune.py diff --git a/oatomobile/baselines/torch/cil/tune.py b/oatomobile/baselines/torch/cil/tune.py new file mode 100644 index 0000000..f9bf35e --- /dev/null +++ b/oatomobile/baselines/torch/cil/tune.py @@ -0,0 +1,325 @@ +# Copyright 2020 The OATomobile Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Trains the behavioural cloning agent's model on expert demonstrations.""" + +import os +from typing import Mapping + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +import tqdm +from absl import app +from absl import flags +from absl import logging +from ray import tune + +from oatomobile.baselines.torch.cil.model import BehaviouralModel +from oatomobile.datasets.carla import CARLADataset +from oatomobile.torch import types +from oatomobile.torch.loggers import TensorBoardLogger +from oatomobile.torch.savers import Checkpointer + +logging.set_verbosity(logging.DEBUG) +FLAGS = flags.FLAGS +flags.DEFINE_string( + name="dataset_dir", + default=None, + help="The full path to the processed dataset.", +) +flags.DEFINE_string( + name="output_dir", + default=None, + help="The full path to the output directory (for logs, ckpts).", +) +flags.DEFINE_integer( + name="batch_size", + default=[32, 64, 128, 256, 512, 1024], + help="The batch size used for training the neural network.", +) +flags.DEFINE_integer( + name="num_epochs", + default=None, + help="The number of training epochs for the neural network.", +) +flags.DEFINE_integer( + name="save_model_frequency", + default=4, + help="The number epochs between saves of the model.", +) +flags.DEFINE_float( + name="learning_rate", + default=[1e-1, 1e-2, 1e-3, 1e-4, 1e-5], + help="The ADAM learning rate.", +) +flags.DEFINE_integer( + name="num_timesteps_to_keep", + default=4, + help="The numbers of time-steps to keep from the target, with downsampling.", +) +flags.DEFINE_float( + name="weight_decay", + default=[0.0, 1e-2, 1e-3], + help="The L2 penalty (regularization) coefficient.", +) +flags.DEFINE_bool( + name="clip_gradients", + default=False, + help="If True it clips the gradients norm to 1.0.", +) + + +def main(config): + + # Parses config arguments. + dataset_dir = config["dataset_dir"] + output_dir = config["output_dir"] + save_model_frequency = config["save_model_frequency"] + num_timesteps_to_keep = config["num_timesteps_to_keep"] + clip_gradients = config["clip_gradients"] + + ## Parse Ray Config + batch_size = config["batch_size"] + num_epochs = config["num_epochs"] + learning_rate = config["learning_rate"] + weight_decay = config["weight_decay"] + noise_level = config["noise_level"] + + # Determines device, accelerator. + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # pylint: disable=no-member + + # Creates the necessary output directory. + os.makedirs(output_dir, exist_ok=True) + log_dir = os.path.join(output_dir, "logs") + os.makedirs(log_dir, exist_ok=True) + ckpt_dir = os.path.join(output_dir, "ckpts") + os.makedirs(ckpt_dir, exist_ok=True) + + # Initializes the model and its optimizer. + output_shape = [num_timesteps_to_keep, 2] + model = BehaviouralModel(output_shape=output_shape).to(device) + criterion = nn.L1Loss(reduction="none") + optimizer = optim.Adam( + model.parameters(), + lr=learning_rate, + weight_decay=weight_decay, + ) + writer = TensorBoardLogger(log_dir=log_dir) + checkpointer = Checkpointer(model=model, ckpt_dir=ckpt_dir) + + def transform(batch: Mapping[str, types.Array]) -> Mapping[str, torch.Tensor]: + """Preprocesses a batch for the model. + + Args: + batch: (keyword arguments) The raw batch variables. + + Returns: + The processed batch. + """ + # Sends tensors to `device`. + batch = {key: tensor.to(device) for (key, tensor) in batch.items()} + # Preprocesses batch for the model. + batch = model.transform(batch) + return batch + + # Setups the dataset and the dataloader. + modalities = ( + "lidar", + "is_at_traffic_light", + "traffic_light_state", + "player_future", + "velocity", + ) + dataset_train = CARLADataset.as_torch( + dataset_dir=os.path.join(dataset_dir, "train"), + modalities=modalities, + mode=True, + ) + dataloader_train = torch.utils.data.DataLoader( + dataset_train, + batch_size=batch_size, + shuffle=True, + num_workers=2, + ) + dataset_val = CARLADataset.as_torch( + dataset_dir=os.path.join(dataset_dir, "val"), + modalities=modalities, + mode=True, + ) + dataloader_val = torch.utils.data.DataLoader( + dataset_val, + batch_size=batch_size * 5, + shuffle=True, + num_workers=2, + ) + + def train_step( + model: BehaviouralModel, + optimizer: optim.Optimizer, + batch: Mapping[str, torch.Tensor], + clip: bool = False, + ) -> torch.Tensor: + """Performs a single gradient-descent optimisation step.""" + # Resets optimizer's gradients. + optimizer.zero_grad() + # Forward pass from the model. + predictions = model(**batch) + # Calculates loss. + loss = criterion(predictions, batch["player_future"][..., :2]) + loss = torch.sum(loss, dim=[-2, -1]) # pylint: disable=no-member + loss = torch.mean(loss, dim=0) # pylint: disable=no-member + # Backward pass. + loss.backward() + # Clips gradients norm. + if clip: + torch.nn.utils.clip_grad_norm(model.parameters(), 1.0) + # Performs a gradient descent step. + optimizer.step() + return loss + + def train_epoch( + model: BehaviouralModel, + optimizer: optim.Optimizer, + dataloader: torch.utils.data.DataLoader, + ) -> torch.Tensor: + """Performs an epoch of gradient descent optimization on `dataloader`.""" + model.train() + loss = 0.0 + with tqdm.tqdm(dataloader) as pbar: + for batch in pbar: + # Prepares the batch. + batch = transform(batch) + # Performs a gradien-descent step. + loss += train_step(model, optimizer, batch, clip=clip_gradients) + # Reporting loss to ray tune + tune.report(loss=loss) + + return loss / len(dataloader) + + def evaluate_step( + model: BehaviouralModel, + batch: Mapping[str, torch.Tensor], + ) -> torch.Tensor: + """Evaluates `model` on a `batch`.""" + # Forward pass from the model. + predictions = model(**batch) + # Calculates loss on mini-batch. + loss = criterion(predictions, batch["player_future"][..., :2]) + loss = torch.sum(loss, dim=[-2, -1]) # pylint: disable=no-member + loss = torch.mean(loss, dim=0) # pylint: disable=no-member + return loss + + def evaluate_epoch( + model: BehaviouralModel, + dataloader: torch.utils.data.DataLoader, + ) -> torch.Tensor: + """Performs an evaluation of the `model` on the `dataloader.""" + model.eval() + loss = 0.0 + with tqdm.tqdm(dataloader) as pbar: + for batch in pbar: + # Prepares the batch. + batch = transform(batch) + # Accumulates loss in dataset. + with torch.no_grad(): + loss += evaluate_step(model, batch) + # Reporting loss to ray tune + tune.report(eval_loss=loss) + + return loss / len(dataloader) + + def write( + model: BehaviouralModel, + dataloader: torch.utils.data.DataLoader, + writer: TensorBoardLogger, + split: str, + loss: torch.Tensor, + epoch: int, + ) -> None: + """Visualises model performance on `TensorBoard`.""" + # Gets a sample from the dataset. + batch = next(iter(dataloader)) + # Prepares the batch. + batch = transform(batch) + # Generates predictions. + with torch.no_grad(): + predictions = model(**batch) + # Logs on `TensorBoard`. + writer.log( + split=split, + loss=loss.detach().cpu().numpy().item(), + overhead_features=batch["visual_features"].detach().cpu().numpy()[:8], + predictions=predictions.detach().cpu().numpy()[:8], + ground_truth=batch["player_future"].detach().cpu().numpy()[:8], + global_step=epoch, + ) + + with tqdm.tqdm(range(num_epochs)) as pbar_epoch: + for epoch in pbar_epoch: + # Trains model on whole training dataset, and writes on `TensorBoard`. + loss_train = train_epoch(model, optimizer, dataloader_train) + write(model, dataloader_train, writer, "train", loss_train, epoch) + + # Evaluates model on whole validation dataset, and writes on `TensorBoard`. + loss_val = evaluate_epoch(model, dataloader_val) + write(model, dataloader_val, writer, "val", loss_val, epoch) + + # Checkpoints model weights. + if epoch % save_model_frequency == 0: + checkpointer.save(epoch) + + # Updates progress bar description. + pbar_epoch.set_description("TL: {:.2f} | VL: {:.2f}".format( + loss_train.detach().cpu().numpy().item(), + loss_val.detach().cpu().numpy().item(), + )) + + +def run_experiments(argv): + # Debugging purposes. + logging.debug(argv) + logging.debug(FLAGS) + + analysis = tune.run( + main, + loggers=[WandBLogger], + num_samples=1, + config={ + "monitor": True, + "wandb": { + "project": "oatomobile", + "monitor_gym": True, + }, + "dataset_dir": FLAGS.dataset_dir, + "output_dir": FLAGS.output_dir, + "save_model_frequency": FLAGS.save_model_frequency, + "num_timesteps_to_keep": FLAGS.num_timesteps_to_keep, + "clip_gradients": FLAGS.clip_gradients, + "batch_size": tune.grid_search(FLAGS.batch_size), + "num_epochs": tune.grid_search(FLAGS.num_epochs), + "learning_rate": tune.grid_search(FLAGS.learning_rate), + "weight_decay": tune.grid_search(FLAGS.weight_decay), + "noise_level": tune.grid_search([1e-1, 1e-2, 1e-3]), + }) + + print("Best config: ", analysis.get_best_config(metric="loss")) + + +if __name__ == "__main__": + flags.mark_flag_as_required("dataset_dir") + flags.mark_flag_as_required("output_dir") + flags.mark_flag_as_required("num_epochs") + app.run(run_experiments) diff --git a/oatomobile/baselines/torch/dim/tune.py b/oatomobile/baselines/torch/dim/tune.py index 72a80fe..71cbd02 100644 --- a/oatomobile/baselines/torch/dim/tune.py +++ b/oatomobile/baselines/torch/dim/tune.py @@ -86,7 +86,7 @@ def main(config): - # Parses command line arguments. + # Parses config arguments. dataset_dir = config["dataset_dir"] output_dir = config["output_dir"] save_model_frequency = config["save_model_frequency"] From 8bd2948cd8b52750133c425a891a48a48a60ec0a Mon Sep 17 00:00:00 2001 From: AmrMKayid Date: Thu, 24 Sep 2020 10:58:22 +0200 Subject: [PATCH 5/8] Update flags type --- oatomobile/baselines/torch/cil/tune.py | 6 +++--- oatomobile/baselines/torch/dim/tune.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/oatomobile/baselines/torch/cil/tune.py b/oatomobile/baselines/torch/cil/tune.py index f9bf35e..3b3579a 100644 --- a/oatomobile/baselines/torch/cil/tune.py +++ b/oatomobile/baselines/torch/cil/tune.py @@ -45,7 +45,7 @@ default=None, help="The full path to the output directory (for logs, ckpts).", ) -flags.DEFINE_integer( +flags.DEFINE_list( name="batch_size", default=[32, 64, 128, 256, 512, 1024], help="The batch size used for training the neural network.", @@ -60,7 +60,7 @@ default=4, help="The number epochs between saves of the model.", ) -flags.DEFINE_float( +flags.DEFINE_list( name="learning_rate", default=[1e-1, 1e-2, 1e-3, 1e-4, 1e-5], help="The ADAM learning rate.", @@ -70,7 +70,7 @@ default=4, help="The numbers of time-steps to keep from the target, with downsampling.", ) -flags.DEFINE_float( +flags.DEFINE_list( name="weight_decay", default=[0.0, 1e-2, 1e-3], help="The L2 penalty (regularization) coefficient.", diff --git a/oatomobile/baselines/torch/dim/tune.py b/oatomobile/baselines/torch/dim/tune.py index 71cbd02..79cebaa 100644 --- a/oatomobile/baselines/torch/dim/tune.py +++ b/oatomobile/baselines/torch/dim/tune.py @@ -47,7 +47,7 @@ default=None, help="The full path to the output directory (for logs, ckpts).", ) -flags.DEFINE_integer( +flags.DEFINE_list( name="batch_size", default=[32, 64, 128, 256, 512, 1024], help="The batch size used for training the neural network.", @@ -62,7 +62,7 @@ default=4, help="The number epochs between saves of the model.", ) -flags.DEFINE_float( +flags.DEFINE_list( name="learning_rate", default=[1e-1, 1e-2, 1e-3, 1e-4, 1e-5], help="The ADAM learning rate.", @@ -72,7 +72,7 @@ default=4, help="The numbers of time-steps to keep from the target, with downsampling.", ) -flags.DEFINE_float( +flags.DEFINE_list( name="weight_decay", default=[0.0, 1e-2, 1e-3], help="The L2 penalty (regularization) coefficient.", From b3a8ed1dce61cd83ebb7225399ee9c6f87158d7b Mon Sep 17 00:00:00 2001 From: AmrMKayid Date: Fri, 25 Sep 2020 00:41:48 +0200 Subject: [PATCH 6/8] Fix num_epochs flag type --- oatomobile/baselines/torch/cil/tune.py | 2 +- oatomobile/baselines/torch/dim/tune.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/oatomobile/baselines/torch/cil/tune.py b/oatomobile/baselines/torch/cil/tune.py index 3b3579a..964bbd3 100644 --- a/oatomobile/baselines/torch/cil/tune.py +++ b/oatomobile/baselines/torch/cil/tune.py @@ -50,7 +50,7 @@ default=[32, 64, 128, 256, 512, 1024], help="The batch size used for training the neural network.", ) -flags.DEFINE_integer( +flags.DEFINE_list( name="num_epochs", default=None, help="The number of training epochs for the neural network.", diff --git a/oatomobile/baselines/torch/dim/tune.py b/oatomobile/baselines/torch/dim/tune.py index 79cebaa..92df078 100644 --- a/oatomobile/baselines/torch/dim/tune.py +++ b/oatomobile/baselines/torch/dim/tune.py @@ -52,7 +52,7 @@ default=[32, 64, 128, 256, 512, 1024], help="The batch size used for training the neural network.", ) -flags.DEFINE_integer( +flags.DEFINE_list( name="num_epochs", default=None, help="The number of training epochs for the neural network.", From 3fd34b06595a76bc9547ac8b136606e5df0c0097 Mon Sep 17 00:00:00 2001 From: AmrMKayid Date: Sun, 27 Sep 2020 16:43:42 +0200 Subject: [PATCH 7/8] Load data once inside run_experiments method --- oatomobile/baselines/torch/cil/tune.py | 107 +++++++++++++++---------- oatomobile/baselines/torch/dim/tune.py | 102 ++++++++++++++--------- 2 files changed, 127 insertions(+), 82 deletions(-) diff --git a/oatomobile/baselines/torch/cil/tune.py b/oatomobile/baselines/torch/cil/tune.py index 964bbd3..4e7fc2c 100644 --- a/oatomobile/baselines/torch/cil/tune.py +++ b/oatomobile/baselines/torch/cil/tune.py @@ -32,6 +32,7 @@ from oatomobile.torch import types from oatomobile.torch.loggers import TensorBoardLogger from oatomobile.torch.savers import Checkpointer +from oatomobile.utils.loggers import WandBLogger logging.set_verbosity(logging.DEBUG) FLAGS = flags.FLAGS @@ -52,7 +53,7 @@ ) flags.DEFINE_list( name="num_epochs", - default=None, + default=[1024], help="The number of training epochs for the neural network.", ) flags.DEFINE_integer( @@ -135,36 +136,9 @@ def transform(batch: Mapping[str, types.Array]) -> Mapping[str, torch.Tensor]: batch = model.transform(batch) return batch - # Setups the dataset and the dataloader. - modalities = ( - "lidar", - "is_at_traffic_light", - "traffic_light_state", - "player_future", - "velocity", - ) - dataset_train = CARLADataset.as_torch( - dataset_dir=os.path.join(dataset_dir, "train"), - modalities=modalities, - mode=True, - ) - dataloader_train = torch.utils.data.DataLoader( - dataset_train, - batch_size=batch_size, - shuffle=True, - num_workers=2, - ) - dataset_val = CARLADataset.as_torch( - dataset_dir=os.path.join(dataset_dir, "val"), - modalities=modalities, - mode=True, - ) - dataloader_val = torch.utils.data.DataLoader( - dataset_val, - batch_size=batch_size * 5, - shuffle=True, - num_workers=2, - ) + ## Data Loader + dataloader_train = config["dataloader_train"] + dataloader_val = config["dataloader_val"] def train_step( model: BehaviouralModel, @@ -293,27 +267,76 @@ def run_experiments(argv): logging.debug(argv) logging.debug(FLAGS) + # Parses command line arguments. + dataset_dir = FLAGS.dataset_dir + output_dir = FLAGS.output_dir + batch_size = FLAGS.batch_size + num_epochs = FLAGS.num_epochs + learning_rate = FLAGS.learning_rate + save_model_frequency = FLAGS.save_model_frequency + num_timesteps_to_keep = FLAGS.num_timesteps_to_keep + weight_decay = FLAGS.weight_decay + clip_gradients = FLAGS.clip_gradients + noise_level = [1e-1, 1e-2, 1e-3] + + # Setups the dataset and the dataloader. + modalities = ( + "lidar", + "is_at_traffic_light", + "traffic_light_state", + "player_future", + "velocity", + ) + dataset_train = CARLADataset.as_torch( + dataset_dir=os.path.join(dataset_dir, "train"), + modalities=modalities, + mode=True, + ) + dataloader_train = torch.utils.data.DataLoader( + dataset_train, + batch_size=batch_size, + shuffle=True, + num_workers=2, + ) + dataset_val = CARLADataset.as_torch( + dataset_dir=os.path.join(dataset_dir, "val"), + modalities=modalities, + mode=True, + ) + dataloader_val = torch.utils.data.DataLoader( + dataset_val, + batch_size=batch_size * 5, + shuffle=True, + num_workers=2, + ) + analysis = tune.run( main, loggers=[WandBLogger], num_samples=1, + resources_per_trial={ + 'gpu': torch.cuda.device_count(), + }, config={ "monitor": True, "wandb": { "project": "oatomobile", "monitor_gym": True, }, - "dataset_dir": FLAGS.dataset_dir, - "output_dir": FLAGS.output_dir, - "save_model_frequency": FLAGS.save_model_frequency, - "num_timesteps_to_keep": FLAGS.num_timesteps_to_keep, - "clip_gradients": FLAGS.clip_gradients, - "batch_size": tune.grid_search(FLAGS.batch_size), - "num_epochs": tune.grid_search(FLAGS.num_epochs), - "learning_rate": tune.grid_search(FLAGS.learning_rate), - "weight_decay": tune.grid_search(FLAGS.weight_decay), - "noise_level": tune.grid_search([1e-1, 1e-2, 1e-3]), - }) + "dataset_dir": dataset_dir, + "output_dir": output_dir, + "dataloader_train": dataloader_train, + "dataloader_val": dataloader_val, + "save_model_frequency": save_model_frequency, + "num_timesteps_to_keep": num_timesteps_to_keep, + "clip_gradients": clip_gradients, + "batch_size": tune.grid_search(batch_size), + "num_epochs": tune.grid_search(num_epochs), + "learning_rate": tune.grid_search(learning_rate), + "weight_decay": tune.grid_search(weight_decay), + "noise_level": tune.grid_search(noise_level), + }, + ) print("Best config: ", analysis.get_best_config(metric="loss")) diff --git a/oatomobile/baselines/torch/dim/tune.py b/oatomobile/baselines/torch/dim/tune.py index 92df078..088c953 100644 --- a/oatomobile/baselines/torch/dim/tune.py +++ b/oatomobile/baselines/torch/dim/tune.py @@ -54,7 +54,7 @@ ) flags.DEFINE_list( name="num_epochs", - default=None, + default=[1024], help="The number of training epochs for the neural network.", ) flags.DEFINE_integer( @@ -136,34 +136,9 @@ def transform(batch: Mapping[str, types.Array]) -> Mapping[str, torch.Tensor]: batch = model.transform(batch) return batch - # Setups the dataset and the dataloader. - modalities = ( - "lidar", - "is_at_traffic_light", - "traffic_light_state", - "player_future", - "velocity", - ) - dataset_train = CARLADataset.as_torch( - dataset_dir=os.path.join(dataset_dir, "train"), - modalities=modalities, - ) - dataloader_train = torch.utils.data.DataLoader( - dataset_train, - batch_size=batch_size, - shuffle=True, - num_workers=2, - ) - dataset_val = CARLADataset.as_torch( - dataset_dir=os.path.join(dataset_dir, "val"), - modalities=modalities, - ) - dataloader_val = torch.utils.data.DataLoader( - dataset_val, - batch_size=batch_size * 5, - shuffle=True, - num_workers=2, - ) + ## Data Loader + dataloader_train = config["dataloader_train"] + dataloader_val = config["dataloader_val"] # Theoretical limit of NLL. nll_limit = -torch.sum( # pylint: disable=no-member @@ -332,27 +307,74 @@ def run_experiments(argv): logging.debug(argv) logging.debug(FLAGS) + # Parses command line arguments. + dataset_dir = FLAGS.dataset_dir + output_dir = FLAGS.output_dir + batch_size = FLAGS.batch_size + num_epochs = FLAGS.num_epochs + learning_rate = FLAGS.learning_rate + save_model_frequency = FLAGS.save_model_frequency + num_timesteps_to_keep = FLAGS.num_timesteps_to_keep + weight_decay = FLAGS.weight_decay + clip_gradients = FLAGS.clip_gradients + noise_level = [1e-1, 1e-2, 1e-3] + + # Setups the dataset and the dataloader. + modalities = ( + "lidar", + "is_at_traffic_light", + "traffic_light_state", + "player_future", + "velocity", + ) + dataset_train = CARLADataset.as_torch( + dataset_dir=os.path.join(dataset_dir, "train"), + modalities=modalities, + ) + dataloader_train = torch.utils.data.DataLoader( + dataset_train, + batch_size=batch_size, + shuffle=True, + num_workers=2, + ) + dataset_val = CARLADataset.as_torch( + dataset_dir=os.path.join(dataset_dir, "val"), + modalities=modalities, + ) + dataloader_val = torch.utils.data.DataLoader( + dataset_val, + batch_size=batch_size * 5, + shuffle=True, + num_workers=2, + ) + analysis = tune.run( main, loggers=[WandBLogger], num_samples=1, + resources_per_trial={ + 'gpu': torch.cuda.device_count(), + }, config={ "monitor": True, "wandb": { "project": "oatomobile", "monitor_gym": True, }, - "dataset_dir": FLAGS.dataset_dir, - "output_dir": FLAGS.output_dir, - "save_model_frequency": FLAGS.save_model_frequency, - "num_timesteps_to_keep": FLAGS.num_timesteps_to_keep, - "clip_gradients": FLAGS.clip_gradients, - "batch_size": tune.grid_search(FLAGS.batch_size), - "num_epochs": tune.grid_search(FLAGS.num_epochs), - "learning_rate": tune.grid_search(FLAGS.learning_rate), - "weight_decay": tune.grid_search(FLAGS.weight_decay), - "noise_level": tune.grid_search([1e-1, 1e-2, 1e-3]), - }) + "dataset_dir": dataset_dir, + "output_dir": output_dir, + "dataloader_train": dataloader_train, + "dataloader_val": dataloader_val, + "save_model_frequency": save_model_frequency, + "num_timesteps_to_keep": num_timesteps_to_keep, + "clip_gradients": clip_gradients, + "batch_size": tune.grid_search(batch_size), + "num_epochs": tune.grid_search(num_epochs), + "learning_rate": tune.grid_search(learning_rate), + "weight_decay": tune.grid_search(weight_decay), + "noise_level": tune.grid_search(noise_level), + }, + ) print("Best config: ", analysis.get_best_config(metric="loss")) From 5b55f4ae26ac65169ca3c230935ae39b033c5c2f Mon Sep 17 00:00:00 2001 From: AmrMKayid Date: Mon, 28 Sep 2020 17:00:52 +0200 Subject: [PATCH 8/8] Revert data loading back to main method Experimenting with multiple batch_sizes --- oatomobile/baselines/torch/cil/tune.py | 66 ++++++++++++-------------- oatomobile/baselines/torch/dim/tune.py | 62 +++++++++++------------- 2 files changed, 58 insertions(+), 70 deletions(-) diff --git a/oatomobile/baselines/torch/cil/tune.py b/oatomobile/baselines/torch/cil/tune.py index 4e7fc2c..3e94082 100644 --- a/oatomobile/baselines/torch/cil/tune.py +++ b/oatomobile/baselines/torch/cil/tune.py @@ -136,9 +136,36 @@ def transform(batch: Mapping[str, types.Array]) -> Mapping[str, torch.Tensor]: batch = model.transform(batch) return batch - ## Data Loader - dataloader_train = config["dataloader_train"] - dataloader_val = config["dataloader_val"] + # Setups the dataset and the dataloader. + modalities = ( + "lidar", + "is_at_traffic_light", + "traffic_light_state", + "player_future", + "velocity", + ) + dataset_train = CARLADataset.as_torch( + dataset_dir=os.path.join(dataset_dir, "train"), + modalities=modalities, + mode=True, + ) + dataloader_train = torch.utils.data.DataLoader( + dataset_train, + batch_size=batch_size, + shuffle=True, + num_workers=2, + ) + dataset_val = CARLADataset.as_torch( + dataset_dir=os.path.join(dataset_dir, "val"), + modalities=modalities, + mode=True, + ) + dataloader_val = torch.utils.data.DataLoader( + dataset_val, + batch_size=batch_size * 5, + shuffle=True, + num_workers=2, + ) def train_step( model: BehaviouralModel, @@ -279,37 +306,6 @@ def run_experiments(argv): clip_gradients = FLAGS.clip_gradients noise_level = [1e-1, 1e-2, 1e-3] - # Setups the dataset and the dataloader. - modalities = ( - "lidar", - "is_at_traffic_light", - "traffic_light_state", - "player_future", - "velocity", - ) - dataset_train = CARLADataset.as_torch( - dataset_dir=os.path.join(dataset_dir, "train"), - modalities=modalities, - mode=True, - ) - dataloader_train = torch.utils.data.DataLoader( - dataset_train, - batch_size=batch_size, - shuffle=True, - num_workers=2, - ) - dataset_val = CARLADataset.as_torch( - dataset_dir=os.path.join(dataset_dir, "val"), - modalities=modalities, - mode=True, - ) - dataloader_val = torch.utils.data.DataLoader( - dataset_val, - batch_size=batch_size * 5, - shuffle=True, - num_workers=2, - ) - analysis = tune.run( main, loggers=[WandBLogger], @@ -325,8 +321,6 @@ def run_experiments(argv): }, "dataset_dir": dataset_dir, "output_dir": output_dir, - "dataloader_train": dataloader_train, - "dataloader_val": dataloader_val, "save_model_frequency": save_model_frequency, "num_timesteps_to_keep": num_timesteps_to_keep, "clip_gradients": clip_gradients, diff --git a/oatomobile/baselines/torch/dim/tune.py b/oatomobile/baselines/torch/dim/tune.py index 088c953..3de0520 100644 --- a/oatomobile/baselines/torch/dim/tune.py +++ b/oatomobile/baselines/torch/dim/tune.py @@ -136,9 +136,34 @@ def transform(batch: Mapping[str, types.Array]) -> Mapping[str, torch.Tensor]: batch = model.transform(batch) return batch - ## Data Loader - dataloader_train = config["dataloader_train"] - dataloader_val = config["dataloader_val"] + # Setups the dataset and the dataloader. + modalities = ( + "lidar", + "is_at_traffic_light", + "traffic_light_state", + "player_future", + "velocity", + ) + dataset_train = CARLADataset.as_torch( + dataset_dir=os.path.join(dataset_dir, "train"), + modalities=modalities, + ) + dataloader_train = torch.utils.data.DataLoader( + dataset_train, + batch_size=batch_size, + shuffle=True, + num_workers=2, + ) + dataset_val = CARLADataset.as_torch( + dataset_dir=os.path.join(dataset_dir, "val"), + modalities=modalities, + ) + dataloader_val = torch.utils.data.DataLoader( + dataset_val, + batch_size=batch_size * 5, + shuffle=True, + num_workers=2, + ) # Theoretical limit of NLL. nll_limit = -torch.sum( # pylint: disable=no-member @@ -319,35 +344,6 @@ def run_experiments(argv): clip_gradients = FLAGS.clip_gradients noise_level = [1e-1, 1e-2, 1e-3] - # Setups the dataset and the dataloader. - modalities = ( - "lidar", - "is_at_traffic_light", - "traffic_light_state", - "player_future", - "velocity", - ) - dataset_train = CARLADataset.as_torch( - dataset_dir=os.path.join(dataset_dir, "train"), - modalities=modalities, - ) - dataloader_train = torch.utils.data.DataLoader( - dataset_train, - batch_size=batch_size, - shuffle=True, - num_workers=2, - ) - dataset_val = CARLADataset.as_torch( - dataset_dir=os.path.join(dataset_dir, "val"), - modalities=modalities, - ) - dataloader_val = torch.utils.data.DataLoader( - dataset_val, - batch_size=batch_size * 5, - shuffle=True, - num_workers=2, - ) - analysis = tune.run( main, loggers=[WandBLogger], @@ -363,8 +359,6 @@ def run_experiments(argv): }, "dataset_dir": dataset_dir, "output_dir": output_dir, - "dataloader_train": dataloader_train, - "dataloader_val": dataloader_val, "save_model_frequency": save_model_frequency, "num_timesteps_to_keep": num_timesteps_to_keep, "clip_gradients": clip_gradients,