Setup and Installation Guide

(optional) GPU instance

Use Deep Learning AMI (Ubuntu 18.04) Version 40.0 AMI

  • on us-west-2, ami-084f81625fbc98fa4
  • additional disk may be required for data

Once logged in

# update conda to the latest 
conda update -n base conda 

conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch

Installation

Dependency repo

# clone dependency repo on the same levels as this repo and cd into this repo

# setup environment
conda create -n hpt python=3.7 ipython
conda activate hpt

# NOTE: if you are not using CUDA 10.2, you need to change the 10.2 in this command appropriately. Make sure to use torch 1.6.0
# (check CUDA version with e.g. `cat /usr/local/cuda/version.txt`)
# latest 
conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch

# 1.6 torch (no support for torchvision transform on tensor)
conda install pytorch==1.6.0 torchvision==0.7.0 cudatoolkit=10.2 -c pytorch
# colorado machine (torch 1.2.0 pairs with torchvision 0.4.0 and cudatoolkit 10.0)
conda install pytorch==1.2.0 torchvision==0.4.0 cudatoolkit=10.0 -c pytorch

# install local submodules
cd OpenSelfSup
pip install -v -e .

Data installation

Installing and setting up all 16 datasets is a bit of work, so this tutorial shows how to install and set up RESISC-45, and provides links to repeat those steps with other datasets.

RESISC-45

RESISC-45 contains 31,500 aerial images, covering 45 scene classes with 700 images in each class.

# cd to the directory where you want the data, $DATA
wget -q https://bit.ly/3pfkHYp -O resisc45.tar.gz
md5sum resisc45.tar.gz  # this should be 964dafcfa2dff0402d0772514fb4540b
tar xf resisc45.tar.gz

mkdir ~/data 
mv resisc45 ~/data 

# replace/set $DATA and $CODE as appropriate 
# e.g., ln -s /home/ubuntu/data/resisc45 /home/ubuntu/hpt/OpenSelfSup/data/resisc45/all
ln -s $DATA/resisc45 $CODE/OpenSelfSup/data/resisc45/all

e.g., ln -s /home/ubuntu/data/resisc45 /home/ubuntu/hpt/OpenSelfSup/data/resisc45/all

Download Pretrained Models

cd OpenSelfSup/data/basetrain_chkpts/
./download-pretrained-models.sh

Verify Install With RESISC DataSet

OpenSelfSup

Check installation by pretraining using mocov2, extracting the model weights, evaluating the representations, and then viewing the results on tensorboard or wandb:

export WANDB_API_KEY=<use your API key>
export WANDB_ENTITY=cal-capstone
export WANDB_PROJECT=hpt2
#export WANDB_MODE=dryrun

cd OpenSelfSup

# Sanity check with single train and single epoch 
CUDA_VISIBLE_DEVICES=1 ./tools/single_train.sh configs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep.py --debug 

CUDA_VISIBLE_DEVICES=1 ./tools/single_train.sh  /scratch/crguest/OpenSelfSup/configs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep.py --work_dir work_dirs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep/ --debug

# Sanity check: MoCo for 20 epoch on 4 gpus
./tools/dist_train.sh configs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep.py 4

# if debugging, use 
python tools/train.py configs/selfsup/moco/r50_v2_resisc_in_basetrain_1ep.py --work_dir work_dirs/selfsup/moco/r50_v2_resisc_in_basetrain_1ep/ --debug

# make some variables so its clear what's happening
CHECKPOINT=work_dirs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep/epoch_20.pth
BACKBONE=work_dirs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep/epoch_20_moco_in_basetrain.pth
# Extract the backbone
python tools/extract_backbone_weights.py ${CHECKPOINT} ${BACKBONE}

# Evaluate the representations
./benchmarks/dist_train_linear.sh configs/benchmarks/linear_classification/resisc45/r50_last.py ${BACKBONE}

# View the results (optional if wandb is not configured)
cd work_dirs
# you may need to install tensorboard
tensorboard --logdir .

Verify Install With SEN12MS Dataset

OpenSelfSup

Check installation by pretraining using mocov2, extracting the model weights, evaluating the representations, and then viewing the results on tensorboard or wandb:

export WANDB_API_KEY=<use your API key>
export WANDB_ENTITY=cal-capstone
export WANDB_PROJECT=hpt2

cd OpenSelfSup

# single GPU training 
CUDA_VISIBLE_DEVICES=1 ./tools/single_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep.py --debug

CUDA_VISIBLE_DEVICES=1 ./tools/single_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_fulltrain_20ep.py --debug


# command for remote debugging, use full path
python /scratch/crguest/OpenSelfSup/tools/train.py /scratch/crguest/OpenSelfSup/configs/selfsup/moco/r50_v2_sen12ms_in_fulltrain_20ep.py --debug

CUDA_VISIBLE_DEVICES=1 ./tools/single_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_fulltrain_20ep.py --debug

# Sanity check: MoCo for 20 epoch on 4 gpus
#CUDA_VISIBLE_DEVICES=0,1,2,3 
CUDA_VISIBLE_DEVICES=1 ./tools/dist_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep.py 4

# distributed training 
#CUDA_VISIBLE_DEVICES=0,1,2,3 
./tools/dist_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_fulltrain_20ep.py 4

BACKBONE=work_dirs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep/epoch_20_moco_in_basetrain.pth
# method 1: from working dir
CHECKPOINT=work_dirs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep/epoch_20.pth
# method 2: from W&B, {projectid}/{W&B run id}
CHECKPOINT=hpt2/3l4yg63k

# Extract the backbone (arguments: input checkpoint first, output backbone second)
python tools/extract_backbone_weights.py ${CHECKPOINT} ${BACKBONE}

# Evaluate the representations
./benchmarks/dist_train_linear.sh configs/benchmarks/linear_classification/resisc45/r50_last.py ${BACKBONE}