BMIRDS · ntomita · Jan 19, 2024 · Jan 24, 2024 · Jan 24, 2024 · Jan 24, 2024
diff --git a/MaskHIT/configs/config_default.yaml b/MaskHIT/configs/config_default.yaml
diff --git a/requirement.sh → MaskHIT/install_requirement.sh b/requirement.sh → MaskHIT/install_requirement.sh
diff --git a/install_requirement_for_container.sh → MaskHIT/install_requirement_for_container.sh b/install_requirement_for_container.sh → MaskHIT/install_requirement_for_container.sh
@@ -2,7 +2,7 @@
 pip install openslide-python
 
 # install additional packages
-pip install pandarallel pandas scikit-image scikit-learn einops tqdm lifelines pyyaml
+pip install pandarallel pandas scikit-image scikit-learn einops tqdm lifelines pyyaml seaborn
 pip install git+https://github.com/ildoonet/pytorch-gradual-warmup-lr.git
 pip install opencv-python
 # When you see: AttributeError: module 'cv2.dnn' has no attribute 'DictValue'

diff --git a/MaskHIT/.gitignore → MaskHIT/maskhit/.gitignore b/MaskHIT/.gitignore → MaskHIT/maskhit/.gitignore
diff --git a/MaskHIT/README.md → MaskHIT/maskhit/README.md b/MaskHIT/README.md → MaskHIT/maskhit/README.md
diff --git a/MaskHIT/model/archs/vit/__init__.py → MaskHIT/maskhit/__init__.py b/MaskHIT/model/archs/vit/__init__.py → MaskHIT/maskhit/__init__.py
diff --git a/MaskHIT/maskhit/configs/config_default.yaml b/MaskHIT/maskhit/configs/config_default.yaml
@@ -0,0 +1,137 @@
+# Dataset and Model Configuration File
+# This configuration file sets parameters for dataset preprocessing and model training.
+# Modify values in config_user.yaml to override defaults.
+
+# NOTE: To override any values in this file, please create config_user.yaml
+# (or a YAML file with any name you like).
+# In config_user.yaml, only define the variables you wish to update. 
+# Unmentioned variables will use the default values specified here.
+# Please avoid directly modifying values in this file.
+#
+# Documentation Tags:
+#  (default): variables can be left with their default values
+#  (custom): more likely to need modification for each user
+#---------------------------------------------------
+
+### DATASET CONFIGURATION ###
+dataset:
+  ## Dataset configuration
+
+  # Path to the file (svs_meta.pickle) containing meta info about svs data
+  # File is generated by SlidePrep/MaskHIT_Prep/05_post_process.py
+  meta_svs: !!str <SET_YOUR_OWN_VALUE>
+
+  # Path to the file (meta.pickle) containing meta info about the dataset
+  # File is generated by SlidePrep/MaskHIT_Prep/01_get_svs_meta.py
+  meta_all: !!str <SET_YOUR_OWN_VALUE>
+
+  # Outcome our model is trying to predict
+  # Example: "Dx (U=UC, C=Cr, I=Ind)"
+  #TODO: Unclear what this means and what format is expected. If this is for classification, clarify.
+  outcome: !!str <SET_YOUR_OWN_VALUE>
+
+  # type of outcome to predict
+  # Available options: survival, classification, regression
+  outcome_type: !!str classification
+
+  # the name of the study
+  #TODO: Suggest renaming to 'study_name' for clarity.
+  study: !!str <SET_YOUR_OWN_VALUE>
+
+  # type of disease; whether it is cancer or not
+  #TODO: Add doc explaining why this matters. If necessary, we should rename it.
+  is_cancer: !!bool False
+
+  # title of project/disease name
+  #TODO: What's the difference between this and 'study'?
+  #This seems to be used as the folder value:
+  #        `meta_svs['folder'] = config.dataset.disease`
+  #Better to rename it or explain the main intent of this parameter.
+  disease: !!str <SET_YOUR_OWN_VALUE>
+
+  # names of classes in your dataset
+  #TODO: Needs documentation and an example.
+  classes: !!str <SET_YOUR_OWN_VALUE>
+
+  # Number of folds for nested cross-validation
+  num_folds: !!int 5
+
+patch:
+  ## Patch configuration
+
+  # number of patches from each region. If 0 will sample all patches
+  num_patches: !!int 0
+
+  # magnification level at which patches were extracted
+  #TODO: Why set this manually again? Could we point to a config file from Prep to extract this info?
+  magnification: !!int 10
+
+  # intensity of weight decay
+  #TODO: Why is this under the patch section?
+  wd: !!float 0.01
+
+model:
+  ## Model configuration
+
+  # used for uneven class distribution
+  weighted_loss: !!bool False
+
+  # learning rate
+  lr: !!float 1e-5
+
+  # Dropout rate
+  dropout: !!float 0.2
+
+  # Batch size for processing slide patches
+  batch_size: !!int 16
+
+  # which fold to use after kfold cross validation
+  #TODO: Not sure what this means. Also suggest changing the variable name to be more descriptive.
+  fold: !!int 0
+
+  # determines whether old logs should stay
+  override_logs: !!bool True
+
+  # number of svs sampled in sample-patient mode
+  regions_per_svs: !!int 64
+
+  #TODO: Copied from config_ibd_train.yaml. Need to double check the default values.
+  # Weight Decays
+  wd_attn: !!float 1e-3
+  wd_fuse: !!float 1e-2 # changed from 1e-3 to 1e-2
+  wd_loss: !!float 1e-2 # changed from 1e-3 to 1e-2
+  wd_pred: !!float 0.002
+
+  #TODO: Copied from config_ibd_train.yaml. Need to double check the default values.
+  # Learning Rates
+  lr_attn: !!float 1e-5 # lowered since we are using pre-trained model
+  lr_fuse: !!float 1e-4
+  lr_loss: !!float 1e-4
+  lr_pred: !!float 7e-4
+
+  #TODO: Copied from config_ibd_train.yaml. Need to double check the default values.
+  #Not sure what this is for, as in the code it chooses a measure based on outcome_type?
+  performance_measure: !!str f1
+
+  #TODO: Copied from config_ibd_train.yaml. Add doc.
+  accumulation_steps: !!int 1
+
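+  #TODO: Duplicates 'dropout' defined earlier in this 'model' section (YAML keys must be unique); keep only one. Copied from config_ibd_train.yaml. Need to double check the default values.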
+  dropout: !!float 0.2
+
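+  #TODO: Duplicates 'batch_size' defined earlier in this 'model' section (YAML keys must be unique); keep only one. Copied from config_ibd_train.yaml. Need to double check the default values.
+  #TODO: What is this for? And what does the trailing "# for visualization (64)" note mean?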
+  batch_size: !!int 16 # for visualization (64)
+
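+  #TODO: Duplicates 'override_logs' defined earlier in this 'model' section (YAML keys must be unique); keep only one. Copied from config_ibd_train.yaml. Maybe rename to: overwrite_logs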
+  override_logs: !!bool True
+
+  # TBD
+  #TODO: Missing docs.
+  sample_patient: !!bool True
+
+  # Check-point path?
+  #TODO: Missing docs.
+  resume: null
+
+
diff --git a/.../configs/config_default_visualization.yml → .../configs/config_default_visualization.yml b/.../configs/config_default_visualization.yml → .../configs/config_default_visualization.yml
diff --git a/MaskHIT/configs/config_ibd_train.yml → MaskHIT/maskhit/configs/config_ibd_train.yml b/MaskHIT/configs/config_ibd_train.yml → MaskHIT/maskhit/configs/config_ibd_train.yml
diff --git a/MaskHIT/configs/config_ibd_visualization.yml → ...khit/configs/config_ibd_visualization.yml b/MaskHIT/configs/config_ibd_visualization.yml → ...khit/configs/config_ibd_visualization.yml
diff --git a/MaskHIT/configs/config_tcga.yml → MaskHIT/maskhit/configs/config_tcga.yml b/MaskHIT/configs/config_tcga.yml → MaskHIT/maskhit/configs/config_tcga.yml
diff --git a/MaskHIT/create_attention_maps.py → MaskHIT/maskhit/create_attention_maps.py b/MaskHIT/create_attention_maps.py → MaskHIT/maskhit/create_attention_maps.py
diff --git a/MaskHIT/cross_validation.py → MaskHIT/maskhit/cross_validation.py b/MaskHIT/cross_validation.py → MaskHIT/maskhit/cross_validation.py
diff --git a/MaskHIT/model/.gitignore → MaskHIT/maskhit/model/.gitignore b/MaskHIT/model/.gitignore → MaskHIT/maskhit/model/.gitignore
diff --git a/MaskHIT/maskhit/model/__init__.py b/MaskHIT/maskhit/model/__init__.py
diff --git a/MaskHIT/model/archs/__init__.py → MaskHIT/maskhit/model/archs/__init__.py b/MaskHIT/model/archs/__init__.py → MaskHIT/maskhit/model/archs/__init__.py
diff --git a/MaskHIT/model/archs/agg_ap.py → MaskHIT/maskhit/model/archs/agg_ap.py b/MaskHIT/model/archs/agg_ap.py → MaskHIT/maskhit/model/archs/agg_ap.py
diff --git a/MaskHIT/model/archs/agg_attn.py → MaskHIT/maskhit/model/archs/agg_attn.py b/MaskHIT/model/archs/agg_attn.py → MaskHIT/maskhit/model/archs/agg_attn.py
diff --git a/MaskHIT/model/archs/agg_deepattnmisl.py → ...T/maskhit/model/archs/agg_deepattnmisl.py b/MaskHIT/model/archs/agg_deepattnmisl.py → ...T/maskhit/model/archs/agg_deepattnmisl.py
diff --git a/MaskHIT/model/archs/agg_mhattn.py → MaskHIT/maskhit/model/archs/agg_mhattn.py b/MaskHIT/model/archs/agg_mhattn.py → MaskHIT/maskhit/model/archs/agg_mhattn.py
diff --git a/MaskHIT/model/archs/agg_vit.py → MaskHIT/maskhit/model/archs/agg_vit.py b/MaskHIT/model/archs/agg_vit.py → MaskHIT/maskhit/model/archs/agg_vit.py
diff --git a/MaskHIT/model/archs/utils/__init__.py → ...HIT/maskhit/model/archs/utils/__init__.py b/MaskHIT/model/archs/utils/__init__.py → ...HIT/maskhit/model/archs/utils/__init__.py
diff --git a/...IT/model/archs/utils/masking_generator.py → ...it/model/archs/utils/masking_generator.py b/...IT/model/archs/utils/masking_generator.py → ...it/model/archs/utils/masking_generator.py
diff --git a/MaskHIT/maskhit/model/archs/vit/__init__.py b/MaskHIT/maskhit/model/archs/vit/__init__.py
diff --git a/MaskHIT/model/archs/vit/deepvit.py → MaskHIT/maskhit/model/archs/vit/deepvit.py b/MaskHIT/model/archs/vit/deepvit.py → MaskHIT/maskhit/model/archs/vit/deepvit.py
diff --git a/MaskHIT/model/backbone.py → MaskHIT/maskhit/model/backbone.py b/MaskHIT/model/backbone.py → MaskHIT/maskhit/model/backbone.py
diff --git a/MaskHIT/model/helper.py → MaskHIT/maskhit/model/helper.py b/MaskHIT/model/helper.py → MaskHIT/maskhit/model/helper.py
diff --git a/MaskHIT/model/models.py → MaskHIT/maskhit/model/models.py b/MaskHIT/model/models.py → MaskHIT/maskhit/model/models.py
diff --git a/MaskHIT/maskhit/options/__init__.py b/MaskHIT/maskhit/options/__init__.py
diff --git a/MaskHIT/options/base_options.py → MaskHIT/maskhit/options/base_options.py b/MaskHIT/options/base_options.py → MaskHIT/maskhit/options/base_options.py
diff --git a/MaskHIT/options/read_config.py → MaskHIT/maskhit/options/read_config.py b/MaskHIT/options/read_config.py → MaskHIT/maskhit/options/read_config.py
diff --git a/MaskHIT/options/train_options.py → MaskHIT/maskhit/options/train_options.py b/MaskHIT/options/train_options.py → MaskHIT/maskhit/options/train_options.py
@@ -53,6 +53,7 @@ def initialize(self):
                                  type=str,
                                  default='',
                                  help='select cancer subset, if empty then use entire dataset')
+        #TODO: Lacks documentation. Does 'fold' mean the 0-indexed test fold ID?
         self.parser.add_argument('--fold',
                                  type=int,
                                  default=0,
@@ -254,10 +255,12 @@ def initialize(self):
                                  help='turn off strict mode')
 
         # patch region masking
+        #TODO: Avoid overly similar argument names ('--prob-mask' vs. '--prop-mask').
         self.parser.add_argument('--prob-mask',
                                  type=float,
                                  default=0,
                                  help='mask probability in BERT')
+        #TODO: Needs documentation for "masked:original:random". What do these mean?
         self.parser.add_argument('--prop-mask',
                                  type=str,
                                  default='0, 1, 0',
@@ -298,7 +301,7 @@ def initialize(self):
         # experiment optional options
         self.parser.add_argument('--checkpoints-folder',
                                  type=str,
-                                 default='checkpoints_new',
+                                 default='checkpoints',
                                  help='path to the checkpoints folder')
         self.parser.add_argument('--log-freq',
                                  type=int,

diff --git a/MaskHIT/plot_results.py → MaskHIT/maskhit/plot_results.py b/MaskHIT/plot_results.py → MaskHIT/maskhit/plot_results.py
diff --git a/MaskHIT/quick_test.py → MaskHIT/maskhit/quick_test.py b/MaskHIT/quick_test.py → MaskHIT/maskhit/quick_test.py