diff --git a/recognition/adni_convnext_47280647/.gitignore b/recognition/adni_convnext_47280647/.gitignore
new file mode 100644
index 000000000..f578dab0a
--- /dev/null
+++ b/recognition/adni_convnext_47280647/.gitignore
@@ -0,0 +1,21 @@
+# python
+__pycache__/
+*.pyc
+.venv/
+.env
+.python-version
+
+# training artifacts
+results/
+checkpoints/
+*.pt
+*.pth
+
+# data (never commit datasets)
+data/
+datasets/
+ADNI/
+
+# logs and QOL stuff
+logs/
+COMP3710_Report_v1.64_Final.pdf
\ No newline at end of file
diff --git a/recognition/adni_convnext_47280647/README.md b/recognition/adni_convnext_47280647/README.md
new file mode 100644
index 000000000..8a8e40a10
--- /dev/null
+++ b/recognition/adni_convnext_47280647/README.md
@@ -0,0 +1,228 @@
+# ADNI ConvNeXtLite Classifier – Problem 8
+
+Author: Shivam Garg
+Student Number: 47280647
+
+## Table of Contents
+1. [Executive Summary](#1-executive-summary)
+2. [Problem Definition](#2-problem-definition)
+   1. [Problem Statement](#21-problem-statement)
+   2. [Dataset Overview](#22-dataset-overview)
+3. [Methodology Overview](#3-methodology-overview)
+4. [Data Pipeline](#4-data-pipeline)
+   1. [Ingestion & Directory Layout](#41-ingestion--directory-layout)
+   2. [Pre-processing](#42-pre-processing)
+   3. [Augmentation](#43-augmentation)
+5. [Model Architecture](#5-model-architecture)
+   1. [TinyCNN Baseline](#51-tinycnn-baseline)
+   2. [ConvNeXtLite Classifier](#52-convnextlite-classifier)
+6. [Training Configuration & Implementation](#6-training-configuration--implementation)
+7. [Evaluation Protocol](#7-evaluation-protocol)
+8. [Experiments & Results](#8-experiments--results)
+   1. [Training Curves](#81-training-curves)
+   2. [Validation & Test Metrics](#82-validation--test-metrics)
+   3. [Ablations & Comparisons](#83-ablations--comparisons)
+9. [Future Improvements](#9-future-improvements)
+10. [Usage Guide](#10-usage-guide)
+    1. [Environment Setup](#101-environment-setup)
+    2. [Training Commands](#102-training-commands)
+    3. [Evaluation Commands](#103-evaluation-commands)
+11. [Dependencies](#11-dependencies)
+12. [References](#12-references)
+
+---
+
+## 1. Executive Summary
+This project tackles **binary classification of Alzheimer's Disease (AD) vs Cognitively Normal (CN)** from 2D **ADNI MRI** slices, targeting ≥ **80%** test accuracy on a strictly **patient-wise held-out** dataset. The implementation follows a leakage-safe pipeline (grayscale conversion, 224×224 resizing, normalisation `(x-0.5)/0.25`, and light MRI-appropriate augmentation) paired with strict **subject-wise** splits, so no patient appears in more than one of the training, validation, and test sets.
+
+Two models are implemented to bracket performance and guide design choices. A compact **TinyCNN** provides a clear, reproducible baseline. A **ConvNeXtLite** classifier then scales representational capacity using modern CNN components (e.g., depthwise convolutions, LayerNorm, larger kernels) to better capture subtle brain textures. Training is implemented in PyTorch with **Adam**, checkpointing, seeded runs, and automatic curve exports.
+
+## 2. Problem Definition
+
+### 2.1 Problem Statement
+The task is **binary classification** of brain MRI slices into **AD (Alzheimer's Disease)** and **CN (Cognitively Normal)**. Inputs are 2D axial slices derived from the ADNI scans; the output is a single class label per slice, with patient-level reporting obtained by aggregating slice predictions per subject.
+The primary objective is ≥ 0.80 accuracy on a strictly patient-wise held-out test set, so that no data leakage inflates the result.
+
+### 2.2 Dataset Overview
+The dataset is organised as follows:
+- **Sources and classes**: the dataset is a two-class subset of ADNI with labels AD and CN. Each subject contributes a 3D MRI volume from which 2D axial slices are extracted for training and evaluation.
+- **Data units**: training operates at the slice level. Evaluation includes both slice-level and patient-level (aggregated) metrics.
+
+## 3. Methodology Overview
+The approach is an end-to-end pipeline that turns 2D ADNI MRI slices into patient-level AD/CN predictions while preventing data leakage and keeping runs easy to reproduce. It combines a transparent TinyCNN baseline with a stronger ConvNeXtLite classifier to bracket performance.
+
+## 4. Data Pipeline
+
+### 4.1 Ingestion & Directory Layout
+- **Accepted layouts**: the loader works when `DATA_ROOT` is either the parent folder `AD_NC/` or one of its child splits (`AD_NC/train` or `AD_NC/test`). Two structures are therefore supported:
+  1. `AD_NC/train/{AD,NC}/**/*.jpg|png` and `AD_NC/test/{AD,NC}/**/*.jpg|png`.
+  2. A single pool with only `AD_NC/{AD,NC}/...`; in this fallback, validation and test partitions are carved out with a plain random split (`_split_items`), which operates at the image level rather than the subject level.
+- **Subject IDs**: for each image path we infer the subject identifier by taking the directory immediately below `AD/` or `NC/` when available; otherwise we fall back to the filename prefix before the first underscore (e.g. `1003730_94.jpeg → subject 1003730`). This heuristic mirrors `_extract_subject_id` in `dataset.py`.
+- **Split construction**: the loader first gathers all `(path, label)` pairs under the requested root, groups them by subject, and then:
+  - if both `train/` and `test/` trees exist, uses `train/` for training + validation (with subject-wise splitting) and leaves `test/` untouched for the held-out evaluation set;
+  - otherwise shuffles the pooled items to carve out validation and test sets according to `val_ratio` and `test_ratio` (image-wise; the `train/`/`test/` layout is the one used for all reported results).
+With the `train/`/`test/` layout, each subject appears in exactly one split, eliminating patient-level leakage.
+
+### 4.2 Pre-processing
+Every slice follows the same deterministic transform chain:
+1. open via PIL with `.convert("L")` to force grayscale,
+2. convert to a NumPy array and scale intensities to `[0, 1]`,
+3. wrap into a PyTorch tensor of shape `(1, H, W)`,
+4. resize to `224×224` using `torch.nn.functional.interpolate`,
+5. standardise with `(x - 0.5) / 0.25`, matching the statistics used during model development.
+The dataset returns `(tensor, label, subject_id)` so downstream evaluation can aggregate logits slice-wise or subject-wise without additional bookkeeping.
+
+### 4.3 Augmentation
+Augmentation is deliberately minimal to respect anatomical structure. When `--augment` is passed and the split is `train`, we flip slices horizontally with probability 0.5. No rotations, elastic deformations, or intensity jitter are applied in this version, keeping the pipeline stable and reproducible while still injecting minor invariance to left–right orientation.
+
+## 5. Model Architecture
+
+### 5.1 TinyCNN Baseline
+Implemented in `modules.py:9`, the baseline serves as a sanity check for the full pipeline:
+- **Structure**: three 3×3 convolutions with ReLU activations and two max-pooling stages reduce the 224×224 input to a 1×1 feature map, followed by an optional dropout layer and a fully connected head.
+- **Capacity**: 23,426 trainable parameters, small enough to train quickly on CPU/GPU while exposing integration bugs early.
+- **Usage**: invoked via `--model tiny`; helpful for validating the subject-wise split logic and plotting scripts before expensive ConvNeXtLite runs.
+
+TinyCNN exists to verify the data pipeline and exercise the full training loop end to end; it is not intended to be a competitive classifier.
+
+### 5.2 ConvNeXtLite Classifier
+The primary model (see `modules.py:93`) adapts ConvNeXt ideas to single-channel medical slices:
+- **Two-step stem**: consecutive 3×3 stride-2 convolutions expand the channel count to 80 while downsampling the spatial resolution to 56×56, each followed by GroupNorm-as-LayerNorm and SiLU activation.
+- **Stage layout**: four stages with depths `[2, 4, 8, 2]` and channel widths `[80, 160, 320, 640]`. Each block applies a 5×5 depthwise convolution, ChannelNorm, 1×1 expansion to 4× width, SiLU, 1×1 projection, per-channel layer scaling, and stochastic depth.
+- **Regularisation**: head dropout (configurable via `--head_dropout`) and a linear drop-path schedule (`--drop_path_rate`) mitigate overfitting, while layer-scale (initialised to 1e-6) stabilises training.
+- **Head**: global average pooling, ChannelNorm, dropout, and a linear classifier produce slice logits; probabilities are obtained with softmax for reporting.
+- **Parameter count**: 15,296,082 trainable parameters, giving significantly higher capacity than TinyCNN while remaining feasible on a single A100 with batch sizes up to 32.
+
+Both architectures are exposed through `modules.build_model`, enabling CLI selection and consistent metric logging.
+
+## 6. Training Configuration & Implementation
+- `train.py` is entirely CLI-driven; every run writes its resolved arguments to `<save_dir>/config.json` (see the Usage Guide for the exact commands).
+- `set_seed` aligns Python/NumPy/PyTorch RNGs and cuDNN flags for reproducibility; pass `--seed` to control it.
+- Models are built via `modules.build_model` and optimised with Adam + cross-entropy on the available device (`cuda` preferred); the relevant flags (`--model`, `--lr`, etc.) are listed later.
+- The best validation accuracy triggers a `best.pt` checkpoint, while `loss_curve.png` / `acc_curve.png` are saved alongside the config for quick inspection.
+
+## 7. Evaluation Protocol
+- Validation accuracy is recorded each epoch on the subject-wise validation loader, matching the metrics printed by `train.py`.
+- `predict.py` reloads `best.pt`, rebuilds the model using the supplied hyperparameters, and reports slice- and patient-level accuracy (the Usage Guide shows the command).
+- Patient accuracy averages logits per subject before the argmax; no additional setup is required.
+
+## 8. Experiments & Results
+
+### 8.1 Training Curves
+Fresh Rangpur reruns regenerate these plots during training; representative copies are checked into `images/`. Both configurations show steadily decreasing training loss with validation accuracy plateauing around the 0.80 mark after ~18–20 epochs, confirming the subject-wise split has removed the earlier leakage spikes.
+- ![Accuracy curve – best run](images/rerun_lr1e-4_hd0.3_dp0.2_s42_acc_curve.png)
+  *Figure 1: Training/validation accuracy for the strongest configuration (`lr=1e-4`, `head_dropout=0.3`, `drop_path=0.2`).*
+- ![Loss curve – best run](images/rerun_lr1e-4_hd0.3_dp0.2_s42_loss_curve.png)
+  *Figure 2: Corresponding loss curve, showing convergence by ~epoch 20.*
+- ![Accuracy curve – subject-split baseline](images/rerun_subjectsplit_lr3e-4_hd0.2_s123_acc_curve.png)
+  *Figure 3: Accuracy trace for the baseline rerun (`lr=3e-4`, `head_dropout=0.2`, `drop_path=0.15`).*
+- ![Loss curve – subject-split baseline](images/rerun_subjectsplit_lr3e-4_hd0.2_s123_loss_curve.png)
+  *Figure 4: Loss curve for the baseline setting, plateauing slightly earlier.*
+
+### 8.2 Validation & Test Metrics
+| Run ID | Epochs | Val acc (slice) | Test acc (slice) | Test acc (patient) | Notes |
+|------------------------------------------|:------:|:---------------:|:----------------:|:------------------:|-------|
+| `rerun_subjectsplit_lr3e-4_hd0.2_s123`   | 25     | 0.797           | 0.653            | 0.667              | Subject-wise split, head dropout 0.2, drop-path 0.15. |
+| `rerun_lr1e-4_hd0.3_dp0.2_s42` (best)    | 25     | **0.806**       | 0.653            | 0.689              | Lower LR plus stronger regularisation; best held-out performance. |
+
+Slice metrics come directly from the validation loop / `predict.py` (averaged over slices). Patient metrics are computed by aggregating logits per subject inside `predict.py`. Validation accuracy crosses the 0.80 mark for the best configuration, while held-out test accuracy stabilises around 0.65, highlighting the residual domain gap between the train and test folders.
+
+### 8.3 Ablations & Comparisons
+- ![Leakage run accuracy](images/leakage_acc_curve.png)
+  *Figure 5: Example of the leakage-affected run (`a100_nextlite_p8_v1_seed123`) where validation accuracy falsely approaches 1.0 under an image-wise split.*
+- **Leakage vs. subject-wise splits**: prior runs (e.g., the figure above and `a100_nextlite_bs32_lr3e-3_hd0`) used image-wise splits and reported inflated validation accuracy (>0.98) despite ~0.65 test accuracy. Switching to subject-level grouping aligns validation with test performance; those leakage-affected artefacts are kept offline for comparison but excluded from the final metrics.
+- **Regularisation sweep**: increasing head dropout to 0.3 and ramping drop-path to 0.2 (while lowering the LR to 1e-4) improves patient-level accuracy from 0.667 → 0.689, suggesting the model benefits from stronger stochastic regularisation.
+- **Baseline check**: TinyCNN trains without issue but tops out around 0.58 slice accuracy (not shown); it is mainly useful for validating the pipeline before launching ConvNeXtLite experiments.
+
+## 9. Future Improvements
+- **Reach the ≥0.80 test target**: extend training to 35–40 epochs with early stopping, and explore cosine LR decay to squeeze additional generalisation without violating the leakage-safe split.
+- **Richer augmentation**: introduce small rotations (±5°), brightness/contrast jitter, or CutMix/MixUp variants while monitoring patient-level accuracy for regressions.
+- **Regularisation tweaks**: add weight decay (~1e-4), experiment with label smoothing, and test moderate dropout in earlier stages to combat overfitting on the training distribution.
+- **Subject-level modelling**: aggregate predictions with simple ensembling (multiple seeds) or train a lightweight per-subject classifier on pooled slice features to boost patient accuracy.
+- **Data quality pass**: review misclassified subjects for slice outliers or label noise, and consider incorporating additional planes (coronal/sagittal) if ADNI derivatives permit.
+
+## 10. Usage Guide
+
+### 10.1 Environment Setup
+- Create/activate a Python env (conda or venv).
+- Install dependencies:
+  ```bash
+  pip install -r requirements.txt
+  pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu121  # adjust CUDA tag if needed
+  ```
+- Optional: set `PYTHONPATH` to the project root for convenience.
+
+### 10.2 Training Commands
+- Baseline rerun:
+  ```bash
+  python train.py \
+    --dataset adni \
+    --data_root /home/groups/comp3710/ADNI/AD_NC \
+    --model nextlite_tiny \
+    --epochs 25 \
+    --batch_size 32 \
+    --lr 3e-4 \
+    --seed 123 \
+    --head_dropout 0.2 \
+    --drop_path_rate 0.15 \
+    --layer_scale_init 1e-6 \
+    --augment \
+    --num_workers 1 \
+    --save_dir runs/rerun_subjectsplit_lr3e-4_hd0.2_s123
+  ```
+- Best-performing rerun:
+  ```bash
+  python train.py \
+    --dataset adni \
+    --data_root /home/groups/comp3710/ADNI/AD_NC \
+    --model nextlite_tiny \
+    --epochs 25 \
+    --batch_size 32 \
+    --lr 1e-4 \
+    --seed 42 \
+    --head_dropout 0.3 \
+    --drop_path_rate 0.2 \
+    --layer_scale_init 1e-6 \
+    --augment \
+    --num_workers 1 \
+    --save_dir runs/rerun_lr1e-4_hd0.3_dp0.2_s42
+  ```
+- Adjust `--batch_size`, `--num_workers`, and `--data_root` to match your environment; each run writes `config.json`, `best.pt`, and curves into `--save_dir`.
+
+### 10.3 Evaluation Commands
+- Evaluate the best run on the held-out test split:
+  ```bash
+  python predict.py \
+    --checkpoint runs/rerun_lr1e-4_hd0.3_dp0.2_s42/best.pt \
+    --dataset adni \
+    --data_root /home/groups/comp3710/ADNI/AD_NC \
+    --model nextlite_tiny \
+    --batch_size 64 \
+    --num_workers 1 \
+    --head_dropout 0.3 \
+    --drop_path_rate 0.2 \
+    --layer_scale_init 1e-6
+  ```
+- Swap the checkpoint path/flags to evaluate other configs. The output prints slice accuracy and patient accuracy (if subject IDs were returned by the loader).
+
+## 11. Dependencies
+Install the following libraries (either individually or via `pip install -r requirements.txt`), then add the appropriate PyTorch CUDA wheels manually.
+
+```
+pillow==10.4.0
+numpy==1.26.4
+matplotlib==3.8.4
+tqdm==4.66.4
+scikit-learn==1.3.2
+scipy==1.11.4
+# GPU stack (choose wheel for your CUDA version)
+# pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu121
+```
+
+## 12. References
+- Liu, Z., Mao, H., Wu, C., Feichtenhofer, C., Darrell, T., & Xie, S. (2022). *A ConvNet for the 2020s*. CVPR. https://arxiv.org/abs/2201.03545
+- Alzheimer's Disease Neuroimaging Initiative (ADNI). (n.d.). *ADNI MRI Collection*. https://adni.loni.usc.edu/
+- Wightman, R. (2021). *Stochastic depth and modern ConvNets in PyTorch*. TIMM GitHub. https://github.com/rwightman/pytorch-image-models
+
+## AI Declaration
+ChatGPT 5 was used to refactor and improve the flow of this document. GitHub Copilot and ChatGPT (gpt-5-codex) were used to fix bugs relating to data leakage and to optimise the data loading pipeline.
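+
+## Appendix: Patient-Level Aggregation Sketch
+Section 7 notes that patient-level accuracy is obtained by averaging slice logits per subject and then taking the argmax; `predict.py` implements this directly. The snippet below is a minimal, self-contained sketch of the same aggregation for readers who want to reuse it outside `predict.py`; the function name and the toy inputs are illustrative only and are not part of the codebase.
+
+```python
+import numpy as np
+from collections import defaultdict
+
+def patient_level_accuracy(slice_logits, subject_ids, labels):
+    """Average slice logits per subject, argmax, then compare with the subject label."""
+    subj_logits = defaultdict(list)  # subject id -> list of per-slice logit vectors
+    subj_label = {}                  # subject id -> ground-truth label
+    for logit, sid, y in zip(slice_logits, subject_ids, labels):
+        subj_logits[sid].append(np.asarray(logit, dtype=np.float32))
+        subj_label[sid] = int(y)
+    correct = 0
+    for sid, logit_list in subj_logits.items():
+        mean_logit = np.mean(np.stack(logit_list, axis=0), axis=0)  # average over slices
+        correct += int(int(np.argmax(mean_logit)) == subj_label[sid])
+    return correct / max(len(subj_logits), 1)
+
+# Toy example: two subjects with two slices each (NC=0, AD=1).
+logits = [[0.2, 0.9], [0.1, 0.7], [1.2, 0.3], [0.8, 0.4]]
+sids = ["s1", "s1", "s2", "s2"]
+ys = [1, 1, 0, 0]
+print(patient_level_accuracy(logits, sids, ys))  # expected: 1.0
+```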
\ No newline at end of file diff --git a/recognition/adni_convnext_47280647/__pycache__/dataset.cpython-313.pyc b/recognition/adni_convnext_47280647/__pycache__/dataset.cpython-313.pyc new file mode 100644 index 000000000..c744232ca Binary files /dev/null and b/recognition/adni_convnext_47280647/__pycache__/dataset.cpython-313.pyc differ diff --git a/recognition/adni_convnext_47280647/__pycache__/modules.cpython-313.pyc b/recognition/adni_convnext_47280647/__pycache__/modules.cpython-313.pyc new file mode 100644 index 000000000..1f1b4b801 Binary files /dev/null and b/recognition/adni_convnext_47280647/__pycache__/modules.cpython-313.pyc differ diff --git a/recognition/adni_convnext_47280647/dataset.py b/recognition/adni_convnext_47280647/dataset.py new file mode 100644 index 000000000..c3cc94b4d --- /dev/null +++ b/recognition/adni_convnext_47280647/dataset.py @@ -0,0 +1,207 @@ +from __future__ import annotations +import os, glob, random +from dataclasses import dataclass +from typing import Tuple, List, Dict, Optional + +import numpy as np +import torch +import torch.nn.functional as F +from torch.utils.data import Dataset, DataLoader, random_split +from PIL import Image # image loading + +# ===== Random dataset (for local smoke tests) ===== +class RandomSliceDataset(Dataset): + def __init__(self, n: int = 192, image_size: Tuple[int, int, int] = (1, 224, 224), num_classes: int = 2, seed: int = 42): + super().__init__() + g = torch.Generator().manual_seed(seed) + self.x = torch.rand((n, *image_size), generator=g) + self.y = torch.randint(low=0, high=num_classes, size=(n,), generator=g) + + def __len__(self): return self.x.shape[0] + def __getitem__(self, i): return self.x[i], int(self.y[i]) + +def build_loaders_random(batch_size: int = 16, seed: int = 42): + full = RandomSliceDataset(n=192, seed=seed) + val_size = 64 + train_size = len(full) - val_size + train_set, val_set = random_split(full, [train_size, val_size], generator=torch.Generator().manual_seed(seed)) + train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0) + val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=0) + return train_loader, val_loader, None + +# ===== ADNI (for Rangpur) ===== +@dataclass +class ADNIArgs: + data_root: str + labels_csv: Optional[str] = None # ignored in image-only mode; kept for CLI compatibility + plane: str = "axial" # ignored (MRI-only option) + slice_mode: str = "center_k" # ignored (MRI-only option) + center_k: int = 32 # ignored + step_s: int = 2 # ignored + resize_hw: Tuple[int,int] = (224,224) + val_ratio: float = 0.1 + test_ratio: float = 0.1 + seed: int = 42 + batch_size: int = 16 + num_workers: int = 4 + augment: bool = False + +_IMG_EXTS = ( + "*.jpg", "*.jpeg", "*.png", "*.bmp", "*.tif", "*.tiff", + "*.JPG", "*.JPEG", "*.PNG", "*.BMP", "*.TIF", "*.TIFF" +) + +def _has_subdirs(path: str, names: List[str]) -> bool: + return all(os.path.isdir(os.path.join(path, n)) for n in names) + +def _list_images_under(root: str, class_map: Dict[str,int]) -> List[Tuple[str,int]]: + items: List[Tuple[str,int]] = [] + for cls_name, label in class_map.items(): + cls_dir = os.path.join(root, cls_name) + if not os.path.isdir(cls_dir): + continue + for pat in _IMG_EXTS: + for p in glob.glob(os.path.join(cls_dir, "**", pat), recursive=True): + if os.path.isfile(p): + items.append((p, label)) + return items + +def _extract_subject_id(path: str) -> str: + """Heuristic subject ID from path: prefer folder just under AD/ or NC/; fallback to filename 
stem prefix before '_'""" + cls_names = {"AD", "NC"} + parts = os.path.normpath(path).split(os.sep) + for i, part in enumerate(parts): + if part in cls_names: + if i + 1 < len(parts) - 1: + return parts[i + 1] + break + stem = os.path.splitext(os.path.basename(path))[0] + return stem.split("_")[0] + +def _split_items_by_subject(items: List[Tuple[str,int]], val_ratio: float, seed: int): + from collections import defaultdict + subj_to_items: Dict[str, List[Tuple[str,int]]] = defaultdict(list) + for p, y in items: + s = _extract_subject_id(p) + subj_to_items[s].append((p, y)) + subjects = list(subj_to_items.keys()) + rng = random.Random(seed) + rng.shuffle(subjects) + n_val = int(round(len(subjects) * val_ratio)) + val_subjects = set(subjects[:n_val]) + train_subjects = set(subjects[n_val:]) + tr_items: List[Tuple[str,int]] = [] + va_items: List[Tuple[str,int]] = [] + for s in train_subjects: + tr_items.extend(subj_to_items[s]) + for s in val_subjects: + va_items.extend(subj_to_items[s]) + return tr_items, va_items + +class ADNIImageDataset(Dataset): + """Each image file is one sample -> returns (x, y).""" + def __init__(self, items: List[Tuple[str,int]], args: ADNIArgs, split: str): + self.items = items + self.args = args + self.split = split + + def __len__(self): return len(self.items) + + def __getitem__(self, i): + path, y = self.items[i] + with Image.open(path).convert("L") as img: + arr = np.asarray(img, dtype=np.float32) / 255.0 # (H, W) + t = torch.from_numpy(arr).unsqueeze(0) # (1, H, W) + t = F.interpolate( + t.unsqueeze(0), size=self.args.resize_hw, mode="bilinear", align_corners=False + ).squeeze(0) + + mean, std = 0.5, 0.25 + t = (t - mean) / std + + if self.args.augment and self.split == "train": + if random.random() < 0.5: + t = torch.flip(t, dims=[2]) + sid = _extract_subject_id(path) + return t.float(), int(y), sid + +def _split_items(items: List, val_ratio: float, test_ratio: float, seed: int): + rng = random.Random(seed) + arr = items[:]; rng.shuffle(arr) + n = len(arr) + n_test = int(round(n * test_ratio)) + n_val = int(round(n * val_ratio)) + test_items = arr[:n_test] + val_items = arr[n_test:n_test+n_val] + train_items = arr[n_test+n_val:] + return train_items, val_items, test_items + +def build_loaders_adni(args: ADNIArgs): + assert os.path.isdir(args.data_root), f"data_root not found: {args.data_root}" + class_map = {"AD": 1, "NC": 0} + root = args.data_root.rstrip("/") + + # Case A: parent contains train/ and test/ + if _has_subdirs(root, ["train", "test"]): + train_root = os.path.join(root, "train") + test_root = os.path.join(root, "test") + tr_items = _list_images_under(train_root, class_map) + te_items = _list_images_under(test_root, class_map) + if not tr_items: raise RuntimeError(f"No images found under: {train_root}") + if not te_items: raise RuntimeError(f"No images found under: {test_root}") + tr_split, va_split = _split_items_by_subject(tr_items, args.val_ratio, args.seed) + print(f"[ADNI IMAGES] train={len(tr_split)} val={len(va_split)} test={len(te_items)}") + return ( + DataLoader(ADNIImageDataset(tr_split, args, split="train"), batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True), + DataLoader(ADNIImageDataset(va_split, args, split="val"), batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, pin_memory=True), + DataLoader(ADNIImageDataset(te_items, args, split="test"), batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, pin_memory=True), + ) + + # Case B: root is .../train or 
.../test (use sibling as the other split) + base = os.path.basename(root) + parent = os.path.dirname(root) + if base in ("train", "test") and _has_subdirs(parent, ["train", "test"]): + split_root = root + other_root = os.path.join(parent, "test" if base == "train" else "train") + sp_items = _list_images_under(split_root, class_map) + ot_items = _list_images_under(other_root, class_map) + if not sp_items: raise RuntimeError(f"No images found under: {split_root}") + if not ot_items: raise RuntimeError(f"No images found under: {other_root}") + if base == "train": + tr_split, va_split = _split_items_by_subject(sp_items, args.val_ratio, args.seed) + print(f"[ADNI IMAGES] train={len(tr_split)} val={len(va_split)} test={len(ot_items)}") + return ( + DataLoader(ADNIImageDataset(tr_split, args, split="train"), batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True), + DataLoader(ADNIImageDataset(va_split, args, split="val"), batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, pin_memory=True), + DataLoader(ADNIImageDataset(ot_items, args, split="test"), batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, pin_memory=True), + ) + else: + tr_split, va_split = _split_items_by_subject(ot_items, args.val_ratio, args.seed) + print(f"[ADNI IMAGES] train={len(tr_split)} val={len(va_split)} test={len(sp_items)}") + return ( + DataLoader(ADNIImageDataset(tr_split, args, split="train"), batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True), + DataLoader(ADNIImageDataset(va_split, args, split="val"), batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, pin_memory=True), + DataLoader(ADNIImageDataset(sp_items, args, split="test"), batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, pin_memory=True), + ) + + # Case C: single pool (root has AD/ and NC/ only) + items = _list_images_under(root, class_map) + if not items: + raise RuntimeError(f"No AD/NC images found under: {root}") + tr_items, va_items, te_items = _split_items(items, args.val_ratio, args.test_ratio, args.seed) + print(f"[ADNI IMAGES] train={len(tr_items)} val={len(va_items)} test={len(te_items)}") + return ( + DataLoader(ADNIImageDataset(tr_items, args, split="train"), batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True), + DataLoader(ADNIImageDataset(va_items, args, split="val"), batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, pin_memory=True), + DataLoader(ADNIImageDataset(te_items, args, split="test"), batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, pin_memory=True), + ) + +# ===== Unified entry point ===== +def build_loaders(dataset: str = "random", **kwargs): + if dataset == "random": + return build_loaders_random(batch_size=kwargs.get("batch_size", 16), seed=kwargs.get("seed", 42)) + elif dataset == "adni": + a = ADNIArgs(**kwargs) + return build_loaders_adni(a) + else: + raise ValueError(f"Unknown dataset={dataset}") diff --git a/recognition/adni_convnext_47280647/images/leakage_acc_curve.png b/recognition/adni_convnext_47280647/images/leakage_acc_curve.png new file mode 100644 index 000000000..650ef8380 Binary files /dev/null and b/recognition/adni_convnext_47280647/images/leakage_acc_curve.png differ diff --git a/recognition/adni_convnext_47280647/images/rerun_lr1e-4_hd0.3_dp0.2_s42_acc_curve.png b/recognition/adni_convnext_47280647/images/rerun_lr1e-4_hd0.3_dp0.2_s42_acc_curve.png new file mode 100644 index 
000000000..244149190 Binary files /dev/null and b/recognition/adni_convnext_47280647/images/rerun_lr1e-4_hd0.3_dp0.2_s42_acc_curve.png differ diff --git a/recognition/adni_convnext_47280647/images/rerun_lr1e-4_hd0.3_dp0.2_s42_loss_curve.png b/recognition/adni_convnext_47280647/images/rerun_lr1e-4_hd0.3_dp0.2_s42_loss_curve.png new file mode 100644 index 000000000..b6ea2d372 Binary files /dev/null and b/recognition/adni_convnext_47280647/images/rerun_lr1e-4_hd0.3_dp0.2_s42_loss_curve.png differ diff --git a/recognition/adni_convnext_47280647/images/rerun_subjectsplit_lr3e-4_hd0.2_s123_acc_curve.png b/recognition/adni_convnext_47280647/images/rerun_subjectsplit_lr3e-4_hd0.2_s123_acc_curve.png new file mode 100644 index 000000000..d1cc1b36b Binary files /dev/null and b/recognition/adni_convnext_47280647/images/rerun_subjectsplit_lr3e-4_hd0.2_s123_acc_curve.png differ diff --git a/recognition/adni_convnext_47280647/images/rerun_subjectsplit_lr3e-4_hd0.2_s123_loss_curve.png b/recognition/adni_convnext_47280647/images/rerun_subjectsplit_lr3e-4_hd0.2_s123_loss_curve.png new file mode 100644 index 000000000..662cb9bae Binary files /dev/null and b/recognition/adni_convnext_47280647/images/rerun_subjectsplit_lr3e-4_hd0.2_s123_loss_curve.png differ diff --git a/recognition/adni_convnext_47280647/modules.py b/recognition/adni_convnext_47280647/modules.py new file mode 100644 index 000000000..991dafce2 --- /dev/null +++ b/recognition/adni_convnext_47280647/modules.py @@ -0,0 +1,208 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from typing import List + +# ------------------------------ +# 1) Minimal CNN +# ------------------------------ +class TinyCNN(nn.Module): + """Minimal 2D CNN for binary classification on 1x224x224 inputs.""" + def __init__(self, in_chans: int = 1, num_classes: int = 2, head_dropout: float = 0.0): + super().__init__() + self.features = nn.Sequential( + nn.Conv2d(in_chans, 16, 3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d(2), # 112x112 + nn.Conv2d(16, 32, 3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d(2), # 56x56 + nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(inplace=True), + nn.AdaptiveAvgPool2d((1, 1)) # 64x1x1 + ) + self.dropout = nn.Dropout(p=head_dropout) if head_dropout > 0 else nn.Identity() + self.head = nn.Linear(64, num_classes) + + def forward(self, x): + x = self.features(x).flatten(1) + x = self.dropout(x) + logits = self.head(x) + probs = F.softmax(logits, dim=1) + return {"logits": logits, "probs": probs} + +# ------------------------------ +# 2) ConvNeXt-Lite +# Differences vs “canonical” ConvNeXt: +# - depthwise kernel 5 (not 7) +# - SiLU activation (not GELU) +# - GroupNorm(1,C) as NCHW LayerNorm +# - two-step stem downsampling (3x3 s=2 twice) +# - depths=[2,4,8,2], dims=[80,160,320,640] +# ------------------------------ + +class StochasticDepth(nn.Module): + """Per-sample DropPath (stochastic depth).""" + def __init__(self, p: float = 0.0): + super().__init__() + self.p = float(p) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if self.p == 0.0 or not self.training: + return x + keep = 1.0 - self.p + # shape: [N, 1, 1, 1] so each sample is dropped/kept whole + mask = torch.empty(x.shape[0], 1, 1, 1, device=x.device, dtype=x.dtype).bernoulli_(keep) + return x * mask / keep + +class ChannelNorm(nn.Module): + """ + LayerNorm for NCHW via GroupNorm(1, C): normalizes each channel with affine params. + This is a standard trick to avoid permute for channels-last LN. 
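+    Note: with a single group, the normalisation statistics are computed over all channels and
+    spatial positions jointly (with per-channel affine parameters), so this approximates, but is
+    not identical to, the channels-only LayerNorm used in the original ConvNeXt.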
+ """ + def __init__(self, num_channels: int, eps: float = 1e-6): + super().__init__() + self.norm = nn.GroupNorm(1, num_channels, eps=eps, affine=True) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.norm(x) + +class NeXtLiteBlock(nn.Module): + """ + ConvNeXt-inspired block (distinct variant): + depthwise 5x5 -> ChannelNorm -> 1x1 (4x) -> SiLU -> 1x1 (proj) -> LayerScale -> StochasticDepth -> +res + """ + def __init__(self, dim: int, drop_path: float = 0.0, layer_scale_init: float = 1e-6): + super().__init__() + self.dw = nn.Conv2d(dim, dim, kernel_size=5, padding=2, groups=dim) # K=5 + self.norm = ChannelNorm(dim) + self.pw_expand = nn.Conv2d(dim, 4 * dim, kernel_size=1) + self.act = nn.SiLU() # different from GELU + self.pw_proj = nn.Conv2d(4 * dim, dim, kernel_size=1) + # per-channel layerscale (optional) + self.alpha = nn.Parameter(torch.full((dim,), layer_scale_init)) if layer_scale_init > 0 else None + self.drop = StochasticDepth(drop_path) if drop_path > 0 else nn.Identity() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + residual = x + x = self.dw(x) + x = self.norm(x) + x = self.pw_expand(x) + x = self.act(x) + x = self.pw_proj(x) + if self.alpha is not None: + x = x * self.alpha[:, None, None] + x = self.drop(x) + return x + residual + +class ConvNeXtLite(nn.Module): + """ + A compact, convnext-inspired classifier for 1x224x224 inputs. + From-scratch, no external backbones, distinct config (dims/depths/kernels/norm/act). + """ + def __init__( + self, + in_chans: int = 1, + num_classes: int = 2, + depths: List[int] = [2, 4, 8, 2], + dims: List[int] = [80, 160, 320, 640], + drop_path_rate: float = 0.15, + head_dropout: float = 0.2, + layer_scale_init: float = 1e-6, + ): + super().__init__() + assert len(depths) == 4 and len(dims) == 4 + + # Stem: two 3x3 stride-2 downsamples (224 -> 56) + self.stem = nn.Sequential( + nn.Conv2d(in_chans, dims[0]//2, kernel_size=3, stride=2, padding=1), # 224->112 + ChannelNorm(dims[0]//2), + nn.SiLU(), + nn.Conv2d(dims[0]//2, dims[0], kernel_size=3, stride=2, padding=1), # 112->56 + ChannelNorm(dims[0]), + nn.SiLU(), + ) + + # Downsample layers between stages: 2x2 stride-2 convs + self.downsamples = nn.ModuleList([ + nn.Sequential(ChannelNorm(dims[0]), nn.Conv2d(dims[0], dims[1], 2, 2)), + nn.Sequential(ChannelNorm(dims[1]), nn.Conv2d(dims[1], dims[2], 2, 2)), + nn.Sequential(ChannelNorm(dims[2]), nn.Conv2d(dims[2], dims[3], 2, 2)), + ]) + + # Stages with progressive stochastic depth + total_blocks = sum(depths) + dp_rates = torch.linspace(0, drop_path_rate, total_blocks).tolist() + cursor = 0 + self.stages = nn.ModuleList() + for stage_idx in range(4): + blocks = [] + width = dims[stage_idx] + for _ in range(depths[stage_idx]): + blocks.append(NeXtLiteBlock(width, drop_path=dp_rates[cursor], layer_scale_init=layer_scale_init)) + cursor += 1 + self.stages.append(nn.Sequential(*blocks)) + + # Head: global avg pool -> LN (via GroupNorm) -> dropout -> linear + self.head_norm = ChannelNorm(dims[-1]) + self.head_drop = nn.Dropout(head_dropout) if head_dropout > 0 else nn.Identity() + self.head_fc = nn.Linear(dims[-1], num_classes) + + self.apply(self._init_weights) + + @staticmethod + def _init_weights(m: nn.Module): + if isinstance(m, (nn.Conv2d, nn.Linear)): + nn.init.trunc_normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.zeros_(m.bias) + + def forward_features(self, x: torch.Tensor) -> torch.Tensor: + x = self.stem(x) + x = self.stages[0](x) + x = self.downsamples[0](x) + x = self.stages[1](x) + x = 
self.downsamples[1](x) + x = self.stages[2](x) + x = self.downsamples[2](x) + x = self.stages[3](x) + # global average pooling + x = x.mean(dim=(2, 3)) + return x + + def forward(self, x: torch.Tensor): + feats = self.forward_features(x) + feats = self.head_norm(feats[:, :, None, None]).squeeze(-1).squeeze(-1) + feats = self.head_drop(feats) + logits = self.head_fc(feats) + probs = F.softmax(logits, dim=1) + return {"logits": logits, "probs": probs} + +# ------------------------------ +# 3) Factory & utility +# ------------------------------ +def build_model( + name: str = "tiny", + in_chans: int = 1, + num_classes: int = 2, + head_dropout: float = 0.0, + drop_path_rate: float = 0.15, + layer_scale_init: float = 1e-6, +) -> nn.Module: + """ + Backward-compatible factory. + - name="tiny" -> TinyCNN (baseline) + - name="nextlite_tiny" -> ConvNeXtLite + """ + if name == "tiny": + return TinyCNN(in_chans=in_chans, num_classes=num_classes, head_dropout=head_dropout) + elif name == "nextlite_tiny": + return ConvNeXtLite( + in_chans=in_chans, + num_classes=num_classes, + depths=[2, 4, 8, 2], + dims=[80, 160, 320, 640], + drop_path_rate=drop_path_rate, + head_dropout=head_dropout, + layer_scale_init=layer_scale_init, + ) + else: + raise ValueError(f"Unknown model '{name}' (use 'tiny' or 'nextlite_tiny').") + +def count_params(model: nn.Module) -> int: + return sum(p.numel() for p in model.parameters() if p.requires_grad) diff --git a/recognition/adni_convnext_47280647/predict.py b/recognition/adni_convnext_47280647/predict.py new file mode 100644 index 000000000..d1c0c174c --- /dev/null +++ b/recognition/adni_convnext_47280647/predict.py @@ -0,0 +1,87 @@ +import argparse, torch, numpy as np +from collections import defaultdict +from modules import build_model +from dataset import build_loaders + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--checkpoint", type=str, default="results/run1/best.pt") + ap.add_argument("--dataset", type=str, default="random", choices=["random","adni"]) + ap.add_argument("--batch_size", type=int, default=32) + ap.add_argument("--model", type=str, default="tiny", choices=["tiny","nextlite_tiny"]) + ap.add_argument("--head_dropout", type=float, default=0.0) + ap.add_argument("--drop_path_rate", type=float, default=0.15) + ap.add_argument("--layer_scale_init", type=float, default=1e-6) + # ADNI flags (only used if dataset=adni) + ap.add_argument("--data_root", type=str, default=None) + ap.add_argument("--labels_csv", type=str, default=None) + ap.add_argument("--num_workers", type=int, default=4) + ap.add_argument("--plane", type=str, default="axial") + ap.add_argument("--slice_mode", type=str, default="center_k") + ap.add_argument("--center_k", type=int, default=32) + ap.add_argument("--step_s", type=int, default=2) + ap.add_argument("--val_ratio", type=float, default=0.1) + ap.add_argument("--test_ratio", type=float, default=0.1) + ap.add_argument("--seed", type=int, default=42) + args = ap.parse_args() + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + tr, va, te = build_loaders( + dataset=args.dataset, + data_root=args.data_root, labels_csv=args.labels_csv, + plane=args.plane, slice_mode=args.slice_mode, center_k=args.center_k, step_s=args.step_s, + resize_hw=(224,224), val_ratio=args.val_ratio, test_ratio=args.test_ratio, + seed=args.seed, batch_size=args.batch_size, num_workers=args.num_workers, + augment=False + ) + loader = te if (args.dataset == "adni" and te is not None) else va + + ckpt = torch.load(args.checkpoint, 
map_location="cpu") + model = build_model( + name=args.model, + in_chans=1, + num_classes=2, + head_dropout=args.head_dropout, + drop_path_rate=args.drop_path_rate, + layer_scale_init=args.layer_scale_init, + ).to(device) + model.load_state_dict(ckpt["state_dict"]) + model.eval() + + correct = total = 0 + subj_logits = defaultdict(list) + subj_labels = {} + + with torch.no_grad(): + for batch in loader: + if isinstance(batch, (list, tuple)) and len(batch) == 3: + x, y, sid = batch + else: + sid = None + x, y = (batch[0], batch[1]) if isinstance(batch, (list, tuple)) else batch + x, y = x.to(device), y.to(device) + logits = model(x)["logits"] + preds = logits.argmax(1) + correct += (preds == y).sum().item() + total += y.size(0) + + if sid is not None: + for i, s in enumerate(sid): + subj_logits[s].append(logits[i].cpu().numpy()) + subj_labels[s] = int(y[i].cpu().item()) + + slice_acc = correct / max(total, 1) + print(f"Slice-level accuracy: {slice_acc:.3f}") + + if subj_logits: + p_correct = 0 + for s, logit_list in subj_logits.items(): + mlog = np.mean(np.stack(logit_list, axis=0), axis=0) + pred = int(np.argmax(mlog)) + p_correct += int(pred == subj_labels[s]) + patient_acc = p_correct / len(subj_logits) + print(f"Patient-level accuracy: {patient_acc:.3f} (subjects={len(subj_logits)})") + +if __name__ == "__main__": + main() diff --git a/recognition/adni_convnext_47280647/requirements.txt b/recognition/adni_convnext_47280647/requirements.txt new file mode 100644 index 000000000..7066dbe9a --- /dev/null +++ b/recognition/adni_convnext_47280647/requirements.txt @@ -0,0 +1,7 @@ +#! remember this does not include torch/torchvision dependencies +pillow==10.4.0 +numpy==1.26.4 +matplotlib==3.8.4 +tqdm==4.66.4 +scikit-learn==1.3.2 +scipy==1.11.4 diff --git a/recognition/adni_convnext_47280647/train.py b/recognition/adni_convnext_47280647/train.py new file mode 100644 index 000000000..154028f54 --- /dev/null +++ b/recognition/adni_convnext_47280647/train.py @@ -0,0 +1,137 @@ +import argparse, os, json, random +import numpy as np +import torch +import torch.nn as nn +from modules import build_model, count_params +from dataset import build_loaders +import matplotlib.pyplot as plt + +def set_seed(seed: int = 42): + random.seed(seed); np.random.seed(seed); torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed); torch.backends.cudnn.deterministic = True; torch.backends.cudnn.benchmark = False + +def accuracy_from_logits(logits: torch.Tensor, targets: torch.Tensor) -> float: + preds = logits.argmax(dim=1) + return (preds == targets).float().mean().item() + +def plot_curves(history, save_dir): + os.makedirs(save_dir, exist_ok=True) + # Loss + plt.figure() + plt.plot(history["train_loss"], label="train") + plt.plot(history["val_loss"], label="val") + plt.xlabel("epoch"); plt.ylabel("loss"); plt.legend(); plt.tight_layout() + plt.savefig(os.path.join(save_dir, "loss_curve.png")); plt.close() + # Acc + plt.figure() + plt.plot(history["train_acc"], label="train") + plt.plot(history["val_acc"], label="val") + plt.xlabel("epoch"); plt.ylabel("accuracy"); plt.legend(); plt.tight_layout() + plt.savefig(os.path.join(save_dir, "acc_curve.png")); plt.close() + +def main(): + ap = argparse.ArgumentParser() + # core + ap.add_argument("--dataset", type=str, default="random", choices=["random","adni"]) + ap.add_argument("--epochs", type=int, default=3) + ap.add_argument("--batch_size", type=int, default=16) + ap.add_argument("--lr", type=float, default=1e-3) + ap.add_argument("--seed", type=int, default=42) + 
ap.add_argument("--head_dropout", type=float, default=0.0) + ap.add_argument("--drop_path_rate", type=float, default=0.15) + ap.add_argument("--layer_scale_init", type=float, default=1e-6) + ap.add_argument("--save_dir", type=str, default="results/run1") + # data (ADNI) + ap.add_argument("--data_root", type=str, default=None) + ap.add_argument("--labels_csv", type=str, default=None) + ap.add_argument("--num_workers", type=int, default=4) + ap.add_argument("--plane", type=str, default="axial") + ap.add_argument("--slice_mode", type=str, default="center_k") + ap.add_argument("--center_k", type=int, default=32) + ap.add_argument("--step_s", type=int, default=2) + ap.add_argument("--val_ratio", type=float, default=0.1) + ap.add_argument("--test_ratio", type=float, default=0.1) + ap.add_argument("--augment", action="store_true") + ap.add_argument("--model", type=str, default="tiny", + choices=["tiny","nextlite_tiny"], + help="Which model to build") + + args = ap.parse_args() + + set_seed(args.seed) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print(f"Using device: {device}") + os.makedirs(args.save_dir, exist_ok=True) + with open(os.path.join(args.save_dir, "config.json"), "w") as f: + json.dump(vars(args), f, indent=2) + + train_loader, val_loader, _ = build_loaders( + dataset=args.dataset, + data_root=args.data_root, labels_csv=args.labels_csv, + plane=args.plane, slice_mode=args.slice_mode, + center_k=args.center_k, step_s=args.step_s, + resize_hw=(224,224), val_ratio=args.val_ratio, test_ratio=args.test_ratio, + seed=args.seed, batch_size=args.batch_size, num_workers=args.num_workers, + augment=args.augment + ) + + model = build_model( + name=args.model, + in_chans=1, + num_classes=2, + head_dropout=args.head_dropout, + drop_path_rate=args.drop_path_rate, + layer_scale_init=args.layer_scale_init, + ).to(device) + print(f"Model params: {count_params(model):,}") + optim = torch.optim.Adam(model.parameters(), lr=args.lr) + criterion = nn.CrossEntropyLoss() + + best_val_acc, best_state = -1.0, None + history = {"train_loss": [], "val_loss": [], "train_acc": [], "val_acc": []} + + for epoch in range(1, args.epochs + 1): + # --- train --- + model.train() + tr_loss = tr_acc = n_tr = 0.0 + for batch in train_loader: + x, y = (batch[0], batch[1]) if isinstance(batch, (list, tuple)) else batch + x, y = x.to(device), y.to(device) + out = model(x)["logits"] + loss = criterion(out, y) + optim.zero_grad(); loss.backward(); optim.step() + bs = y.size(0) + tr_loss += loss.item() * bs + tr_acc += accuracy_from_logits(out, y) * bs + n_tr += bs + + # --- val --- + model.eval() + va_loss = va_acc = n_va = 0.0 + with torch.no_grad(): + for batch in val_loader: + x, y = (batch[0], batch[1]) if isinstance(batch, (list, tuple)) else batch + x, y = x.to(device), y.to(device) + out = model(x)["logits"] + loss = criterion(out, y) + bs = y.size(0) + va_loss += loss.item() * bs + va_acc += accuracy_from_logits(out, y) * bs + n_va += bs + + tr_loss /= n_tr; tr_acc /= n_tr + va_loss /= n_va; va_acc /= n_va + history["train_loss"].append(tr_loss); history["val_loss"].append(va_loss) + history["train_acc"].append(tr_acc); history["val_acc"].append(va_acc) + print(f"Epoch {epoch:02d} | train_loss={tr_loss:.4f} val_loss={va_loss:.4f} | train_acc={tr_acc:.3f} val_acc={va_acc:.3f}") + + if va_acc > best_val_acc: + best_val_acc = va_acc + best_state = {"state_dict": model.state_dict(), "epoch": epoch} + torch.save(best_state, os.path.join(args.save_dir, "best.pt")) + + plot_curves(history, 
args.save_dir) + print(f"Best val_acc: {best_val_acc:.3f} (checkpoint saved)") + +if __name__ == "__main__": + main()
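+
+# Example invocation (hypothetical local paths; see README section 10.2 for the exact commands used):
+#   python train.py --dataset adni --data_root /path/to/AD_NC --model nextlite_tiny \
+#     --epochs 25 --batch_size 32 --lr 1e-4 --augment --save_dir runs/example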