shakes76 · keys-i · Oct 2, 2025 · Oct 2, 2025 · Oct 2, 2025 · Oct 2, 2025
diff --git a/recognition/TimeLOB_TimeGAN_49088276/.gitignore b/recognition/TimeLOB_TimeGAN_49088276/.gitignore
@@ -0,0 +1,22 @@
+# editor specific files
+.idea/
+.vscode/
+
+# python cache files (no leading "./": git never matches patterns written that way)
+__pycache__/
+*.pyc
+
+# model specific files
+weights/
+outs/
+data/
+preproc_final_core/
+*.csv
+*.pt
+*.pkl
+outputs/
+checkpoints/
+logs/
+
+# OS generated files
+.DS_Store
diff --git a/recognition/TimeLOB_TimeGAN_49088276/README.MD b/recognition/TimeLOB_TimeGAN_49088276/README.MD
diff --git a/recognition/TimeLOB_TimeGAN_49088276/assets/latent_walk_tight_only.png b/recognition/TimeLOB_TimeGAN_49088276/assets/latent_walk_tight_only.png
diff --git a/recognition/TimeLOB_TimeGAN_49088276/assets/latent_walk_tight_to_wide.png b/recognition/TimeLOB_TimeGAN_49088276/assets/latent_walk_tight_to_wide.png
diff --git a/recognition/TimeLOB_TimeGAN_49088276/assets/model-architecture.png b/recognition/TimeLOB_TimeGAN_49088276/assets/model-architecture.png
diff --git a/recognition/TimeLOB_TimeGAN_49088276/assets/real_heatmap.png b/recognition/TimeLOB_TimeGAN_49088276/assets/real_heatmap.png
diff --git a/recognition/TimeLOB_TimeGAN_49088276/assets/synthetic_000.png b/recognition/TimeLOB_TimeGAN_49088276/assets/synthetic_000.png
diff --git a/recognition/TimeLOB_TimeGAN_49088276/assets/synthetic_001.png b/recognition/TimeLOB_TimeGAN_49088276/assets/synthetic_001.png
diff --git a/recognition/TimeLOB_TimeGAN_49088276/assets/synthetic_002.png b/recognition/TimeLOB_TimeGAN_49088276/assets/synthetic_002.png
diff --git a/recognition/TimeLOB_TimeGAN_49088276/assets/training-losses.png b/recognition/TimeLOB_TimeGAN_49088276/assets/training-losses.png
diff --git a/recognition/TimeLOB_TimeGAN_49088276/environment.yml b/recognition/TimeLOB_TimeGAN_49088276/environment.yml
@@ -0,0 +1,47 @@
+# ------------------------------------------------------------------------------
+# Project: TimeGAN (LOB / time-series)
+# Description: Reproducible environment for training, evaluation, and visualization
+# Maintainer: Radhesh Goel (Keys-I)
+# Created: 2025-11-10
+# Python: 3.13
+# Notes:
+# - Keep versions loosely pinned unless you need strict reproducibility.
+# - Use `conda env export --from-history` to capture only explicit deps later.
+# ------------------------------------------------------------------------------
+name: timegan
+
+channels:
+  - conda-forge
+
+variables:
+  PROJECT_NAME: "timegan"
+  PYTHONHASHSEED: "0"
+  MPLBACKEND: "Agg"
+  TORCH_SHOW_CPP_STACKTRACES: "1"
+
+dependencies:
+  - python=3.13
+  - numpy
+  - pandas
+  - scipy
+  - scikit-learn
+  - scikit-image
+  - seaborn
+  - matplotlib
+  - jupyterlab
+  - ipykernel
+  - pytorch
+  - torchvision
+  - pillow
+  - tqdm
+  - rich
+  - typing-extensions
+  - pip
+
+# Notes:
+# - `contextvars` has been part of the standard library since Python 3.7, so the
+#   backport package is intentionally not listed.
+# - There are currently no pip-only packages. If one is needed, append a
+#   `- pip:` item with a nested list as the last entry under `dependencies`.
+# - If you need GPU on Linux with CUDA 12.x, install these AFTER creating the env:
+#     conda install pytorch-cuda=12.1 -c nvidia -c conda-forge
+#   (No versions are pinned above; keep pytorch and torchvision on mutually
+#   compatible releases to maintain ABI compatibility.)
diff --git a/recognition/TimeLOB_TimeGAN_49088276/scripts/npy_to_csv.py b/recognition/TimeLOB_TimeGAN_49088276/scripts/npy_to_csv.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+# npy_to_csv.py
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+from rich.console import Console
+from rich.panel import Panel
+from rich.status import Status
+from rich.table import Table
+
+console = Console()
+
+
+def show_peek(df: pd.DataFrame, n: int) -> None:
+    if n <= 0:
+        return
+    n = min(n, len(df))
+    table = Table(title=f"Peek (first {n} rows)", show_lines=False)
+    for c in df.columns:
+        table.add_column(str(c))
+    for _, row in df.head(n).iterrows():
+        table.add_row(*[str(x) for x in row.to_list()])
+    console.print(table)
+
+
+def show_summary(df: pd.DataFrame, topk: int = 8) -> None:
+    desc = df.describe().T  # count, mean, std, min, 25%, 50%, 75%, max
+    # keep only first topk columns for display to keep it compact
+    cols = ["count", "mean", "std", "min", "50%", "max"]
+    table = Table(title="Summary stats (per column)", show_lines=False)
+    for c in ["column"] + cols:
+        table.add_column(c)
+    for name, row in desc.head(topk).iterrows():
+        table.add_row(
+            str(name),
+            *(f"{row[c]:.6g}" if pd.notnull(row[c]) else "nan" for c in cols),
+        )
+    console.print(table)
+    if len(desc) > topk:
+        console.print(f"[dim] {len(desc) - topk} more columns not shown[/dim]")
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser(
+        description="Convert a 2D NumPy .npy array to CSV with rich peek/summary."
+    )
+    ap.add_argument(
+        "--in", dest="inp", default="./outs/gen_data.npy", help="Input .npy file"
+    )
+    ap.add_argument(
+        "--out", dest="outp", default="./outs/gen_data.csv", help="Output .csv file"
+    )
+    ap.add_argument("--prefix", default="f", help="Column name prefix (default: f)")
+    ap.add_argument(
+        "--peek",
+        type=int,
+        default=5,
+        help="Show first N rows in the console (0 = disable)",
+    )
+    ap.add_argument(
+        "--summary", action="store_true", help="Print per-column summary statistics"
+    )
+    ap.add_argument(
+        "--no-save", action="store_true", help="Do not write CSV (preview only)"
+    )
+    args = ap.parse_args()
+
+    inp = Path(args.inp)
+    outp = Path(args.outp)
+    outp.parent.mkdir(parents=True, exist_ok=True)
+
+    if not inp.exists():
+        console.print(f"[red]Input not found:[/red] {inp}")
+        raise SystemExit(1)
+
+    with Status(f"[cyan]Loading[/cyan] {inp}", console=console):
+        arr = np.load(inp)
+
+    if arr.ndim != 2:
+        console.print(f"[red]Expected a 2D array, got shape {arr.shape}[/red]")
+        raise SystemExit(2)
+
+    n_rows, n_cols = arr.shape
+    cols = [f"{args.prefix}{i}" for i in range(n_cols)]
+
+    console.print(
+        Panel.fit(f"[bold]Array shape[/bold]: {n_rows} × {n_cols}", border_style="cyan")
+    )
+
+    df = pd.DataFrame(arr, columns=cols)
+
+    # Peek and summary
+    show_peek(df, args.peek)
+    if args.summary:
+        show_summary(df)
+
+    # Save CSV unless suppressed
+    if not args.no - save:
+        with Status(f"[cyan]Writing CSV[/cyan] → {outp}", console=console):
+            df.to_csv(outp, index=False)
+        console.print(f"[green]Done:[/green] wrote [bold]{outp}[/bold]")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/recognition/TimeLOB_TimeGAN_49088276/scripts/run.sh b/recognition/TimeLOB_TimeGAN_49088276/scripts/run.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+# Train TimeGAN on UQ Rangpur and run sampling + visualisation
+#
+# Submit from the project root: environment.yml is resolved relative to the
+# working directory, and PROJECT_ROOT defaults to $PWD.
+# NOTE: the logs/ directory used by --output/--error below must already exist
+# when the job is submitted; this script cannot create it in time.
+
+#SBATCH --job-name=timegan-amzn
+#SBATCH --partition=a100
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=4
+#SBATCH --gres=gpu:1
+#SBATCH --time=24:00:00
+#SBATCH --output=logs/%x_%j.out
+#SBATCH --error=logs/%x_%j.err
+
+# Abort on any failing command, unset variable, or failing pipeline stage.
+set -euo pipefail
+
+# Conda bootstrap (batch-safe)
+if [[ -z "${CONDA_EXE:-}" ]]; then
+  # adjust this path to your cluster's Anaconda/Miniconda install if needed
+  source "$HOME/miniconda3/etc/profile.d/conda.sh" 2>/dev/null || {
+    echo "Conda not found. Please load your conda module or fix the path." >&2
+    exit 1
+  }
+fi
+
+# Create the env if it is absent; otherwise sync it with environment.yml.
+# The listing is captured first: piping straight into `grep -q` lets grep exit
+# on the first match, which can kill `conda` with SIGPIPE and, under
+# `pipefail`, make an existing env look absent.
+ENV_NAME="timegan"
+env_listing="$(conda env list)"
+if ! grep -qE "^\s*${ENV_NAME}\s" <<<"${env_listing}"; then
+  conda env create -n "${ENV_NAME}" -f environment.yml
+else
+  conda env update -n "${ENV_NAME}" -f environment.yml --prune
+fi
+
+# Activation hooks may read unset variables, which `set -u` would treat as
+# fatal, so nounset is relaxed just for the activation.
+set +u
+conda activate "${ENV_NAME}"
+set -u
+
+# Project paths
+export PROJECT_ROOT="${PROJECT_ROOT:-$PWD}"
+# Lets `python -m src.<module>` resolve the `src` package from the project root.
+export PYTHONPATH="$PROJECT_ROOT"
+
+echo "[info] PROJECT_ROOT=$PROJECT_ROOT"
+echo "[info] PYTHONPATH=$PYTHONPATH"
+
+# Stage 1 - Training
+# The script runs under `set -euo pipefail`, so a failing stage stops the
+# stages after it.
+# The extra indentation under --dataset / --modules is visual grouping only;
+# each command receives one flat argument list.
+# NOTE(review): presumably --dataset / --modules / --viz open argument sections
+# in the project's CLI parser - confirm against the code under src/.
+python -m src.train \
+  --dataset \
+    --seq-len 128 \
+    --data-dir ./data \
+    --orderbook-filename AMZN_2012-06-21_34200000_57600000_orderbook_10.csv \
+    --splits 0.7 0.85 1.0 \
+    --no-shuffle \
+  --modules \
+    --batch-size 128 \
+    --z-dim 40 \
+    --hidden-dim 64 \
+    --num-layer 3 \
+    --lr 1e-4 \
+    --beta1 0.5 \
+    --w-gamma 1.0 \
+    --w-g 1.0 \
+    --num-iters 25000
+
+# Stage 2 - Sampling (generate flat rows)
+# Repeats the dataset and model-shape arguments used for training.
+# NOTE(review): presumably --seq-len, --z-dim, --hidden-dim and --num-layer
+# must match the training values for the saved weights to load - confirm.
+python -m src.predict \
+  --dataset \
+    --seq-len 128 \
+    --data-dir ./data \
+    --orderbook-filename AMZN_2012-06-21_34200000_57600000_orderbook_10.csv \
+    --splits 0.7 0.85 1.0 \
+  --modules \
+    --batch-size 128 \
+    --z-dim 40 \
+    --hidden-dim 64 \
+    --num-layer 3
+
+# Stage 3 - Visualisation + metrics + latent walks
+# Figures go to ./outs/viz_run1 and the metrics CSV is written alongside them.
+# Unlike the two stages above, no --splits argument is passed here.
+python -m src.helpers.visualise \
+  --dataset \
+    --seq-len 128 \
+    --data-dir ./data \
+    --orderbook-filename AMZN_2012-06-21_34200000_57600000_orderbook_10.csv \
+  --modules \
+    --batch-size 128 \
+    --z-dim 40 \
+    --hidden-dim 64 \
+    --num-layer 3 \
+  --viz \
+    --samples 5 \
+    --out-dir ./outs/viz_run1 \
+    --cmap magma \
+    --dpi 240 \
+    --bins 128 \
+    --levels 10 \
+    --metrics-csv ./outs/viz_run1/metrics.csv \
+    --walk --walk-steps 8 --walk-mode cross --walk-prefix latent_walk