Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
74 commits
Select commit Hold shift + click to select a range
848349e
chore(init): scaffold project structure
keys-i Oct 2, 2025
3203bb0
docs(module): improve top-level module docstring
keys-i Oct 2, 2025
0294626
build(env,src): add conda environment.yml (py3.13) and adopt src/ layout
keys-i Oct 2, 2025
a815817
code(script): fixed formatting issues
keys-i Oct 2, 2025
14b75d1
feat(data): add LOBSTERData with headerless support
keys-i Oct 2, 2025
227d74e
feat(dataset): support headerless LOBSTER CSVs + CLI smoke test
keys-i Oct 2, 2025
d286307
feat(dataset): add CSV summaries and CLI flags
keys-i Oct 2, 2025
0ac2cac
style(dataset): prettier dataset previews (aligned columns, rounded f…
keys-i Oct 2, 2025
0b70e99
feat(dataset): chat-style CLI output with bubbles and KV tables
keys-i Oct 2, 2025
a1a8fb5
feat(dataset): verbose chat-style CLI with diagnostics
keys-i Oct 3, 2025
07860dd
feat(dataset): pretty tables via tabulate; render cleanly in chat/box
keys-i Oct 3, 2025
790de5d
feat(dataset): colored, polished CLI with tabulated tables
keys-i Oct 3, 2025
a5e907b
docs(report): rewrite project overview for TimeGAN LOB generation
keys-i Oct 3, 2025
b08467b
refactor(dataset): split monolithic dataset.py into helpers pkg
keys-i Oct 3, 2025
f9e98b6
mm(preprocess): persist scaler/PCA/ZCA as .pkl for reproducible pipel…
keys-i Oct 3, 2025
0e437aa
feat(dataset): auto-detect headers + nested summary panels; GAN-ready…
keys-i Oct 3, 2025
788efe1
feat(timegan): add basic TimeGAN components (Embedder/Recovery/Genera…
keys-i Oct 4, 2025
53ee1ea
feat(train): add end-to-end TimeGAN trainer for LOBSTER windows
keys-i Oct 4, 2025
e2f1b74
feat(predict): add TimeGAN sampling & visualisation script (lines + h…
keys-i Oct 4, 2025
8be97b6
feat(dataset): simplify pipeline and switch to continuous MinMax scaler
keys-i Oct 5, 2025
bc932cc
refactor(dataset): simplify loader and convert to class-based API
keys-i Oct 6, 2025
eb60d72
feat(dataset): add DataOptions CLI; robust split handling; logging; f…
keys-i Oct 6, 2025
337ff87
feat(cli): add top-level Options router with --dataset passthrough
keys-i Oct 6, 2025
3cf8b0c
feat(metrics): add min–max scaling/inverse, noise sampler, spread/MPR…
keys-i Oct 6, 2025
259567c
feat(model): add TimeGAN components with LOB-aware scaffolding (Encod…
keys-i Oct 6, 2025
aeb67f4
feat(model): extend TimeGAN with training loop, ckpt I/O, KL check, a…
keys-i Oct 7, 2025
b4fbbc1
feat(model): wire full TimeGAN training/generation, checkpoints, and …
keys-i Oct 7, 2025
1291868
feat(train): add CLI entrypoint to run TimeGAN end-to-end
keys-i Oct 7, 2025
8cd2b76
feat(viz): add sampling script to generate and save synthetic LOB data
keys-i Oct 9, 2025
f979f97
feat(cli): nested Options with --dataset/--modules routers for data +…
keys-i Oct 10, 2025
c609c89
chore(types): use List[str] for argv hints instead of union list|str
keys-i Oct 10, 2025
5d908d9
feat(model): add OptLike Protocol + stronger typing; refine GRU init;…
keys-i Oct 11, 2025
d0b14ee
feat(train): use nested Options (dataset/modules), flatten val/test i…
keys-i Oct 14, 2025
9b55f94
feat(viz): sample synthetic LOB data using nested Options (dataset/mo…
keys-i Oct 16, 2025
d22db86
feat(viz): generate LOB depth heatmaps and compute SSIM for real vs. …
keys-i Oct 17, 2025
f38a8c6
fix(viz): import img_as_float from skimage.util
keys-i Oct 17, 2025
4535545
feat(scripts): add run.sh to test TimeGAN model
keys-i Oct 17, 2025
7de81f8
chore(env): polish environment.yml; refactor(constants): remove unuse…
keys-i Oct 17, 2025
5a2cf63
fix: minor bugs in CLI routing and data pipeline
keys-i Oct 17, 2025
24e6ed9
chore(cli): standardize flags to hyphen-case; remove underscore variants
keys-i Oct 18, 2025
f52ac81
fix(project): wire modules with absolute src.* imports; prevent neste…
keys-i Oct 19, 2025
8c2fd31
feat(train): add --num-iters flag and wire schedule into TimeGAN
keys-i Oct 20, 2025
27c28d5
chore(ui): integrate Richie into dataset loader for pretty CLI
keys-i Oct 20, 2025
76c9454
feat(cli): hook up train.py & predict.py to nested Options and src.* …
keys-i Oct 20, 2025
9734b42
feat(viz): refactor visualise with Richie UI and stable src.* imports
keys-i Oct 20, 2025
0cebe52
feat(scripts): make run.sh work out-of-the-box
keys-i Oct 20, 2025
b245a90
feat(ui): add moving ellipsis to status spinner with re-entrant safety
keys-i Oct 20, 2025
aba8fda
style(all): format codebase for readability
keys-i Oct 20, 2025
070bd1c
style(all): reformat codebase with Black and Ruff
keys-i Oct 20, 2025
7b89e9a
feat(metrics): add computation and reporting for SSIM, KL(spread), KL…
keys-i Oct 20, 2025
d6c0480
chore(logs): make output paths and phase actions explicit in console …
keys-i Oct 20, 2025
5cb6f9c
docs: add error/performance analysis, integrate curated references, a…
keys-i Oct 20, 2025
afd61c3
feat(viz): wire latent-space walks with flags and panels
keys-i Oct 21, 2025
243de34
feat(data): add dataset summariser CLI
keys-i Oct 21, 2025
35e629b
feat(docs): add TimeGAN model description (5 components) and three-ph…
keys-i Oct 21, 2025
0172d2e
docs(readme): add Table of Contents, project structure overview, and …
keys-i Oct 21, 2025
1e7066c
docs(readme): add Table of Contents, project structure overview, and …
keys-i Oct 21, 2025
9628bcf
docs(readme): add Dataset and Data Splits sections; references sectio…
keys-i Oct 21, 2025
7a85351
fix(docs): correct wrong BibTeX entry to TimeGAN LOBSTER citation
keys-i Oct 21, 2025
9c3bcde
docs(readme): add TimeGAN model architecture figure and refine archit…
keys-i Oct 21, 2025
2434199
docs(readme): add Training Processes section with methodology and setup
keys-i Oct 21, 2025
a42e209
feat(viz): report KL scores for spread and midprice returns
keys-i Oct 20, 2025
02e4a47
docs(code): add PEP 257 docstrings to predict.py and train.py; fix mi…
keys-i Oct 22, 2025
a2f9b67
docs: finalize report polish; add Results and Discussion sections
keys-i Oct 22, 2025
a358dfd
docs: tighten report wording and structure
keys-i Oct 22, 2025
9e4b14c
docs(code): add PEP 257 docstrings to helpers/ ; fix minor issues
keys-i Oct 22, 2025
49b0823
docs(code): add PEP 257 docstrings to dataset.py; fix minor issues
keys-i Oct 22, 2025
f109e9e
docs(code): add PEP 257 docstrings to modules.py; fix minor issues
keys-i Oct 22, 2025
b2cc591
chore(slurm): harden Rangpur training script and sync CLI
keys-i Oct 22, 2025
e9a861d
docs: fix README PDF rendering with Pandoc
keys-i Oct 22, 2025
fb595c7
docs(readme): fix loss table to render with MathJax in HTML
keys-i Oct 22, 2025
da12e62
docs(readme): fix loss table to render with MathJax in HTML
keys-i Oct 22, 2025
ea2d938
docs(readme): fix 'Components' section for proper GitHub rendering
keys-i Oct 22, 2025
3166780
docs(readme): fix 'Components' section for proper GitHub rendering
keys-i Oct 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions recognition/TimeLOB_TimeGAN_49088276/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# editor specific files
.idea/
.vscode/

# python cache files
# (no leading "./": gitignore patterns are not shell paths, and a pattern
#  starting with "./" never matches; a bare directory name matches at any depth)
__pycache__/
*.pyc

# model specific files
weights/
outs/
data/
preproc_final_core/
*.csv
*.pt
*.pkl
outputs/
checkpoints/
logs/

# OS generated files
.DS_Store
843 changes: 843 additions & 0 deletions recognition/TimeLOB_TimeGAN_49088276/README.MD

Large diffs are not rendered by default.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
47 changes: 47 additions & 0 deletions recognition/TimeLOB_TimeGAN_49088276/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# ------------------------------------------------------------------------------
# Project: TimeGAN (LOB / time-series)
# Description: Reproducible environment for training, evaluation, and visualization
# Maintainer: Radhesh Goel (Keys-I)
# Created: 2025-11-10
# Python: 3.13
# Notes:
# - Keep versions loosely pinned unless you need strict reproducibility.
# - Use `conda env export --from-history` to capture only explicit deps later.
# ------------------------------------------------------------------------------
name: timegan

channels:
  - conda-forge

variables:
  PROJECT_NAME: "timegan"
  PYTHONHASHSEED: "0"
  MPLBACKEND: "Agg"
  TORCH_SHOW_CPP_STACKTRACES: "1"

dependencies:
  - python=3.13
  - numpy
  - pandas
  - scipy
  - scikit-learn
  - scikit-image
  - seaborn
  - matplotlib
  - jupyterlab
  - ipykernel
  - pytorch
  - torchvision
  - pillow
  - tqdm
  - rich
  - typing-extensions
  - pip

# Notes:
# - `contextvars` has been part of the standard library since Python 3.7, so the
#   backport package is intentionally not listed.
# - No PyPI-only packages are required. If one is added later, append a
#   `- pip:` mapping with a non-empty list; an empty `- pip:` entry parses as
#   null and is not a valid dependency section.
# - If you need GPU on Linux with CUDA 12.x, install these AFTER creating the env:
# conda install pytorch-cuda=12.1 -c nvidia -c conda-forge
# (Keep pytorch/torchvision versions as above to maintain ABI compatibility.)
109 changes: 109 additions & 0 deletions recognition/TimeLOB_TimeGAN_49088276/scripts/npy_to_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/usr/bin/env python3
# npy_to_csv.py
from __future__ import annotations

import argparse
from pathlib import Path

import numpy as np
import pandas as pd
from rich.console import Console
from rich.panel import Panel
from rich.status import Status
from rich.table import Table

console = Console()


def show_peek(df: pd.DataFrame, n: int) -> None:
    """Print the first ``n`` rows of ``df`` as a rich table.

    Args:
        df: Frame to preview; every column becomes one table column.
        n: Number of leading rows to show. Values <= 0 disable the preview;
            values larger than ``len(df)`` are clamped to ``len(df)``.
    """
    if n <= 0:
        return

    n_shown = min(n, len(df))
    preview = Table(title=f"Peek (first {n_shown} rows)", show_lines=False)

    for col_name in df.columns:
        preview.add_column(str(col_name))

    # Every cell is stringified because the table only accepts renderables.
    for _idx, record in df.head(n_shown).iterrows():
        cells = [str(value) for value in record.to_list()]
        preview.add_row(*cells)

    console.print(preview)


def show_summary(df: pd.DataFrame, topk: int = 8) -> None:
    """Print a compact per-column statistics table for ``df``.

    Args:
        df: Frame to summarise via ``DataFrame.describe()``.
        topk: Maximum number of described columns to display; any remaining
            columns are reported only as a count below the table.
    """
    # Transposed so that each described column of df is one row of stats.
    stats = df.describe().T
    shown_stats = ["count", "mean", "std", "min", "50%", "max"]

    summary = Table(title="Summary stats (per column)", show_lines=False)
    summary.add_column("column")
    for header in shown_stats:
        summary.add_column(header)

    for col_name, stat_row in stats.head(topk).iterrows():
        cells = []
        for key in shown_stats:
            value = stat_row[key]
            cells.append(f"{value:.6g}" if pd.notnull(value) else "nan")
        summary.add_row(str(col_name), *cells)

    console.print(summary)

    hidden = len(stats) - topk
    if hidden > 0:
        console.print(f"[dim] {hidden} more columns not shown[/dim]")


def main() -> None:
    """Convert a 2D NumPy ``.npy`` array to CSV, with optional console preview.

    Command-line flags:
        --in PATH      Input ``.npy`` file (default ``./outs/gen_data.npy``).
        --out PATH     Output ``.csv`` file (default ``./outs/gen_data.csv``).
        --prefix STR   Column name prefix; columns are ``<prefix>0..<prefix>N-1``.
        --peek N       Show the first N rows in the console (0 disables).
        --summary      Print per-column summary statistics.
        --no-save      Preview only; do not write the CSV.

    Raises:
        SystemExit: with code 1 when the input file does not exist, and with
            code 2 when the loaded array is not two-dimensional.
    """
    ap = argparse.ArgumentParser(
        description="Convert a 2D NumPy .npy array to CSV with rich peek/summary."
    )
    ap.add_argument(
        "--in", dest="inp", default="./outs/gen_data.npy", help="Input .npy file"
    )
    ap.add_argument(
        "--out", dest="outp", default="./outs/gen_data.csv", help="Output .csv file"
    )
    ap.add_argument("--prefix", default="f", help="Column name prefix (default: f)")
    ap.add_argument(
        "--peek",
        type=int,
        default=5,
        help="Show first N rows in the console (0 = disable)",
    )
    ap.add_argument(
        "--summary", action="store_true", help="Print per-column summary statistics"
    )
    ap.add_argument(
        "--no-save", action="store_true", help="Do not write CSV (preview only)"
    )
    args = ap.parse_args()

    inp = Path(args.inp)
    outp = Path(args.outp)

    if not inp.exists():
        console.print(f"[red]Input not found:[/red] {inp}")
        raise SystemExit(1)

    with Status(f"[cyan]Loading[/cyan] {inp}", console=console):
        arr = np.load(inp)

    if arr.ndim != 2:
        console.print(f"[red]Expected a 2D array, got shape {arr.shape}[/red]")
        raise SystemExit(2)

    n_rows, n_cols = arr.shape
    cols = [f"{args.prefix}{i}" for i in range(n_cols)]

    console.print(
        Panel.fit(f"[bold]Array shape[/bold]: {n_rows} × {n_cols}", border_style="cyan")
    )

    df = pd.DataFrame(arr, columns=cols)

    # Peek and summary
    show_peek(df, args.peek)
    if args.summary:
        show_summary(df)

    # Save CSV unless suppressed.
    # argparse stores "--no-save" under the attribute ``no_save`` (hyphens in
    # long option names become underscores).
    if not args.no_save:
        # Create the output directory only when a file is actually written, so
        # preview-only runs and failed validations leave no directories behind.
        outp.parent.mkdir(parents=True, exist_ok=True)
        with Status(f"[cyan]Writing CSV[/cyan] → {outp}", console=console):
            df.to_csv(outp, index=False)
        console.print(f"[green]Done:[/green] wrote [bold]{outp}[/bold]")


if __name__ == "__main__":
    main()
93 changes: 93 additions & 0 deletions recognition/TimeLOB_TimeGAN_49088276/scripts/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/bin/bash

# Train TimeGAN on UQ Rangpur and run sampling + visualisation
#
# Run from the project root (the directory holding environment.yml and src/),
# because environment.yml and the `python -m src.*` modules are resolved
# relative to the working directory and PROJECT_ROOT defaults to $PWD.
# NOTE(review): the logs/ directory named in --output/--error below presumably
# has to exist before submission; confirm against the cluster's Slurm setup.

#SBATCH --job-name=timegan-amzn
#SBATCH --partition=a100
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=4
#SBATCH --gres=gpu:1
#SBATCH --time=24:00:00
#SBATCH --output=logs/%x_%j.out
#SBATCH --error=logs/%x_%j.err

set -euo pipefail

# Conda bootstrap (batch-safe)
# `conda activate` needs the `conda` shell function. Exported variables such as
# CONDA_EXE are inherited by the batch shell but shell functions are not, so
# test for the function itself rather than for CONDA_EXE.
if [[ "$(type -t conda || true)" != "function" ]]; then
  conda_sh=""
  if [[ -n "${CONDA_EXE:-}" ]]; then
    # CONDA_EXE is <base>/bin/conda; conda.sh lives under <base>/etc/profile.d
    conda_sh="$(dirname "$(dirname "${CONDA_EXE}")")/etc/profile.d/conda.sh"
  fi
  if [[ -z "${conda_sh}" || ! -f "${conda_sh}" ]]; then
    # adjust this path to your cluster's Anaconda/Miniconda install if needed
    conda_sh="${HOME}/miniconda3/etc/profile.d/conda.sh"
  fi
  # conda's shell hooks may read unset variables; relax nounset while sourcing.
  set +u
  # shellcheck disable=SC1090
  source "${conda_sh}" 2>/dev/null || {
    echo "Conda not found. Please load your conda module or fix the path." >&2
    exit 1
  }
  set -u
fi

# Create the env if it is absent; otherwise sync it with environment.yml
# (the update also prunes packages no longer listed there).
ENV_NAME="timegan"
# Capture the listing first: piping straight into `grep -q` under pipefail can
# report failure when grep exits early and the producer receives SIGPIPE.
env_list="$(conda env list)"
if ! grep -qE "^[[:space:]]*${ENV_NAME}[[:space:]]" <<<"${env_list}"; then
  conda env create -n "${ENV_NAME}" -f environment.yml
else
  conda env update -n "${ENV_NAME}" -f environment.yml --prune
fi
# Activation scripts of installed packages may also touch unset variables.
set +u
conda activate "${ENV_NAME}"
set -u

# Project paths
export PROJECT_ROOT="${PROJECT_ROOT:-$PWD}"
export PYTHONPATH="$PROJECT_ROOT"

echo "[info] PROJECT_ROOT=$PROJECT_ROOT"
echo "[info] PYTHONPATH=$PYTHONPATH"

# Flags shared by all three stages, kept in one place so they cannot drift.
dataset_args=(
  --seq-len 128
  --data-dir ./data
  --orderbook-filename AMZN_2012-06-21_34200000_57600000_orderbook_10.csv
)
module_args=(
  --batch-size 128
  --z-dim 40
  --hidden-dim 64
  --num-layer 3
)

# Training
python -m src.train \
  --dataset \
  "${dataset_args[@]}" \
  --splits 0.7 0.85 1.0 \
  --no-shuffle \
  --modules \
  "${module_args[@]}" \
  --lr 1e-4 \
  --beta1 0.5 \
  --w-gamma 1.0 \
  --w-g 1.0 \
  --num-iters 25000

# Sampling (generate flat rows)
python -m src.predict \
  --dataset \
  "${dataset_args[@]}" \
  --splits 0.7 0.85 1.0 \
  --modules \
  "${module_args[@]}"

# Visualisation + metrics + latent walks
python -m src.helpers.visualise \
  --dataset \
  "${dataset_args[@]}" \
  --modules \
  "${module_args[@]}" \
  --viz \
  --samples 5 \
  --out-dir ./outs/viz_run1 \
  --cmap magma \
  --dpi 240 \
  --bins 128 \
  --levels 10 \
  --metrics-csv ./outs/viz_run1/metrics.csv \
  --walk --walk-steps 8 --walk-mode cross --walk-prefix latent_walk
Loading