137 changes: 76 additions & 61 deletions minecraft_copilot_ml/__main__.py
@@ -14,30 +14,40 @@
from sklearn.model_selection import train_test_split # type: ignore
from torch.utils.data import DataLoader
from tqdm import tqdm
from torchcfm.models.unet import UNetModel # type: ignore[import-untyped]
import json

from minecraft_copilot_ml.data_loader import (
MinecraftSchematicsDataset,
MinecraftBlockMapDataset,
MinecraftSchematicsDatasetItemType,
get_working_files_and_unique_blocks_and_counts,
list_schematic_files_in_folder,
get_unique_blocks_from_block_maps,
list_files_in_folder,
)
from minecraft_copilot_ml.model import UNet3d
from minecraft_copilot_ml.model import LightningUNetModel


def export_to_onnx(model: UNet3d, path_to_output: str) -> None:
def export_to_onnx(model: LightningUNetModel, channel_n: int, path_to_output: str) -> None:
model.eval()
model.to("cuda" if torch.cuda.is_available() else "cpu")
torch.onnx.export(
model,
torch.randn(1, 1, 16, 16, 16).to("cuda" if torch.cuda.is_available() else "cpu"),
(
torch.randn(1).to("cuda" if torch.cuda.is_available() else "cpu"),
torch.randn(1, channel_n, 16, 16, 16).to("cuda" if torch.cuda.is_available() else "cpu"),
),
path_to_output,
input_names=["input"],
output_names=["output"],
input_names=["timestep", "block_map"],
# https://onnxruntime.ai/docs/reference/compatibility.html
opset_version=17,
output_names=["output"],
)


def main(argparser: argparse.ArgumentParser) -> None:
path_to_schematics: str = argparser.parse_args().path_to_schematics
path_to_block_maps: str = "/home/mehdi/minecraft-copilot-ml/cut_datasets/minecraft-schematics/block_maps"
path_to_block_map_masks: str = "/home/mehdi/minecraft-copilot-ml/cut_datasets/minecraft-schematics/block_map_masks"
path_to_output: str = argparser.parse_args().path_to_output
epochs: int = argparser.parse_args().epochs
batch_size: int = argparser.parse_args().batch_size
@@ -47,85 +57,90 @@ def main(argparser: argparse.ArgumentParser) -> None:
if not os.path.exists(path_to_output):
os.makedirs(path_to_output)

schematics_list_files = list_schematic_files_in_folder(path_to_schematics)
schematics_list_files = sorted(schematics_list_files)
block_map_list_files = list_files_in_folder(path_to_block_maps)
block_map_list_files = sorted(block_map_list_files)
block_map_mask_list_files = list_files_in_folder(path_to_block_map_masks)
block_map_mask_list_files = sorted(block_map_mask_list_files)

start = 0
end = len(schematics_list_files)
end = len(block_map_list_files)
if dataset_start is not None:
start = dataset_start
if dataset_limit is not None:
end = dataset_limit
schematics_list_files = schematics_list_files[start:end]
# Set the dictionary size to the number of unique blocks in the dataset.
# And also select the right files to load.
unique_blocks_dict, unique_counts_coefficients, loaded_schematic_files = (
get_working_files_and_unique_blocks_and_counts(schematics_list_files)
)

logger.info(f"Unique blocks: {unique_blocks_dict}")
logger.info(f"Number of unique blocks: {len(unique_blocks_dict)}")
logger.info(f"Number of loaded schematics files: {len(loaded_schematic_files)}")
logger.info(f"Unique counts coefficients: {unique_counts_coefficients}")
block_map_list_files = block_map_list_files[start:end]
block_map_mask_list_files = block_map_mask_list_files[start:end]

train_schematics_list_files, test_schematics_list_files = train_test_split(
loaded_schematic_files, test_size=0.2, random_state=42
unique_blocks_dict = json.load(
open("/home/mehdi/minecraft-copilot-ml/cut_datasets/minecraft-schematics/unique_blocks.json", "r")
)
train_schematics_dataset = MinecraftSchematicsDataset(train_schematics_list_files)
val_schematics_dataset = MinecraftSchematicsDataset(test_schematics_list_files)
# (
# train_block_map_list_files,
# # test_block_map_list_files,
# train_block_map_mask_list_files,
# # test_block_map_mask_list_files,
# ) = train_test_split(block_map_list_files, block_map_mask_list_files, test_size=0.2, random_state=42)
train_block_map_dataset = MinecraftBlockMapDataset(block_map_list_files, block_map_mask_list_files)
# val_block_map_dataset = MinecraftBlockMapDataset(test_block_map_list_files, test_block_map_mask_list_files)

def collate_fn(batch: List[MinecraftSchematicsDatasetItemType]) -> MinecraftSchematicsDatasetItemType:
block_map, noisy_block_map, mask, loss_mask = zip(*batch)
return np.stack(block_map), np.stack(noisy_block_map), np.stack(mask), np.stack(loss_mask)

train_schematics_dataloader = DataLoader(
train_schematics_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn
train_block_map_dataset,
batch_size=batch_size,
shuffle=True,
collate_fn=collate_fn,
# num_workers=os.cpu_count() - 1,
pin_memory=True,
sampler=None
)
val_schematics_dataloader = DataLoader(val_schematics_dataset, batch_size=batch_size, collate_fn=collate_fn)

model = UNet3d(unique_blocks_dict, unique_counts_coefficients=unique_counts_coefficients)
# val_schematics_dataloader = DataLoader(val_block_map_dataset, batch_size=1, collate_fn=collate_fn)

unet_model = UNetModel(
dims=3,
dim=[len(unique_blocks_dict), 16, 16, 16],
num_res_blocks=2,
num_channels=32,
channel_mult=(1, 2, 2, 2),
dropout=0.1,
num_heads=4,
resblock_updown=True,
updown=False,
)
model = LightningUNetModel(unet_model, unique_blocks_dict)
csv_logger = CSVLogger(save_dir=path_to_output)
model_checkpoint = ModelCheckpoint(path_to_output, monitor="val_loss", save_top_k=1, save_last=True, mode="min")
trainer = pl.Trainer(logger=csv_logger, callbacks=model_checkpoint, max_epochs=epochs, log_every_n_steps=1)
trainer.fit(model, train_schematics_dataloader, val_schematics_dataloader)
model_checkpoint = ModelCheckpoint(path_to_output, monitor="train_loss", save_top_k=1, save_last=True, mode="min")
trainer = pl.Trainer(
logger=csv_logger,
callbacks=model_checkpoint,
max_epochs=epochs,
log_every_n_steps=1,
accelerator="gpu",
devices=1,
)
trainer.fit(
model,
train_schematics_dataloader,
# val_schematics_dataloader,
)

# Save the best and last model locally
logger.info(f"Best val_loss is: {model_checkpoint.best_model_score}")
best_model = UNet3d.load_from_checkpoint(
model_checkpoint.best_model_path,
unique_blocks_dict=unique_blocks_dict,
unique_counts_coefficients=unique_counts_coefficients,
best_model = LightningUNetModel.load_from_checkpoint(
model_checkpoint.best_model_path, model=unet_model, unique_blocks_dict=unique_blocks_dict
)
torch.save(best_model, os.path.join(path_to_output, "best_model.pth"))
last_model = UNet3d.load_from_checkpoint(
model_checkpoint.last_model_path,
unique_blocks_dict=unique_blocks_dict,
unique_counts_coefficients=unique_counts_coefficients,
last_model = LightningUNetModel.load_from_checkpoint(
model_checkpoint.last_model_path, model=unet_model, unique_blocks_dict=unique_blocks_dict
)
torch.save(last_model, os.path.join(path_to_output, "last_model.pth"))
export_to_onnx(best_model, os.path.join(path_to_output, "best_model.onnx"))
export_to_onnx(last_model, os.path.join(path_to_output, "last_model.onnx"))
export_to_onnx(best_model, len(unique_blocks_dict), os.path.join(path_to_output, "best_model.onnx"))
export_to_onnx(last_model, len(unique_blocks_dict), os.path.join(path_to_output, "last_model.onnx"))
with open(os.path.join(path_to_output, "unique_blocks_dict.json"), "w") as f:
json.dump(unique_blocks_dict, f)

# Save the best and last model to S3
s3_client = boto3.client(
"s3",
region_name="eu-west-3",
aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
)
s3_client.upload_file(os.path.join(path_to_output, "best_model.pth"), "minecraft-copilot-models", "best_model.pth")
s3_client.upload_file(os.path.join(path_to_output, "last_model.pth"), "minecraft-copilot-models", "last_model.pth")
s3_client.upload_file(
os.path.join(path_to_output, "best_model.onnx"), "minecraft-copilot-models", "best_model.onnx"
)
s3_client.upload_file(
os.path.join(path_to_output, "last_model.onnx"), "minecraft-copilot-models", "last_model.onnx"
)
s3_client.upload_file(
os.path.join(path_to_output, "unique_blocks_dict.json"), "minecraft-copilot-models", "unique_blocks_dict.json"
)


if __name__ == "__main__":
argparser = argparse.ArgumentParser()
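A quick smoke test of the new two-input ONNX graph can be run with onnxruntime. This is an illustrative sketch, not part of the PR: it assumes onnxruntime and numpy are installed, that the output folder contains best_model.onnx and unique_blocks_dict.json as written above, and that the model takes a scalar timestep plus a (1, channel_n, 16, 16, 16) block map, mirroring the export call.

import json

import numpy as np
import onnxruntime as ort  # assumed dependency, not pinned by this PR

output_dir = "output"  # hypothetical --path-to-output value

with open(f"{output_dir}/unique_blocks_dict.json") as f:
    unique_blocks_dict = json.load(f)
channel_n = len(unique_blocks_dict)

session = ort.InferenceSession(f"{output_dir}/best_model.onnx")

# Input names match the export above: input_names=["timestep", "block_map"].
timestep = np.random.randn(1).astype(np.float32)
block_map = np.random.randn(1, channel_n, 16, 16, 16).astype(np.float32)

(output,) = session.run(["output"], {"timestep": timestep, "block_map": block_map})
print(output.shape)  # expected to mirror the block_map shape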
79 changes: 54 additions & 25 deletions minecraft_copilot_ml/data_loader.py
@@ -43,6 +43,9 @@
"4766.schematic",
"10380.schematic",
"12695.schematic",
"2985.schematic",
"5096.schematic",
"5460.schematic",
]


@@ -195,30 +198,36 @@ def __getitem__(self, idx: int) -> MinecraftSchematicsDatasetItemType:
minimum_height,
minimum_depth,
) = get_random_block_map_and_mask_coordinates(numpy_minecraft_map, 16, 16, 16)
focused_block_map = block_map[
random_roll_x_value : random_roll_x_value + minimum_width,
random_y_height_value : random_y_height_value + minimum_height,
random_roll_z_value : random_roll_z_value + minimum_depth,
]
focused_noisy_block_map, unraveled_indices_of_noise = create_noisy_block_map(focused_block_map)
noisy_block_map = block_map.copy()
noisy_block_map[
random_roll_x_value : random_roll_x_value + minimum_width,
random_y_height_value : random_y_height_value + minimum_height,
random_roll_z_value : random_roll_z_value + minimum_depth,
] = focused_noisy_block_map
block_map_mask = np.zeros((16, 16, 16), dtype=bool)
block_map_mask[
random_roll_x_value : random_roll_x_value + minimum_width,
random_y_height_value : random_y_height_value + minimum_height,
random_roll_z_value : random_roll_z_value + minimum_depth,
] = True
loss_mask = np.zeros((16, 16, 16), dtype=bool)
loss_mask[unraveled_indices_of_noise] = True
return block_map, noisy_block_map, block_map_mask, loss_mask
return block_map, None, block_map_mask, None


class MinecraftBlockMapDataset(Dataset):
def __init__(
self,
block_map_list_files: List[str],
block_map_mask_list_files: List[str],
) -> None:
self.block_map_list_files = sorted(block_map_list_files)
self.block_map_mask_list_files = sorted(block_map_mask_list_files)

def __len__(self) -> int:
return len(self.block_map_list_files)

def __getitem__(self, idx: int) -> MinecraftSchematicsDatasetItemType:
block_map_file = self.block_map_list_files[idx]
block_map = np.load(block_map_file)
block_map_mask_file = self.block_map_mask_list_files[idx]
block_map_mask = np.load(block_map_mask_file)
return block_map, None, block_map_mask, None


def list_schematic_files_in_folder(path_to_schematics: str) -> list[str]:
def list_files_in_folder(path_to_schematics: str) -> list[str]:
schematics_list_files = []
tqdm_os_walk = tqdm(os.walk(path_to_schematics), smoothing=0)
for dirpath, _, filenames in tqdm_os_walk:
@@ -233,18 +242,18 @@ def get_working_files_and_unique_blocks_and_counts(
schematics_list_files: list[str],
) -> Tuple[Dict[str, int], np.ndarray, list[str]]:
unique_blocks: Set[str] = set()
unique_counts: Dict[str, int] = {}
# unique_counts: Dict[str, int] = {}
loaded_schematic_files: List[str] = []
tqdm_list_files = tqdm(schematics_list_files, smoothing=0)
for nbt_file in tqdm_list_files:
tqdm_list_files.set_description(f"Processing {nbt_file}")
try:
numpy_minecraft_map = nbt_to_numpy_minecraft_map(nbt_file)
unique_blocks_in_map, unique_counts_in_map = np.unique(numpy_minecraft_map, return_counts=True)
for block, count in zip(unique_blocks_in_map, unique_counts_in_map):
if block not in unique_counts:
unique_counts[block] = 0
unique_counts[block] += count
unique_blocks_in_map = np.unique(numpy_minecraft_map)
# for block, count in zip(unique_blocks_in_map, unique_counts_in_map):
# if block not in unique_counts:
# unique_counts[block] = 0
# unique_counts[block] += count
for block in unique_blocks_in_map:
if block not in unique_blocks:
logger.info(f"Found new block: {block}")
@@ -255,6 +264,26 @@
logger.exception(e)
continue
unique_blocks_dict = {block: idx for idx, block in enumerate(unique_blocks)}
unique_counts_coefficients = np.array([unique_counts[block] for block in unique_blocks_dict])
unique_counts_coefficients = unique_counts_coefficients.max() / unique_counts_coefficients
return unique_blocks_dict, unique_counts_coefficients, loaded_schematic_files
return unique_blocks_dict, np.array([1]), loaded_schematic_files


def get_unique_blocks_from_block_maps(
block_map_list_files: list[str],
) -> Dict[str, int]:
unique_blocks: Set[str] = set()
tqdm_list_files = tqdm(block_map_list_files, smoothing=0)
for block_map_file in tqdm_list_files:
tqdm_list_files.set_description(f"Processing {block_map_file}")
try:
block_map = np.load(block_map_file, allow_pickle=True)
unique_blocks_in_map = np.unique(block_map)
for block in unique_blocks_in_map:
if block not in unique_blocks:
logger.info(f"Found new block: {block}")
unique_blocks = unique_blocks.union(unique_blocks_in_map)
except Exception as e:
logger.error(f"Could not load {block_map_file}")
logger.exception(e)
continue
unique_blocks_dict = {block: idx for idx, block in enumerate(unique_blocks)}
return unique_blocks_dict
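Taken together, these changes turn the training data into pre-cut 16x16x16 .npy pairs loaded by MinecraftBlockMapDataset. As a minimal sketch of exercising the new class (dataset root assumed to follow generate_block_maps.py below; the PR hardcodes /home/mehdi/... paths instead):

from minecraft_copilot_ml.data_loader import (
    MinecraftBlockMapDataset,
    list_files_in_folder,
)

# Hypothetical dataset root produced by generate_block_maps.py.
base = "cut_datasets/minecraft-schematics"
block_map_files = list_files_in_folder(f"{base}/block_maps")
mask_files = list_files_in_folder(f"{base}/block_map_masks")

dataset = MinecraftBlockMapDataset(block_map_files, mask_files)
block_map, _, block_map_mask, _ = dataset[0]  # noisy map and loss mask are now None
print(block_map.shape, block_map_mask.shape)  # expected: (16, 16, 16) twice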
52 changes: 52 additions & 0 deletions minecraft_copilot_ml/generate_block_maps.py
@@ -0,0 +1,52 @@
import os
from typing import Optional
import numpy as np
from tqdm import tqdm
import json

from minecraft_copilot_ml.data_loader import (
get_working_files_and_unique_blocks_and_counts,
list_files_in_folder,
MinecraftSchematicsDataset,
)

if __name__ == "__main__":
dataset_start: Optional[int] = 0
dataset_limit: Optional[int] = 4096
if not os.path.exists("/home/mehdi/minecraft-copilot-ml/cut_datasets/minecraft-schematics/block_maps"):
os.makedirs("/home/mehdi/minecraft-copilot-ml/cut_datasets/minecraft-schematics/block_maps")
if not os.path.exists("/home/mehdi/minecraft-copilot-ml/cut_datasets/minecraft-schematics/block_map_masks"):
os.makedirs("/home/mehdi/minecraft-copilot-ml/cut_datasets/minecraft-schematics/block_map_masks")
schematics_list_files = list_files_in_folder("/home/mehdi/minecraft-copilot-ml/datasets/minecraft-schematics")
schematics_list_files = sorted(schematics_list_files)

if dataset_start is not None:
start = dataset_start
if dataset_limit is not None:
end = dataset_limit
schematics_list_files = schematics_list_files[start:end]
# Set the dictionary size to the number of unique blocks in the dataset.
# And also select the right files to load.
unique_blocks_dict, _, loaded_schematic_files = get_working_files_and_unique_blocks_and_counts(
schematics_list_files
)
minecraft_schematic_dataset = MinecraftSchematicsDataset(loaded_schematic_files)
tqdm_minecraft_schematic_dataset = tqdm(minecraft_schematic_dataset, smoothing=0)
for idx, (block_map, _, block_map_mask, _) in enumerate(tqdm_minecraft_schematic_dataset):
tqdm_minecraft_schematic_dataset.set_description(f"Processing block map {tqdm_minecraft_schematic_dataset.n}")
block_map = np.vectorize(lambda x: unique_blocks_dict.get(x, unique_blocks_dict["minecraft:air"]))(block_map)
block_map = block_map.astype(np.int64)
np.save(
f"/home/mehdi/minecraft-copilot-ml/cut_datasets/minecraft-schematics/block_maps/{idx}.npy",
block_map,
allow_pickle=False,
)
np.save(
f"/home/mehdi/minecraft-copilot-ml/cut_datasets/minecraft-schematics/block_map_masks/{idx}.npy",
block_map_mask,
allow_pickle=False,
)
json.dump(
unique_blocks_dict,
open("/home/mehdi/minecraft-copilot-ml/cut_datasets/minecraft-schematics/unique_blocks.json", "w"),
)
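Since the script stores integer block ids, decoding a saved grid back to block names takes the inverse of unique_blocks_dict. A hedged round-trip check, assuming the same hardcoded paths and that file 0.npy exists:

import json

import numpy as np

base = "/home/mehdi/minecraft-copilot-ml/cut_datasets/minecraft-schematics"
with open(f"{base}/unique_blocks.json") as f:
    unique_blocks_dict = json.load(f)
# Invert name -> id into id -> name to decode the saved grids.
id_to_block = {idx: name for name, idx in unique_blocks_dict.items()}

block_map = np.load(f"{base}/block_maps/0.npy")  # int64 ids, shape (16, 16, 16)
names = np.vectorize(id_to_block.get)(block_map)
print(names[0, 0, 0])  # e.g. "minecraft:air"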
267 changes: 161 additions & 106 deletions minecraft_copilot_ml/metrics_graph.ipynb

Large diffs are not rendered by default.
