Skip to content

[BUG] Inconsistencies between image crop sizes for dataset Platy CBG #36

@postnubilaphoebus

Description

@postnubilaphoebus

Describe the bug
When running the data Jupyter notebook from the examples as a Python script, I get crops of inconsistent sizes (about half are 16 x 80 x 80, the other half 24 x 80 x 80). This causes an error in torch.stack when attempting training. These sizes also differ from the crop size given in the paper (https://www.sciencedirect.com/science/article/pii/S1361841522001700), which lists 32 x 136 x 136 for this dataset. Can you comment on what might be going wrong?
My apologies if I missed something.

To Reproduce

from tqdm import tqdm
from glob import glob
import tifffile
import numpy as np
import os
from EmbedSeg.utils.preprocess_data import extract_data, split_train_val, split_train_test, get_data_properties
from EmbedSeg.utils.generate_crops import *
from EmbedSeg.utils.visualize import visualize_crop_3d
import json
from matplotlib.colors import ListedColormap

# Dataset root (relative to the directory the script is run from) and the
# project name passed to every helper call below.
data_dir = '../../../data'
project_name = 'Platynereis-Nuclei-CBG'

# Fetch the dataset archive published with the EmbedSeg v0.1.0 release.
# NOTE(review): presumably unpacked under data_dir/project_name -- confirm in extract_data.
extract_data(
    zip_url = 'https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/Platynereis-Nuclei-CBG.zip',
    data_dir = data_dir,
    project_name = project_name,
)

# Carve a test split out of 'train'.
# NOTE(review): with by_fraction=False, `subset = 2` presumably means two
# images rather than a fraction -- confirm against split_train_test.
split_train_test(
    data_dir = data_dir,
    project_name = project_name, 
    train_test_name = 'train',
    subset = 2, 
    by_fraction = False,
    seed = 0)

# Carve a validation split out of 'train', with the same subset/seed settings.
# This runs after the test split, so the order of these two calls matters.
split_train_val(
    data_dir = data_dir,
    project_name = project_name, 
    train_val_name = 'train',
    subset = 2,
    by_fraction = False,
    seed = 0)

# Collect dataset statistics over the train/val/test splits in 3D mode; the
# avg/stdev object sizes stored here are what the crop sizes are derived from.
data_properties_dir = get_data_properties(
    data_dir,
    project_name,
    train_val_name=['train', 'val'],
    test_name=['test'],
    mode='3d',
)

# Acquisition metadata supplied by hand.
data_properties_dir.update({
    'data_type': '16-bit',
    'pixel_size_x_microns': 0.406,  # set equal to voxel size (microns) in x dimension
    'pixel_size_y_microns': 0.406,  # set equal to voxel size (microns) in y dimension
    'pixel_size_z_microns': 2.031,  # set equal to voxel size (microns) in z dimension
})

# Persist the properties to the current working directory.
with open('data_properties.json', 'w') as outfile:
    json.dump(data_properties_dir, outfile)
    print("Dataset properies of the `{}` dataset is saved to `data_properties.json`".format(project_name))

# Anchor point used for each instance's spatial embedding.
center = 'medoid' # 'medoid', 'centroid'
# Validate with an explicit check: a bare `assert` is stripped when Python
# runs with -O, which would let an invalid value through silently. The
# exception type and args match what the original assert-based check raised.
if center not in {'medoid', 'centroid'}:
    raise AssertionError('Please specify center as one of : {"medoid", "centroid"}', 42)
print("Spatial Embedding Location chosen as : {}".format(center))

# Number of standard deviations added to the mean object size when the crop
# sizes are computed further down (avg_object_size + n_sigma * stdev_object_size).
n_sigma = 5

def round_up_8(x):
    """Truncate ``x`` to an integer and round it up to the next multiple of 8.

    The fractional part is discarded *before* rounding, so ``16.9`` maps to
    ``16`` (not ``24``) while ``17.2`` maps to ``24``. Values that are
    already a multiple of 8 are returned unchanged.

    Args:
        x: A NumPy scalar or array, or a plain Python ``int``/``float``
            (for example a value read back from a JSON file).

    Returns:
        A NumPy integer (or integer array for array input) holding the
        smallest multiple of 8 that is >= ``int(x)``.
    """
    # np.asarray lets plain Python numbers through; NumPy inputs behave
    # exactly as with a direct `.astype(int)` call.
    # Adding 7 and clearing the low three bits (-8 == ...11111000 in two's
    # complement) rounds up to a multiple of 8.
    return (np.asarray(x).astype(int) + 7) & (-8)

# Where the generated crops are written, and which splits get cropped.
crops_dir = './crops'
data_subsets = ['train', 'val']

# Depth of a crop: mean object size plus n_sigma standard deviations, passed
# through round_up_8.
crop_size_z = round_up_8(
    data_properties_dir['avg_object_size_z']
    + n_sigma * data_properties_dir['stdev_object_size_z']
)

# In-plane crops are square: both sides use the larger of the y and x extents.
crop_size_x = np.maximum(
    round_up_8(
        data_properties_dir['avg_object_size_y']
        + n_sigma * data_properties_dir['stdev_object_size_y']
    ),
    round_up_8(
        data_properties_dir['avg_object_size_x']
        + n_sigma * data_properties_dir['stdev_object_size_x']
    ),
)
crop_size_y = crop_size_x
print("Crop size in x and y will be set equal to {}. Crop size in z is set equal to {}".format(crop_size_x, crop_size_z))

# Ratio of the z voxel size to the x voxel size.
anisotropy_factor = (
    data_properties_dir['pixel_size_z_microns']
    / data_properties_dir['pixel_size_x_microns']
)
speed_up = 2

# Intensity normalization scheme applied when generating crops.
norm = 'min-max-percentile'
# Validate with an explicit check: a bare `assert` is stripped when Python
# runs with -O, which would let an invalid value through silently. The
# exception type and args match what the original assert-based check raised.
if norm not in {'min-max-percentile', 'mean-std', 'absolute'}:
    raise AssertionError('Please specify norm as one of : {"min-max-percentile", "mean-std", "absolute"}', 42)
print("Normalization chosen as : {}".format(norm))

# Generate crops for every image/mask pair of each requested split.
for data_subset in data_subsets:
    image_dir = os.path.join(data_dir, project_name, data_subset, 'images')
    instance_dir = os.path.join(data_dir, project_name, data_subset, 'masks')
    image_names = sorted(glob(os.path.join(image_dir, '*.tif')))
    instance_names = sorted(glob(os.path.join(instance_dir, '*.tif')))
    # Images and masks are paired purely by their position in the sorted
    # listings, so a differing count means at least one pair would be wrong.
    # Fail before any crops are written instead of part-way through.
    if len(image_names) != len(instance_names):
        raise ValueError(
            "Found {} images in `{}` but {} masks in `{}`".format(
                len(image_names), image_dir, len(instance_names), instance_dir))
    # `total` keeps the progress bar sized even though zip() has no length.
    for image_name, instance_name in tqdm(zip(image_names, instance_names), total=len(image_names)):
        process_3d(image_name, instance_name, os.path.join(crops_dir, project_name), data_subset,
                crop_size_x = crop_size_x, crop_size_y = crop_size_y, crop_size_z = crop_size_z,
                center = center, anisotropy_factor = anisotropy_factor, speed_up = speed_up,
                norm=norm, data_type = data_properties_dir['data_type'])
    print("Cropping of images, instances and centre_images for data_subset = `{}` done!".format(data_subset))

# Record the data type and normalization scheme used for the crops.
normalization = {
    'data_type': data_properties_dir['data_type'],
    'norm': norm,
}
with open('normalization.json', 'w') as outfile:
    json.dump(normalization, outfile)
    print("Normalization properties of the `{}` dataset is saved to `normalization.json`".format(project_name))

Expected behavior
Equal crop sizes for the whole dataset, as well as crop sizes matching the dimensions mentioned in the paper.

Desktop (please complete the following information):

  • OS: SUSE Linux Enterprise Server 15 SP6

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions