diff --git a/.gitignore b/.gitignore
index e616337..1cc6708 100644
--- a/.gitignore
+++ b/.gitignore
@@ -80,3 +80,8 @@ dataset/s3dis/Stanford3dDataset_v1.2
 dataset/stpls3d/train
 dataset/stpls3d/val
 dataset/stpls3d/Synthetic_v3_InstanceSegmentation
+
+# ymir dataset files
+dataset/ymir/overfit_no_filter
+**/*.h5
+**/*.pth
diff --git a/configs/softgroup_ymir_backbone_overfit_no_filter.yaml b/configs/softgroup_ymir_backbone_overfit_no_filter.yaml
new file mode 100644
index 0000000..1bd1848
--- /dev/null
+++ b/configs/softgroup_ymir_backbone_overfit_no_filter.yaml
@@ -0,0 +1,81 @@
+model:
+  channels: 16
+  num_blocks: 7
+  semantic_classes: 2
+  instance_classes: 1
+  sem2ins_classes: []
+  semantic_only: True
+  semantic_weight: [1.0, 1.0, 44.0, 21.9, 1.8, 25.1, 31.5, 21.8, 24.0, 54.4, 114.4,
+                    81.2, 43.6, 9.7, 22.4] # TODO: What is this?!
+  with_coords: False
+  ignore_label: -100
+  grouping_cfg:
+    score_thr: 0.2
+    radius: 0.9 # TODO: depending on the scale
+    mean_active: 3
+    # TODO: Insert the computed values
+    class_numpoint_mean: [-1., 10408., 58., 124., 1351., 162., 430., 1090., 451., 26., 43.,
+                          61., 39., 109., 1239]
+    npoint_thr: 0.05  # absolute if class_numpoint == -1, relative if class_numpoint != -1
+    ignore_classes: [0] # TODO: Should we add the walls here?
+  instance_voxel_cfg:
+    scale: 3 # TODO: Adjust (?)
+    spatial_shape: 20
+  train_cfg:
+    max_proposal_num: 200 # TODO: probably needs to be adjusted to the number of pores in one sample
+    pos_iou_thr: 0.5
+  test_cfg:
+    x4_split: False
+    cls_score_thr: 0.001
+    mask_score_thr: -0.5
+    min_npoint: 100
+  fixed_modules: []
+
+data:
+  train:
+    type: 'ymir'
+    data_root: 'dataset/ymir/overfit_no_filter/prepared'
+    prefix: 'train'
+    suffix: '.pth'
+    training: True
+    repeat: 4
+    voxel_cfg:
+      scale: 3 # TODO: Adjust
+      spatial_shape: [128, 512]
+      max_npoint: 250000
+      min_npoint: 5000
+  test:
+    type: 'ymir'
+    data_root: 'dataset/ymir/overfit_no_filter/prepared'
+    prefix: 'val'
+    suffix: '.pth'
+    training: False
+    voxel_cfg:
+      scale: 3 # TODO: Adjust
+      spatial_shape: [128, 512]
+      max_npoint: 250000
+      min_npoint: 5000
+
+dataloader:
+  train:
+    batch_size: 4
+    num_workers: 4
+  test:
+    batch_size: 1
+    num_workers: 1
+
+optimizer:
+  type: 'Adam'
+  lr: 0.004
+
+save_cfg:
+  semantic: True
+  offset: True
+  instance: True
+
+fp16: False
+epochs: 20
+step_epoch: 20
+save_freq: 4
+pretrain: ''
+work_dir: ''
diff --git a/dataset/ymir/prepare_data.py b/dataset/ymir/prepare_data.py
new file mode 100644
index 0000000..99bbb92
--- /dev/null
+++ b/dataset/ymir/prepare_data.py
@@ -0,0 +1,108 @@
+from random import sample  # NOTE(review): unused import - consider removing
+from typing import List
+import torch
+import numpy as np
+import glob
+import h5py
+import os
+
+
+def convertToPointCloud(
+    files: List[str],
+    outPutFolder: str,
+    split: str = 'train',
+    samplePoints: int = 0,  # 0 means no sampling
+):
+    # Convert each HDF5 volume into a (coords, colors[, sem, inst]) .pth file.
+    train_instance_numpoints = 0
+    train_instances = 0
+    for file in files:
+        name = os.path.splitext(os.path.basename(file))[0]  # fixed: str.strip('.h5') strips characters, not the suffix
+        outFilePath = os.path.join(outPutFolder, name + '.pth')
+        # read in file
+        with h5py.File(file, "r") as data:
+
+            raw = np.array(data['raw'])
+            colors = raw.flatten()  # column first
+            colors = np.repeat(colors[:, np.newaxis], 3, axis=1)
+            colors = colors.astype(np.float32)
+            # normalize to [-1, 1] (assumes uint16-range intensities - TODO confirm)
+            colors = colors / 32767.5 - 1
+
+            coords = np.mgrid[
+                0:1:raw.shape[0] * 1j,
+                0:1:raw.shape[1] * 1j,
+                0:1:raw.shape[2] * 1j,
+            ].reshape(3, -1).T
+            coords = coords.astype(np.float32)
+
+            # sampling of points
+            samples = np.arange(0, coords.shape[0])
+            if samplePoints > 0:
+                samples = np.random.choice(coords.shape[0], samplePoints)  # NOTE(review): samples WITH replacement - duplicates possible
+
+            colors = colors[samples]
+            coords = coords[samples]
+
+            if split != 'test':
+                # seems a bit weird, but they used float64 for the labels so
+                # let's use it as well
+                sem_labels = np.array(data['foreground']).flatten().astype(np.float64)
+                # map the background value (= 0 i.e. sugar walls) to -100
+                sem_labels[sem_labels == 0] = -100
+
+                # seems a bit weird, but they used float64 for the labels so
+                # let's use it as well
+                instance_labels = np.array(data['label']).flatten().astype(np.float64)
+
+                # sampling
+                sem_labels = sem_labels[samples]
+                instance_labels = instance_labels[samples]
+
+                # keep track of the mean number of points per instance for the training dataset
+                # NOTE: This only works as long as we have one type of class
+                if split == 'train':
+                    values, counts = np.unique(
+                        instance_labels, return_counts=True)
+                    assert values[0] == 0
+                    print(values, counts)
+                    train_instance_numpoints += np.sum(counts[1:])
+                    train_instances += len(counts[1:])
+
+                torch.save((coords, colors, sem_labels,
+                            instance_labels), outFilePath)
+            else:
+                torch.save((coords, colors), outFilePath)
+
+    if split == 'train':
+        assert train_instances > 0
+        print('class_numpoint_mean: ', train_instance_numpoints / train_instances)  # fixed: label matches config key
+
+
+def getFiles(files, fileSplit):
+    res = []
+    for filePath in files:
+        name = os.path.basename(filePath)
+        num = name[:2] if name[:2].isdigit() else name[:1]
+        if int(num) in fileSplit:
+            res.append(filePath)
+    return res
+
+
+if __name__ == '__main__':
+    data_folder = 'overfit_no_filter'
+    split = 'train'
+    trainFiles = sorted(glob.glob(data_folder + "/" + split + '/*.h5'))
+    print(trainFiles)
+    assert len(trainFiles) > 0
+    trainOutDir = split
+    os.makedirs(trainOutDir, exist_ok=True)
+    convertToPointCloud(trainFiles, trainOutDir, split, samplePoints=35145)
+
+    split = 'val'
+    valFiles = sorted(glob.glob(data_folder + "/" + split + '/*.h5'))
+    print(valFiles)
+    assert len(valFiles) > 0
+    valOutDir = split
+    os.makedirs(valOutDir, exist_ok=True)
+    convertToPointCloud(valFiles, valOutDir, split, samplePoints=35145)
diff --git a/docs/config_explanation.md b/docs/config_explanation.md
index 1facd67..04be8f9 100644
--- a/docs/config_explanation.md
+++ b/docs/config_explanation.md
@@ -1,4 +1,4 @@
-```
+``` yaml
 model:
   channels: 32 # number of base channel for the backbone network
   num_blocks: 7 # number of backbone blocks