added ymir dataset preparation

botastic 2022-07-17 21:42:01 +02:00
parent a87940664d
commit 04358926d0
4 changed files with 195 additions and 1 deletion

.gitignore vendored

@@ -80,3 +80,8 @@ dataset/s3dis/Stanford3dDataset_v1.2
dataset/stpls3d/train
dataset/stpls3d/val
dataset/stpls3d/Synthetic_v3_InstanceSegmentation
# ymir dataset files
dataset/ymir/overfit_no_filter
**/*.h5
**/*.pth


@@ -0,0 +1,81 @@
model:
  channels: 16
  num_blocks: 7
  semantic_classes: 2
  instance_classes: 1
  sem2ins_classes: []
  semantic_only: True
  semantic_weight: [1.0, 1.0, 44.0, 21.9, 1.8, 25.1, 31.5, 21.8, 24.0, 54.4, 114.4,
                    81.2, 43.6, 9.7, 22.4] # TODO: What is this?!
  with_coords: False
  ignore_label: -100
  grouping_cfg:
    score_thr: 0.2
    radius: 0.9 # TODO: depending on the scale
    mean_active: 3
    # TODO: Insert the computed values
    class_numpoint_mean: [-1., 10408., 58., 124., 1351., 162., 430., 1090., 451., 26., 43.,
                          61., 39., 109., 1239.]
    npoint_thr: 0.05 # absolute if class_numpoint == -1, relative if class_numpoint != -1
    ignore_classes: [0] # TODO: Should we add the walls here?
  instance_voxel_cfg:
    scale: 3 # TODO: Adjust (?)
    spatial_shape: 20
  train_cfg:
    max_proposal_num: 200 # TODO: probably needs to be adjusted to the number of pores in one sample
    pos_iou_thr: 0.5
  test_cfg:
    x4_split: False
    cls_score_thr: 0.001
    mask_score_thr: -0.5
    min_npoint: 100
  fixed_modules: []

data:
  train:
    type: 'ymir'
    data_root: 'dataset/ymir/overfit_no_filter/prepared'
    prefix: 'train'
    suffix: '.pth'
    training: True
    repeat: 4
    voxel_cfg:
      scale: 3 # TODO: Adjust
      spatial_shape: [128, 512]
      max_npoint: 250000
      min_npoint: 5000
  test:
    type: 'ymir'
    data_root: 'dataset/ymir/overfit_no_filter/prepared'
    prefix: 'val'
    suffix: '.pth'
    training: False
    voxel_cfg:
      scale: 3 # TODO: Adjust
      spatial_shape: [128, 512]
      max_npoint: 250000
      min_npoint: 5000

dataloader:
  train:
    batch_size: 4
    num_workers: 4
  test:
    batch_size: 1
    num_workers: 1

optimizer:
  type: 'Adam'
  lr: 0.004

save_cfg:
  semantic: True
  offset: True
  instance: True

fp16: False
epochs: 20
step_epoch: 20
save_freq: 4
pretrain: ''
work_dir: ''
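
The `npoint_thr` comment above is easy to misread, so here is a minimal sketch of the proposal filtering it describes; the function name and structure are assumptions for illustration, not code from the SoftGroup source:

``` python
def proposal_large_enough(npoint: int, numpoint_mean: float, npoint_thr: float) -> bool:
    """Sketch of the npoint_thr semantics from the config comment."""
    if numpoint_mean == -1:
        # no per-class mean recorded: treat npoint_thr as an absolute point count
        return npoint > npoint_thr
    # per-class mean available: treat npoint_thr as a fraction of that mean
    return npoint > npoint_thr * numpoint_mean
```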


@@ -0,0 +1,108 @@
from typing import List

import glob
import os

import h5py
import numpy as np
import torch


def convertToPointCloud(
    files: List[str],
    outPutFolder: str,
    split: str = 'train',
    samplePoints: int = 0,  # 0 = no sampling
):
    train_instance_numpoints = 0
    train_instances = 0
    for file in files:
        # splitext instead of str.strip('.h5'), which strips the *characters*
        # '.', 'h' and '5' from both ends rather than removing the suffix
        name = os.path.splitext(os.path.basename(file))[0]
        outFilePath = os.path.join(outPutFolder, name + '.pth')
        # read in file
        with h5py.File(file, "r") as data:
            raw = np.array(data['raw'])
            colors = raw.flatten()  # column first
            colors = np.repeat(colors[:, np.newaxis], 3, axis=1)
            colors = colors.astype(np.float32)
            # normalize from the int16 range to [-1, 1]
            colors = colors / 32767.5 - 1
            # regular grid of coordinates in [0, 1]^3, one point per voxel
            coords = np.mgrid[
                0:1:raw.shape[0] * 1j,
                0:1:raw.shape[1] * 1j,
                0:1:raw.shape[2] * 1j,
            ].reshape(3, -1).T
            coords = coords.astype(np.float32)
            # sampling of points
            samples = np.arange(0, coords.shape[0])
            if samplePoints > 0:
                # NOTE: np.random.choice samples with replacement by default,
                # so the same point may be picked more than once
                samples = np.random.choice(coords.shape[0], samplePoints)
            colors = colors[samples]
            coords = coords[samples]
            if split != 'test':
                # seems a bit weird, but they used float64 for the labels,
                # so let's use it as well
                sem_labels = np.array(data['foreground']).flatten().astype(np.float64)
                # map the background value (= 0, i.e. sugar walls) to -100
                sem_labels[sem_labels == 0] = -100
                instance_labels = np.array(data['label']).flatten().astype(np.float64)
                # sampling
                sem_labels = sem_labels[samples]
                instance_labels = instance_labels[samples]
                # keep track of the mean number of points per instance for the
                # training dataset
                # NOTE: This only works as long as we have one type of class
                if split == 'train':
                    values, counts = np.unique(instance_labels, return_counts=True)
                    assert values[0] == 0
                    print(values, counts)
                    train_instance_numpoints += np.sum(counts[1:])
                    train_instances += len(counts[1:])
                torch.save((coords, colors, sem_labels, instance_labels), outFilePath)
            else:
                torch.save((coords, colors), outFilePath)
    if split == 'train':
        assert train_instances > 0
        print('class_numpoints_mean: ', train_instance_numpoints / train_instances)


def getFiles(files, fileSplit):
    res = []
    for filePath in files:
        name = os.path.basename(filePath)
        num = name[:2] if name[:2].isdigit() else name[:1]
        if int(num) in fileSplit:
            res.append(filePath)
    return res


if __name__ == '__main__':
    data_folder = 'overfit_no_filter'

    split = 'train'
    trainFiles = sorted(glob.glob(data_folder + "/" + split + '/*.h5'))
    print(trainFiles)
    assert len(trainFiles) > 0
    trainOutDir = split
    os.makedirs(trainOutDir, exist_ok=True)
    convertToPointCloud(trainFiles, trainOutDir, split, samplePoints=35145)

    split = 'val'
    valFiles = sorted(glob.glob(data_folder + "/" + split + '/*.h5'))
    print(valFiles)
    assert len(valFiles) > 0
    valOutDir = split
    os.makedirs(valOutDir, exist_ok=True)
    convertToPointCloud(valFiles, valOutDir, split, samplePoints=35145)
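
As a quick sanity check, a prepared sample can be loaded back with `torch.load`; the file name below is hypothetical, any `.pth` written by `convertToPointCloud` works:

``` python
import torch

# hypothetical file name, for illustration only
coords, colors, sem_labels, instance_labels = torch.load('train/example.pth')
print(coords.shape, colors.shape, sem_labels.shape, instance_labels.shape)
```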


@@ -1,4 +1,4 @@
```
``` yaml
model:
  channels: 32 # number of base channels for the backbone network
  num_blocks: 7 # number of backbone blocks