mirror of https://github.com/botastic/SoftGroup.git, synced 2025-10-16 11:45:42 +00:00
added ymir dataset preparation
commit 04358926d0
parent a87940664d
.gitignore (vendored): 5 lines added

@@ -80,3 +80,8 @@ dataset/s3dis/Stanford3dDataset_v1.2
 dataset/stpls3d/train
 dataset/stpls3d/val
 dataset/stpls3d/Synthetic_v3_InstanceSegmentation
+
+# ymir dataset files
+dataset/ymir/overfit_no_filter
+**/*.h5
+**/*.pth
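
A quick way to check what the new ignore rules cover is to evaluate them with the third-party `pathspec` package, which implements gitignore-style matching (a minimal sketch; the package and the example paths are illustrative, not part of this commit):

```python
# Sketch: verify the new .gitignore patterns with 'pathspec' (pip install pathspec).
# The paths below are examples, not files tracked in the repo.
import pathspec

patterns = ['dataset/ymir/overfit_no_filter', '**/*.h5', '**/*.pth']
spec = pathspec.PathSpec.from_lines('gitwildmatch', patterns)

assert spec.match_file('dataset/ymir/overfit_no_filter/prepared/train/0.pth')
assert spec.match_file('some/other/dir/volume.h5')
assert not spec.match_file('configs/softgroup_ymir_backbone_overfit_no_filter.yaml')
```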
configs/softgroup_ymir_backbone_overfit_no_filter.yaml (new file, 81 lines)

@@ -0,0 +1,81 @@
```yaml
model:
  channels: 16
  num_blocks: 7
  semantic_classes: 2
  instance_classes: 1
  sem2ins_classes: []
  semantic_only: True
  semantic_weight: [1.0, 1.0, 44.0, 21.9, 1.8, 25.1, 31.5, 21.8, 24.0, 54.4, 114.4,
                    81.2, 43.6, 9.7, 22.4] # TODO: What is this?!
  with_coords: False
  ignore_label: -100
  grouping_cfg:
    score_thr: 0.2
    radius: 0.9 # TODO: depending on the scale
    mean_active: 3
    # TODO: Insert the computed values
    class_numpoint_mean: [-1., 10408., 58., 124., 1351., 162., 430., 1090., 451., 26., 43.,
                          61., 39., 109., 1239]
    npoint_thr: 0.05 # absolute if class_numpoint == -1, relative if class_numpoint != -1
    ignore_classes: [0] # TODO: Should we add the walls here?
  instance_voxel_cfg:
    scale: 3 # TODO: Adjust (?)
    spatial_shape: 20
  train_cfg:
    max_proposal_num: 200 # TODO: probably needs to be adjusted to the number of pores in one sample
    pos_iou_thr: 0.5
  test_cfg:
    x4_split: False
    cls_score_thr: 0.001
    mask_score_thr: -0.5
    min_npoint: 100
  fixed_modules: []

data:
  train:
    type: 'ymir'
    data_root: 'dataset/ymir/overfit_no_filter/prepared'
    prefix: 'train'
    suffix: '.pth'
    training: True
    repeat: 4
    voxel_cfg:
      scale: 3 # TODO: Adjust
      spatial_shape: [128, 512]
      max_npoint: 250000
      min_npoint: 5000
  test:
    type: 'ymir'
    data_root: 'dataset/ymir/overfit_no_filter/prepared'
    prefix: 'val'
    suffix: '.pth'
    training: False
    voxel_cfg:
      scale: 3 # TODO: Adjust
      spatial_shape: [128, 512]
      max_npoint: 250000
      min_npoint: 5000

dataloader:
  train:
    batch_size: 4
    num_workers: 4
  test:
    batch_size: 1
    num_workers: 1

optimizer:
  type: 'Adam'
  lr: 0.004

save_cfg:
  semantic: True
  offset: True
  instance: True

fp16: False
epochs: 20
step_epoch: 20
save_freq: 4
pretrain: ''
work_dir: ''
```
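
As a quick sanity check, the new config parses with plain PyYAML (a minimal sketch for inspection only; SoftGroup's training scripts do their own config handling):

```python
# Sketch: parse the new config with PyYAML and inspect a few fields.
import yaml

with open('configs/softgroup_ymir_backbone_overfit_no_filter.yaml') as f:
    cfg = yaml.safe_load(f)

# two semantic classes and a single instance class (pores)
assert cfg['model']['semantic_classes'] == 2
assert cfg['model']['instance_classes'] == 1
# the dataset type the loader will dispatch on
print(cfg['data']['train']['type'])            # ymir
print(cfg['model']['grouping_cfg']['radius'])  # 0.9
```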
dataset/ymir/prepare_data.py (new file, 108 lines)

@@ -0,0 +1,108 @@
```python
from typing import List
import glob
import os

import h5py
import numpy as np
import torch


def convertToPointCloud(
    files: List[str],
    outPutFolder: str,
    split: str = 'train',
    samplePoints: int = 0,  # 0 = no sampling
):
    train_instance_numpoints = 0
    train_instances = 0
    for file in files:
        # os.path.splitext drops the '.h5' extension; str.strip('.h5') would
        # also eat leading/trailing 'h'/'5'/'.' characters from the name itself.
        name = os.path.splitext(os.path.basename(file))[0]
        outFilePath = os.path.join(outPutFolder, name + '.pth')

        # read in file
        with h5py.File(file, "r") as data:
            raw = np.array(data['raw'])
            colors = raw.flatten()  # column first
            colors = np.repeat(colors[:, np.newaxis], 3, axis=1)
            colors = colors.astype(np.float32)
            # normalize int16 intensities from [-32768, 32767] to roughly [-1, 1]
            colors = colors / 32767.5 - 1

            # regular grid of xyz coordinates in [0, 1], one point per voxel
            coords = np.mgrid[
                0:1:raw.shape[0] * 1j,
                0:1:raw.shape[1] * 1j,
                0:1:raw.shape[2] * 1j,
            ].reshape(3, -1).T
            coords = coords.astype(np.float32)

            # sampling of points (with replacement)
            samples = np.arange(0, coords.shape[0])
            if samplePoints > 0:
                samples = np.random.choice(coords.shape[0], samplePoints)

            colors = colors[samples]
            coords = coords[samples]

            if split != 'test':
                # seems a bit weird, but they used float64 for the labels, so
                # let's use it as well
                sem_labels = np.array(data['foreground']).flatten().astype(np.float64)
                # map the background value (= 0, i.e. sugar walls) to -100
                sem_labels[sem_labels == 0] = -100

                # seems a bit weird, but they used float64 for the labels, so
                # let's use it as well
                instance_labels = np.array(data['label']).flatten().astype(np.float64)

                # sampling
                sem_labels = sem_labels[samples]
                instance_labels = instance_labels[samples]

                # keep track of the mean number of points per instance for the
                # training dataset
                # NOTE: This only works as long as we have one type of class
                if split == 'train':
                    values, counts = np.unique(instance_labels, return_counts=True)
                    assert values[0] == 0
                    print(values, counts)
                    train_instance_numpoints += np.sum(counts[1:])
                    train_instances += len(counts[1:])

                torch.save((coords, colors, sem_labels, instance_labels), outFilePath)
            else:
                torch.save((coords, colors), outFilePath)

    if split == 'train':
        assert train_instances > 0
        print('class_numpoints_mean: ', train_instance_numpoints / train_instances)


def getFiles(files, fileSplit):
    res = []
    for filePath in files:
        name = os.path.basename(filePath)
        # the leading one- or two-digit prefix of the file name is the sample index
        num = name[:2] if name[:2].isdigit() else name[:1]
        if int(num) in fileSplit:
            res.append(filePath)
    return res


if __name__ == '__main__':
    data_folder = 'overfit_no_filter'

    split = 'train'
    trainFiles = sorted(glob.glob(data_folder + "/" + split + '/*.h5'))
    print(trainFiles)
    assert len(trainFiles) > 0
    trainOutDir = split
    os.makedirs(trainOutDir, exist_ok=True)
    convertToPointCloud(trainFiles, trainOutDir, split, samplePoints=35145)

    split = 'val'
    valFiles = sorted(glob.glob(data_folder + "/" + split + '/*.h5'))
    print(valFiles)
    assert len(valFiles) > 0
    valOutDir = split
    os.makedirs(valOutDir, exist_ok=True)
    convertToPointCloud(valFiles, valOutDir, split, samplePoints=35145)
```
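
A generated sample can be loaded straight back with torch.load to verify the output (a minimal sketch; the file name is illustrative, actual names come from the source .h5 basenames):

```python
# Sketch: inspect one of the generated .pth samples (file name is an example).
import torch

coords, colors, sem_labels, instance_labels = torch.load('train/1.pth')
print(coords.shape, colors.shape)    # both (N, 3), N = samplePoints
print(coords.min(0), coords.max(0))  # coordinates lie in [0, 1]
print(set(sem_labels.tolist()))      # e.g. {-100.0, 1.0}: ignored background vs. foreground
```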
@@ -1,4 +1,4 @@
-```
+``` yaml
 model:
   channels: 32 # number of base channel for the backbone network
   num_blocks: 7 # number of backbone blocks