update forward test s3dis

Thang Vu 2022-04-05 08:51:08 +00:00
parent b62be6b409
commit f822a70dd5
10 changed files with 684 additions and 1127 deletions

.isort.cfg Normal file

@ -0,0 +1,7 @@
[isort]
line_length = 100
multi_line_output = 0
known_standard_library = setuptools
known_third_party = spconv,numpy,torch,torchvision
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY
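
For a quick check that these settings behave as intended, isort's Python API can be called with the same line length (a minimal sketch; assumes isort 5 is installed, and the sample source string is only illustrative):

import isort

# The CLI (`isort .`) discovers .isort.cfg on its own; here the key
# setting is passed explicitly so the demo is self-contained.
messy = "import torch\nimport os\nimport numpy as np\n"
print(isort.code(messy, line_length=100))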

.style.yapf Normal file

@ -0,0 +1,5 @@
[style]
BASED_ON_STYLE = pep8
BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
COLUMN_LIMIT = 100
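
yapf can likewise apply this style file through its Python API (a sketch; assumes a 2022-era yapf where FormatCode returns a (code, changed) tuple):

from yapf.yapflib.yapf_api import FormatCode

src = "x = {  'a':37,'b':42,}\n"
formatted, changed = FormatCode(src, style_config='.style.yapf')
print(formatted)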


@ -9,9 +9,11 @@ model:
num_blocks: 7
semantic_classes: 20
instance_classes: 18
sem2ins_classes: []
semantic_only: False
ignore_label: -100
grouping_cfg:
score_thr: 0.2
radius: 0.04
mean_active: 300
class_numpoint_mean: [-1., -1., 3917., 12056., 2303.,
@ -23,6 +25,9 @@ model:
spatial_shape: 20
test_cfg:
x4_split: False
cls_score_thr: 0.001
mask_score_thr: -0.5
min_npoint: 100
fixed_modules: []
data:


@ -4,6 +4,52 @@ GENERAL:
model_dir: model/softgroup/softgroup.py
dataset_dir: data/scannetv2_inst.py
model:
channels: 32
num_blocks: 7
semantic_classes: 13
instance_classes: 13
sem2ins_classes: [0, 1]
semantic_only: False
ignore_label: -100
grouping_cfg:
score_thr: 0.2
radius: 0.04
mean_active: 300
class_numpoint_mean: [1823, 7457, 6189, 7424, 34229, 1724, 5439,
6016, 39796, 5279, 5092, 12210, 10225]
instance_voxel_cfg:
scale: 50
spatial_shape: 20
test_cfg:
x4_split: True
cls_score_thr: 0.001
mask_score_thr: -0.5
min_npoint: 100
fixed_modules: []
data:
train:
data_root: 'dataset/s3dis'
prefix: 'val'
suffix: '_inst_nostuff.pth'
voxel_cfg:
scale: 50
spatial_shape: [128, 512]
max_npoint: 250000
test:
data_root: 'dataset/s3dis'
prefix: 'Area_5'
suffix: '_inst_nostuff.pth'
voxel_cfg:
scale: 50
spatial_shape: [128, 512]
max_npoint: 250000
data_loader:
batch_size: 4
num_workers: 4
DATA:
data_root: dataset
dataset: s3dis
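
A config like this is typically parsed into nested attribute-style dicts and splatted into the model and dataset constructors. A minimal sketch of that pattern (the path and the easydict dependency are assumptions, not necessarily what the repo's scripts do):

import yaml
from easydict import EasyDict

with open('config/softgroup_s3dis.yaml') as f:  # hypothetical path
    cfg = EasyDict(yaml.safe_load(f))

# e.g. model = SoftGroup(**cfg.model); Dataset(**cfg.data.test)
print(cfg.model.test_cfg.x4_split, cfg.data.test.prefix)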


@ -12,36 +12,48 @@ from lib.softgroup_ops.functions import softgroup_ops
import torch.distributed as dist
class Dataset:
def __init__(self, test=False):
self.data_root = cfg.data_root
self.dataset = cfg.dataset
self.filename_suffix = cfg.filename_suffix
# def __init__(self, test=False):
# self.data_root = cfg.data_root
# self.dataset = cfg.dataset
# self.filename_suffix = cfg.filename_suffix
self.batch_size = cfg.batch_size
self.train_workers = cfg.train_workers
self.val_workers = cfg.train_workers
# self.batch_size = cfg.batch_size
# self.train_workers = cfg.train_workers
# self.val_workers = cfg.train_workers
self.full_scale = cfg.full_scale
self.scale = cfg.scale
self.max_npoint = cfg.max_npoint
self.mode = cfg.mode
self.train_areas = cfg.train_areas
self.test_area = cfg.test_area
self.train_repeats = cfg.train_repeats
# self.full_scale = cfg.full_scale
# self.scale = cfg.scale
# self.max_npoint = cfg.max_npoint
# self.mode = cfg.mode
# self.train_areas = cfg.train_areas
# self.test_area = cfg.test_area
# self.train_repeats = cfg.train_repeats
# self.train_split = getattr(cfg, 'train_split', 'train')
# # self.train_split = getattr(cfg, 'train_split', 'train')
if test:
self.test_split = cfg.split # val or test
self.test_workers = cfg.test_workers
cfg.batch_size = 1
# if test:
# self.test_split = cfg.split # val or test
# self.test_workers = cfg.test_workers
# cfg.batch_size = 1
CLASSES = ("ceiling", "floor", "wall", "beam", "column", "window", "door", "chair", "table",
"bookcase", "sofa", "board", "clutter")
def __init__(self, data_root, prefix, suffix, voxel_cfg=None):
self.data_root = data_root
self.prefix = prefix
self.suffix = suffix
self.voxel_cfg = voxel_cfg
self.test_split = 'val'
def trainLoader(self):
train_file_names = []
for area in self.train_areas:
train_file_names += glob.glob(os.path.join(self.data_root, self.dataset, 'preprocess', area + '*' + self.filename_suffix))
train_file_names += glob.glob(
os.path.join(self.data_root, self.dataset, 'preprocess',
area + '*' + self.filename_suffix))
train_file_names = sorted(train_file_names)
self.train_files = train_file_names * self.train_repeats
@ -49,12 +61,20 @@ class Dataset:
logger.info('Training samples: {}'.format(len(self.train_files)))
train_set = list(range(len(self.train_files)))
self.train_data_loader = DataLoader(train_set, batch_size=self.batch_size, collate_fn=self.trainMerge, num_workers=self.train_workers,
shuffle=True, sampler=None, drop_last=True, pin_memory=True)
self.train_data_loader = DataLoader(
train_set,
batch_size=self.batch_size,
collate_fn=self.trainMerge,
num_workers=self.train_workers,
shuffle=True,
sampler=None,
drop_last=True,
pin_memory=True)
def dist_trainLoader(self):
train_file_names = sorted(glob.glob(os.path.join(self.data_root, self.dataset, 'train', '*' + self.filename_suffix)))
train_file_names = sorted(
glob.glob(
os.path.join(self.data_root, self.dataset, 'train', '*' + self.filename_suffix)))
self.train_files = [torch.load(i) for i in train_file_names]
logger.info('Training samples: {}'.format(len(self.train_files)))
@ -62,38 +82,58 @@ class Dataset:
train_set = list(range(len(self.train_files)))
# self.train_data_loader = DataLoader(train_set, batch_size=self.batch_size, collate_fn=self.trainMerge, num_workers=self.train_workers,
# shuffle=True, sampler=None, drop_last=True, pin_memory=True)
# world_size = dist.get_world_size()
# rank = dist.get_rank()
# self.data_sampler = torch.utils.data.distributed.DistributedSampler(train_set, num_replicas=world_size, rank=rank)
self.data_sampler = torch.utils.data.distributed.DistributedSampler(train_set)
self.train_data_loader = DataLoader(train_set, batch_size=self.batch_size,
collate_fn=self.trainMerge,
num_workers=self.train_workers,
shuffle=False, sampler=self.data_sampler,
drop_last=False, pin_memory=True)
self.train_data_loader = DataLoader(
train_set,
batch_size=self.batch_size,
collate_fn=self.trainMerge,
num_workers=self.train_workers,
shuffle=False,
sampler=self.data_sampler,
drop_last=False,
pin_memory=True)
def valLoader(self):
val_file_names = sorted(glob.glob(os.path.join(self.data_root, self.dataset, 'preprocess_sample', self.test_area + '*' + self.filename_suffix)))
self.val_files = val_file_names
self.val_file_names = sorted(
glob.glob(os.path.join(self.data_root, 'preprocess', self.prefix + '*' + self.suffix)))
assert len(self.val_file_names) > 0
# self.val_files = [torch.load(i) for i in val_file_names]
logger.info('Validation samples: {}'.format(len(self.val_files)))
val_set = list(range(len(self.val_files)))
self.val_data_loader = DataLoader(val_set, batch_size=self.batch_size, collate_fn=self.valMerge, num_workers=self.val_workers,
shuffle=False, drop_last=False, pin_memory=True)
logger.info('Validation samples: {}'.format(len(self.val_file_names)))
val_set = list(range(len(self.val_file_names)))
self.val_data_loader = DataLoader(
val_set,
batch_size=1,
collate_fn=self.valMerge,
num_workers=1, # TODO check num_worker
shuffle=False,
drop_last=False,
pin_memory=True)
def testLoader(self):
self.test_file_names = sorted(glob.glob(os.path.join(self.data_root, self.dataset, 'preprocess', self.test_area + '*' + self.filename_suffix)))
self.test_file_names = sorted(
glob.glob(
os.path.join(self.data_root, self.dataset, 'preprocess',
self.test_area + '*' + self.filename_suffix)))
self.test_files = self.test_file_names
logger.info('Testing samples ({}): {}'.format(self.test_split, len(self.test_files)))
test_set = list(np.arange(len(self.test_files)))
self.test_data_loader = DataLoader(test_set, batch_size=1, collate_fn=self.testMerge, num_workers=0,
shuffle=False, drop_last=False, pin_memory=True)
self.test_data_loader = DataLoader(
test_set,
batch_size=1,
collate_fn=self.testMerge,
num_workers=0,
shuffle=False,
drop_last=False,
pin_memory=True)
# Elastic distortion
def elastic(self, x, gran, mag):
@ -101,7 +141,7 @@ class Dataset:
blur1 = np.ones((1, 3, 1)).astype('float32') / 3
blur2 = np.ones((1, 1, 3)).astype('float32') / 3
bb = np.abs(x).max(0).astype(np.int32)//gran + 3
bb = np.abs(x).max(0).astype(np.int32) // gran + 3
noise = [np.random.randn(bb[0], bb[1], bb[2]).astype('float32') for _ in range(3)]
noise = [scipy.ndimage.filters.convolve(n, blur0, mode='constant', cval=0) for n in noise]
noise = [scipy.ndimage.filters.convolve(n, blur1, mode='constant', cval=0) for n in noise]
@ -109,12 +149,16 @@ class Dataset:
noise = [scipy.ndimage.filters.convolve(n, blur0, mode='constant', cval=0) for n in noise]
noise = [scipy.ndimage.filters.convolve(n, blur1, mode='constant', cval=0) for n in noise]
noise = [scipy.ndimage.filters.convolve(n, blur2, mode='constant', cval=0) for n in noise]
ax = [np.linspace(-(b-1)*gran, (b-1)*gran, b) for b in bb]
interp = [scipy.interpolate.RegularGridInterpolator(ax, n, bounds_error=0, fill_value=0) for n in noise]
def g(x_):
return np.hstack([i(x_)[:,None] for i in interp])
return x + g(x) * mag
ax = [np.linspace(-(b - 1) * gran, (b - 1) * gran, b) for b in bb]
interp = [
scipy.interpolate.RegularGridInterpolator(ax, n, bounds_error=0, fill_value=0)
for n in noise
]
def g(x_):
return np.hstack([i(x_)[:, None] for i in interp])
return x + g(x) * mag
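
elastic() builds three smoothed noise grids (one per axis), samples them at every point via the interpolators in g(), and adds the result as a displacement of magnitude mag. A standalone toy version of the same idea (toy gran/mag values; a single Gaussian blur stands in for the repeated box blurs):

import numpy as np
import scipy.ndimage
import scipy.interpolate

pts = (np.random.rand(1000, 3) * 50).astype('float32')
gran, mag = 6, 40  # toy granularity / magnitude
bb = np.abs(pts).max(0).astype(np.int32) // gran + 3
noise = [np.random.randn(*bb).astype('float32') for _ in range(3)]
noise = [scipy.ndimage.gaussian_filter(n, sigma=1, mode='constant') for n in noise]
ax = [np.linspace(-(b - 1) * gran, (b - 1) * gran, b) for b in bb]
interp = [
    scipy.interpolate.RegularGridInterpolator(ax, n, bounds_error=False, fill_value=0)
    for n in noise
]
disp = np.stack([f(pts) for f in interp], axis=1)  # (N, 3) smooth displacement field
pts_aug = pts + disp * mag
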
def getInstanceInfo(self, xyz, instance_label, label):
'''
@ -122,8 +166,10 @@ class Dataset:
:param instance_label: (n), int, (0~nInst-1, -100)
:return: instance_num, dict
'''
instance_info = np.ones((xyz.shape[0], 9), dtype=np.float32) * -100.0 # (n, 9), float, (cx, cy, cz, minx, miny, minz, maxx, maxy, maxz)
instance_pointnum = [] # (nInst), int
instance_info = np.ones(
(xyz.shape[0], 9), dtype=np.float32
) * -100.0 # (n, 9), float, (cx, cy, cz, minx, miny, minz, maxx, maxy, maxz)
instance_pointnum = [] # (nInst), int
instance_cls = []
instance_num = int(instance_label.max()) + 1
for i_ in range(instance_num):
@ -146,9 +192,11 @@ class Dataset:
instance_cls.append(label[cls_loc])
# assert (0 not in instance_cls) and (1 not in instance_cls) # sanity check stuff cls
return instance_num, {"instance_info": instance_info, "instance_pointnum": instance_pointnum,
"instance_cls": instance_cls}
return instance_num, {
"instance_info": instance_info,
"instance_pointnum": instance_pointnum,
"instance_cls": instance_cls
}
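
Each row of instance_info follows the layout (cx, cy, cz, minx, miny, minz, maxx, maxy, maxz), repeated for every point of the instance. A two-point toy instance makes the encoding concrete:

import numpy as np

xyz_i = np.array([[0., 0., 0.], [2., 2., 2.]], dtype=np.float32)
info_row = np.concatenate([xyz_i.mean(0), xyz_i.min(0), xyz_i.max(0)])
# -> [1. 1. 1.  0. 0. 0.  2. 2. 2.], written to instance_info for both points
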
def dataAugment(self, xyz, jitter=False, flip=False, rot=False):
m = np.eye(3)
@ -158,10 +206,10 @@ class Dataset:
m[0][0] *= np.random.randint(0, 2) * 2 - 1 # flip x randomly
if rot:
theta = np.random.rand() * 2 * math.pi
m = np.matmul(m, [[math.cos(theta), math.sin(theta), 0], [-math.sin(theta), math.cos(theta), 0], [0, 0, 1]]) # rotation
m = np.matmul(m, [[math.cos(theta), math.sin(theta), 0],
[-math.sin(theta), math.cos(theta), 0], [0, 0, 1]]) # rotation
return np.matmul(xyz, m)
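
The rot branch multiplies in a rotation about the z axis; a quick sanity check that the matrix is orthonormal, so np.matmul(xyz, m) preserves distances:

import math
import numpy as np

theta = 0.3
rot = np.array([[math.cos(theta), math.sin(theta), 0],
                [-math.sin(theta), math.cos(theta), 0],
                [0, 0, 1]])
assert np.allclose(rot @ rot.T, np.eye(3))
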
# def crop(self, xyz):
# '''
# :param xyz: (n, 3) >= 0
@ -186,8 +234,7 @@ class Dataset:
:param xyz: (n, 3) >= 0
"""
xyz_offset = xyz.copy()
valid_idxs = (xyz_offset.min(1) >= 0) * (
(xyz < self.full_scale[1]).sum(1) == 3)
valid_idxs = (xyz_offset.min(1) >= 0) * ((xyz < self.full_scale[1]).sum(1) == 3)
full_scale = np.array([self.full_scale[1]] * 3)
room_range = xyz.max(0) - xyz.min(0)
@ -195,16 +242,13 @@ class Dataset:
step_temp = step
if valid_idxs.sum() > 1e6:
step_temp = step * 2
offset = np.clip(full_scale - room_range + 0.001, None,
0) * np.random.rand(3)
offset = np.clip(full_scale - room_range + 0.001, None, 0) * np.random.rand(3)
xyz_offset = xyz + offset
valid_idxs = (xyz_offset.min(1) >= 0) * (
(xyz_offset < full_scale).sum(1) == 3)
valid_idxs = (xyz_offset.min(1) >= 0) * ((xyz_offset < full_scale).sum(1) == 3)
full_scale[:2] -= step_temp
return xyz_offset, valid_idxs
def getCroppedInstLabel(self, instance_label, valid_idxs):
instance_label = instance_label[valid_idxs]
j = 0
@ -214,7 +258,8 @@ class Dataset:
instance_label[instance_label == instance_label.max()] = j
j += 1
except:
import pdb; pdb.set_trace()
import pdb
pdb.set_trace()
return instance_label
def trainMerge(self, id):
@ -226,7 +271,7 @@ class Dataset:
instance_infos = [] # (N, 9)
instance_pointnum = [] # (total_nInst), int
instance_cls = [] # (total_nInst), long
instance_cls = [] # (total_nInst), long
batch_offsets = [0]
@ -257,7 +302,7 @@ class Dataset:
# crop
xyz, valid_idxs = self.crop(xyz)
if valid_idxs.sum() == 0: # handle some corner cases
if valid_idxs.sum() == 0: # handle some corner cases
continue
xyz_middle = xyz_middle[valid_idxs]
@ -267,9 +312,11 @@ class Dataset:
instance_label = self.getCroppedInstLabel(instance_label, valid_idxs)
# get instance information
inst_num, inst_infos = self.getInstanceInfo(xyz_middle, instance_label.astype(np.int32), label)
inst_info = inst_infos["instance_info"] # (n, 9), (cx, cy, cz, minx, miny, minz, maxx, maxy, maxz)
inst_pointnum = inst_infos["instance_pointnum"] # (nInst), list
inst_num, inst_infos = self.getInstanceInfo(xyz_middle, instance_label.astype(np.int32),
label)
inst_info = inst_infos[
"instance_info"] # (n, 9), (cx, cy, cz, minx, miny, minz, maxx, maxy, maxz)
inst_pointnum = inst_infos["instance_pointnum"] # (nInst), list
inst_cls = inst_infos["instance_cls"]
instance_label[np.where(instance_label != -100)] += total_inst_num
@ -278,7 +325,10 @@ class Dataset:
# merge the scene to the batch
batch_offsets.append(batch_offsets[-1] + xyz.shape[0])
locs.append(torch.cat([torch.LongTensor(xyz.shape[0], 1).fill_(i), torch.from_numpy(xyz).long()], 1))
locs.append(
torch.cat(
[torch.LongTensor(xyz.shape[0], 1).fill_(i),
torch.from_numpy(xyz).long()], 1))
locs_float.append(torch.from_numpy(xyz_middle))
feats.append(torch.from_numpy(rgb).float() + torch.randn(3) * 0.1)
labels.append(torch.from_numpy(label))
@ -291,26 +341,40 @@ class Dataset:
# merge all the scenes in the batch
batch_offsets = torch.tensor(batch_offsets, dtype=torch.int) # int (B+1)
locs = torch.cat(locs, 0) # long (N, 1 + 3), the batch item idx is put in locs[:, 0]
locs = torch.cat(locs, 0) # long (N, 1 + 3), the batch item idx is put in locs[:, 0]
locs_float = torch.cat(locs_float, 0).to(torch.float32) # float (N, 3)
feats = torch.cat(feats, 0) # float (N, C)
labels = torch.cat(labels, 0).long() # long (N)
instance_labels = torch.cat(instance_labels, 0).long() # long (N)
feats = torch.cat(feats, 0) # float (N, C)
labels = torch.cat(labels, 0).long() # long (N)
instance_labels = torch.cat(instance_labels, 0).long() # long (N)
instance_infos = torch.cat(instance_infos, 0).to(torch.float32) # float (N, 9) (meanxyz, minxyz, maxxyz)
instance_infos = torch.cat(instance_infos,
0).to(torch.float32) # float (N, 9) (meanxyz, minxyz, maxxyz)
instance_pointnum = torch.tensor(instance_pointnum, dtype=torch.int) # int (total_nInst)
instance_cls = torch.tensor(instance_cls, dtype=torch.long) # long (total_nInst)
instance_cls = torch.tensor(instance_cls, dtype=torch.long) # long (total_nInst)
spatial_shape = np.clip((locs.max(0)[0][1:] + 1).numpy(), self.full_scale[0], None) # long (3)
spatial_shape = np.clip((locs.max(0)[0][1:] + 1).numpy(), self.full_scale[0],
None) # long (3)
# voxelize
voxel_locs, p2v_map, v2p_map = softgroup_ops.voxelization_idx(locs, self.batch_size, self.mode)
return {'locs': locs, 'voxel_locs': voxel_locs, 'p2v_map': p2v_map, 'v2p_map': v2p_map,
'locs_float': locs_float, 'feats': feats, 'labels': labels, 'instance_labels': instance_labels,
'instance_info': instance_infos, 'instance_pointnum': instance_pointnum, 'instance_cls': instance_cls,
'id': id, 'offsets': batch_offsets, 'spatial_shape': spatial_shape}
voxel_locs, p2v_map, v2p_map = softgroup_ops.voxelization_idx(locs, self.batch_size,
self.mode)
return {
'locs': locs,
'voxel_locs': voxel_locs,
'p2v_map': p2v_map,
'v2p_map': v2p_map,
'locs_float': locs_float,
'feats': feats,
'labels': labels,
'instance_labels': instance_labels,
'instance_info': instance_infos,
'instance_pointnum': instance_pointnum,
'instance_cls': instance_cls,
'id': id,
'offsets': batch_offsets,
'spatial_shape': spatial_shape
}
def valMerge(self, id):
locs = []
@ -321,13 +385,43 @@ class Dataset:
instance_infos = [] # (N, 9)
instance_pointnum = [] # (total_nInst), int
instance_cls = [] # (total_nInst), long
instance_cls = [] # (total_nInst), long
batch_offsets = [0]
total_inst_num = 0
for i, idx in enumerate(id):
xyz_origin, rgb, label, instance_label, _, _ = torch.load(self.val_files[idx])
xyz_origin, rgb, label, instance_label, _, _ = torch.load(self.val_file_names[idx])
# divide into 4 pieces
inds = np.arange(xyz_origin.shape[0])
piece_1 = inds[::4]
piece_2 = inds[1::4]
piece_3 = inds[2::4]
piece_4 = inds[3::4]
xyz_origin_aug = self.dataAugment(xyz_origin, False, True, True)
for batch, piece in enumerate([piece_1, piece_2, piece_3, piece_4]):
# flip x / rotation
xyz_middle = xyz_origin_aug[piece]
# scale
xyz = xyz_middle * self.voxel_cfg.scale
# offset
xyz -= xyz.min(0)
# merge the scene to the batch
batch_offsets.append(batch_offsets[-1] + xyz.shape[0])
locs.append(
torch.cat([
torch.LongTensor(xyz.shape[0], 1).fill_(batch),
torch.from_numpy(xyz).long()
], 1))
locs_float.append(torch.from_numpy(xyz_middle))
feats.append(torch.from_numpy(rgb[piece]).float())
# subsample
# N = xyz_origin.shape[0]
@ -338,29 +432,31 @@ class Dataset:
# instance_label = self.getCroppedInstLabel(instance_label, inds)
# flip x / rotation
xyz_middle = self.dataAugment(xyz_origin, False, False, False)
# xyz_middle = self.dataAugment(xyz_origin, False, False, False)
# scale
xyz = xyz_middle * self.scale
# # scale
# xyz = xyz_middle * self.scale
# offset
xyz -= xyz.min(0)
# # offset
# xyz -= xyz.min(0)
# crop
# xyz, valid_idxs = self.crop(xyz)
valid_idxs = np.arange(xyz.shape[0])
if valid_idxs.sum() == 0: # handle some corner cases
continue
valid_idxs = np.arange(xyz_origin.shape[0])
# if valid_idxs.sum() == 0: # handle some corner cases
# continue
xyz_middle = xyz_middle[valid_idxs]
xyz = xyz[valid_idxs]
rgb = rgb[valid_idxs]
label = label[valid_idxs]
# xyz_middle = xyz_middle[valid_idxs]
# xyz = xyz[valid_idxs]
# rgb = rgb[valid_idxs]
# label = label[valid_idxs]
instance_label = self.getCroppedInstLabel(instance_label, valid_idxs)
# get instance information
inst_num, inst_infos = self.getInstanceInfo(xyz_middle, instance_label.astype(np.int32), label)
inst_info = inst_infos["instance_info"] # (n, 9), (cx, cy, cz, minx, miny, minz, maxx, maxy, maxz)
inst_num, inst_infos = self.getInstanceInfo(xyz_origin, instance_label.astype(np.int32),
label)
inst_info = inst_infos[
"instance_info"] # (n, 9), (cx, cy, cz, minx, miny, minz, maxx, maxy, maxz)
inst_pointnum = inst_infos["instance_pointnum"] # (nInst), list
inst_cls = inst_infos["instance_cls"]
@ -368,11 +464,11 @@ class Dataset:
total_inst_num += inst_num
# merge the scene to the batch
batch_offsets.append(batch_offsets[-1] + xyz.shape[0])
# batch_offsets.append(batch_offsets[-1] + xyz.shape[0])
locs.append(torch.cat([torch.LongTensor(xyz.shape[0], 1).fill_(i), torch.from_numpy(xyz).long()], 1))
locs_float.append(torch.from_numpy(xyz_middle))
feats.append(torch.from_numpy(rgb))
# locs.append(torch.cat([torch.LongTensor(xyz.shape[0], 1).fill_(i), torch.from_numpy(xyz).long()], 1))
# locs_float.append(torch.from_numpy(xyz_middle))
# feats.append(torch.from_numpy(rgb))
labels.append(torch.from_numpy(label))
instance_labels.append(torch.from_numpy(instance_label))
@ -383,33 +479,46 @@ class Dataset:
# merge all the scenes in the batch
batch_offsets = torch.tensor(batch_offsets, dtype=torch.int) # int (B+1)
locs = torch.cat(locs, 0) # long (N, 1 + 3), the batch item idx is put in locs[:, 0]
locs_float = torch.cat(locs_float, 0).to(torch.float32) # float (N, 3)
feats = torch.cat(feats, 0).float() # float (N, C)
labels = torch.cat(labels, 0).long() # long (N)
instance_labels = torch.cat(instance_labels, 0).long() # long (N)
locs = torch.cat(locs, 0) # long (N, 1 + 3), the batch item idx is put in locs[:, 0]
locs_float = torch.cat(locs_float, 0).to(torch.float32) # float (N, 3)
feats = torch.cat(feats, 0).float() # float (N, C)
labels = torch.cat(labels, 0).long() # long (N)
instance_labels = torch.cat(instance_labels, 0).long() # long (N)
instance_infos = torch.cat(instance_infos, 0).to(torch.float32) # float (N, 9) (meanxyz, minxyz, maxxyz)
instance_pointnum = torch.tensor(instance_pointnum, dtype=torch.int) # int (total_nInst)
instance_cls = torch.tensor(instance_cls, dtype=torch.long) # long (total_nInst)
instance_infos = torch.cat(instance_infos,
0).to(torch.float32) # float (N, 9) (meanxyz, minxyz, maxxyz)
instance_pointnum = torch.tensor(instance_pointnum, dtype=torch.int) # int (total_nInst)
instance_cls = torch.tensor(instance_cls, dtype=torch.long) # long (total_nInst)
spatial_shape = np.clip((locs.max(0)[0][1:] + 1).numpy(), self.full_scale[0], None) # long (3)
spatial_shape = np.clip((locs.max(0)[0][1:] + 1).numpy(), self.voxel_cfg.spatial_shape[0],
None) # long (3)
# voxelize
voxel_locs, p2v_map, v2p_map = softgroup_ops.voxelization_idx(locs, self.batch_size, self.mode)
return {'locs': locs, 'voxel_locs': voxel_locs, 'p2v_map': p2v_map, 'v2p_map': v2p_map,
'locs_float': locs_float, 'feats': feats, 'labels': labels, 'instance_labels': instance_labels,
'instance_info': instance_infos, 'instance_pointnum': instance_pointnum, 'instance_cls': instance_cls,
'id': id, 'offsets': batch_offsets, 'spatial_shape': spatial_shape}
voxel_locs, p2v_map, v2p_map = softgroup_ops.voxelization_idx(locs, 4)
return {
'locs': locs,
'voxel_locs': voxel_locs,
'p2v_map': p2v_map,
'v2p_map': v2p_map,
'locs_float': locs_float,
'feats': feats,
'labels': labels,
'instance_labels': instance_labels,
'instance_info': instance_infos,
'instance_pointnum': instance_pointnum,
'instance_cls': instance_cls,
'id': id,
'offsets': batch_offsets,
'spatial_shape': spatial_shape
}
def testMerge(self, id):
locs = []
locs_float = []
feats = []
labels = []#
labels = [] #
batch_offsets = [0]
for i, idx in enumerate(id):
@ -444,7 +553,11 @@ class Dataset:
# merge the scene to the batch
batch_offsets.append(batch_offsets[-1] + xyz.shape[0])
locs.append(torch.cat([torch.LongTensor(xyz.shape[0], 1).fill_(batch), torch.from_numpy(xyz).long()], 1))
locs.append(
torch.cat([
torch.LongTensor(xyz.shape[0], 1).fill_(batch),
torch.from_numpy(xyz).long()
], 1))
locs_float.append(torch.from_numpy(xyz_middle))
feats.append(torch.from_numpy(rgb[piece]).float())
@ -458,25 +571,41 @@ class Dataset:
# merge all the scenes in the batch
batch_offsets = torch.tensor(batch_offsets, dtype=torch.int) # int (B+1)
locs = torch.cat(locs, 0) # long (N, 1 + 3), the batch item idx is put in locs[:, 0]
locs_float = torch.cat(locs_float, 0).to(torch.float32) # float (N, 3)
feats = torch.cat(feats, 0) # float (N, C)
locs = torch.cat(locs, 0) # long (N, 1 + 3), the batch item idx is put in locs[:, 0]
locs_float = torch.cat(locs_float, 0).to(torch.float32) # float (N, 3)
feats = torch.cat(feats, 0) # float (N, C)
spatial_shape = np.clip((locs.max(0)[0][1:] + 1).numpy(), self.full_scale[0], None) # long (3)
spatial_shape = np.clip((locs.max(0)[0][1:] + 1).numpy(), self.full_scale[0],
None) # long (3)
# voxelize
voxel_locs, p2v_map, v2p_map = softgroup_ops.voxelization_idx(locs, 4, self.mode)
if self.test_split == 'val':
return {'locs': locs, 'voxel_locs': voxel_locs, 'p2v_map': p2v_map, 'v2p_map': v2p_map,
'locs_float': locs_float, 'feats': feats,
'id': id, 'offsets': batch_offsets, 'spatial_shape': spatial_shape,
'labels': labels}
return {
'locs': locs,
'voxel_locs': voxel_locs,
'p2v_map': p2v_map,
'v2p_map': v2p_map,
'locs_float': locs_float,
'feats': feats,
'id': id,
'offsets': batch_offsets,
'spatial_shape': spatial_shape,
'labels': labels
}
elif self.test_split == 'test':
return {'locs': locs, 'voxel_locs': voxel_locs, 'p2v_map': p2v_map, 'v2p_map': v2p_map,
'locs_float': locs_float, 'feats': feats,
'id': id, 'offsets': batch_offsets, 'spatial_shape': spatial_shape}
return {
'locs': locs,
'voxel_locs': voxel_locs,
'p2v_map': p2v_map,
'v2p_map': v2p_map,
'locs_float': locs_float,
'feats': feats,
'id': id,
'offsets': batch_offsets,
'spatial_shape': spatial_shape
}
else:
raise ValueError('test_split must be "val" or "test"')


@ -11,7 +11,13 @@ from lib.softgroup_ops.functions import softgroup_ops
import torch.distributed as dist
class Dataset:
CLASSES = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 'bookshelf', 'picture',
'counter', 'desk', 'curtain', 'refrigerator', 'shower curtain', 'toilet', 'sink',
'bathtub', 'otherfurniture')
def __init__(self, data_root, prefix, suffix, voxel_cfg=None):
self.data_root = data_root
self.prefix = prefix
@ -37,11 +43,13 @@ class Dataset:
# self.test_workers = cfg.test_workers
# cfg.batch_size = 1
def trainLoader(self):
train_file_names = []
for path in self.paths:
file_names = sorted(glob.glob(os.path.join(self.data_root, self.dataset, 'train', '*' + self.filename_suffix)))
file_names = sorted(
glob.glob(
os.path.join(self.data_root, self.dataset, 'train',
'*' + self.filename_suffix)))
assert len(train_file_names) > 0
self.train_files = [torch.load(i) for i in train_file_names]
@ -49,12 +57,20 @@ class Dataset:
logger.info('Training samples: {}'.format(len(self.train_files)))
train_set = list(range(len(self.train_files)))
self.train_data_loader = DataLoader(train_set, batch_size=self.batch_size, collate_fn=self.trainMerge, num_workers=self.train_workers,
shuffle=True, sampler=None, drop_last=True, pin_memory=True)
self.train_data_loader = DataLoader(
train_set,
batch_size=self.batch_size,
collate_fn=self.trainMerge,
num_workers=self.train_workers,
shuffle=True,
sampler=None,
drop_last=True,
pin_memory=True)
def dist_trainLoader(self):
train_file_names = sorted(glob.glob(os.path.join(self.data_root, self.dataset, 'train', '*' + self.filename_suffix)))
train_file_names = sorted(
glob.glob(
os.path.join(self.data_root, self.dataset, 'train', '*' + self.filename_suffix)))
self.train_files = [torch.load(i) for i in train_file_names]
logger.info('Training samples: {}'.format(len(self.train_files)))
@ -62,42 +78,57 @@ class Dataset:
train_set = list(range(len(self.train_files)))
# self.train_data_loader = DataLoader(train_set, batch_size=self.batch_size, collate_fn=self.trainMerge, num_workers=self.train_workers,
# shuffle=True, sampler=None, drop_last=True, pin_memory=True)
# world_size = dist.get_world_size()
# rank = dist.get_rank()
# self.data_sampler = torch.utils.data.distributed.DistributedSampler(train_set, num_replicas=world_size, rank=rank)
self.data_sampler = torch.utils.data.distributed.DistributedSampler(train_set)
self.train_data_loader = DataLoader(train_set, batch_size=self.batch_size,
collate_fn=self.trainMerge,
num_workers=self.train_workers,
shuffle=False, sampler=self.data_sampler,
drop_last=False, pin_memory=True)
self.train_data_loader = DataLoader(
train_set,
batch_size=self.batch_size,
collate_fn=self.trainMerge,
num_workers=self.train_workers,
shuffle=False,
sampler=self.data_sampler,
drop_last=False,
pin_memory=True)
def valLoader(self):
self.val_file_names = sorted(glob.glob(os.path.join(self.data_root, self.prefix, '*' + self.suffix)))
self.val_file_names = sorted(
glob.glob(os.path.join(self.data_root, self.prefix, '*' + self.suffix)))
assert len(self.val_file_names) > 0
# self.val_files = [torch.load(i) for i in val_file_names]
logger.info('Validation samples: {}'.format(len(self.val_file_names)))
val_set = list(range(len(self.val_file_names)))
self.val_data_loader = DataLoader(val_set, batch_size=1, collate_fn=self.valMerge, num_workers=16,
shuffle=False, drop_last=False, pin_memory=True)
self.val_data_loader = DataLoader(
val_set,
batch_size=1,
collate_fn=self.valMerge,
num_workers=16,
shuffle=False,
drop_last=False,
pin_memory=True)
def testLoader(self):
self.test_file_names = sorted(glob.glob(os.path.join(self.data_root, self.prefix, '*' + self.suffix)))
self.test_file_names = sorted(
glob.glob(os.path.join(self.data_root, self.prefix, '*' + self.suffix)))
assert len(self.test_file_names) > 0
# self.test_files = [torch.load(i) for i in self.test_file_names]
logger.info('Testing samples {}'.format(len(self.test_file_names)))
test_set = list(np.arange(len(self.test_file_names)))
self.test_data_loader = DataLoader(test_set, batch_size=1, collate_fn=self.testMerge, num_workers=1,
shuffle=False, drop_last=False, pin_memory=True)
self.test_data_loader = DataLoader(
test_set,
batch_size=1,
collate_fn=self.testMerge,
num_workers=1,
shuffle=False,
drop_last=False,
pin_memory=True)
# Elastic distortion
def elastic(self, x, gran, mag):
@ -105,7 +136,7 @@ class Dataset:
blur1 = np.ones((1, 3, 1)).astype('float32') / 3
blur2 = np.ones((1, 1, 3)).astype('float32') / 3
bb = np.abs(x).max(0).astype(np.int32)//gran + 3
bb = np.abs(x).max(0).astype(np.int32) // gran + 3
noise = [np.random.randn(bb[0], bb[1], bb[2]).astype('float32') for _ in range(3)]
noise = [scipy.ndimage.filters.convolve(n, blur0, mode='constant', cval=0) for n in noise]
noise = [scipy.ndimage.filters.convolve(n, blur1, mode='constant', cval=0) for n in noise]
@ -113,12 +144,16 @@ class Dataset:
noise = [scipy.ndimage.filters.convolve(n, blur0, mode='constant', cval=0) for n in noise]
noise = [scipy.ndimage.filters.convolve(n, blur1, mode='constant', cval=0) for n in noise]
noise = [scipy.ndimage.filters.convolve(n, blur2, mode='constant', cval=0) for n in noise]
ax = [np.linspace(-(b-1)*gran, (b-1)*gran, b) for b in bb]
interp = [scipy.interpolate.RegularGridInterpolator(ax, n, bounds_error=0, fill_value=0) for n in noise]
def g(x_):
return np.hstack([i(x_)[:,None] for i in interp])
return x + g(x) * mag
ax = [np.linspace(-(b - 1) * gran, (b - 1) * gran, b) for b in bb]
interp = [
scipy.interpolate.RegularGridInterpolator(ax, n, bounds_error=0, fill_value=0)
for n in noise
]
def g(x_):
return np.hstack([i(x_)[:, None] for i in interp])
return x + g(x) * mag
def getInstanceInfo(self, xyz, instance_label, label):
'''
@ -126,8 +161,10 @@ class Dataset:
:param instance_label: (n), int, (0~nInst-1, -100)
:return: instance_num, dict
'''
instance_info = np.ones((xyz.shape[0], 9), dtype=np.float32) * -100.0 # (n, 9), float, (cx, cy, cz, minx, miny, minz, maxx, maxy, maxz)
instance_pointnum = [] # (nInst), int
instance_info = np.ones(
(xyz.shape[0], 9), dtype=np.float32
) * -100.0 # (n, 9), float, (cx, cy, cz, minx, miny, minz, maxx, maxy, maxz)
instance_pointnum = [] # (nInst), int
instance_cls = []
instance_num = int(instance_label.max()) + 1
for i_ in range(instance_num):
@ -149,11 +186,13 @@ class Dataset:
cls_loc = inst_idx_i[0][0]
# ignore 2 first classes (floor, ceil)
cls = label[cls_loc] - 2 if label[cls_loc] != -100 else label[cls_loc]
instance_cls.append(cls)
return instance_num, {"instance_info": instance_info, "instance_pointnum": instance_pointnum,
"instance_cls": instance_cls}
cls = label[cls_loc] - 2 if label[cls_loc] != -100 else label[cls_loc]
instance_cls.append(cls)
return instance_num, {
"instance_info": instance_info,
"instance_pointnum": instance_pointnum,
"instance_cls": instance_cls
}
def dataAugment(self, xyz, jitter=False, flip=False, rot=False):
m = np.eye(3)
@ -163,10 +202,10 @@ class Dataset:
m[0][0] *= np.random.randint(0, 2) * 2 - 1 # flip x randomly
if rot:
theta = np.random.rand() * 2 * math.pi
m = np.matmul(m, [[math.cos(theta), math.sin(theta), 0], [-math.sin(theta), math.cos(theta), 0], [0, 0, 1]]) # rotation
m = np.matmul(m, [[math.cos(theta), math.sin(theta), 0],
[-math.sin(theta), math.cos(theta), 0], [0, 0, 1]]) # rotation
return np.matmul(xyz, m)
def crop(self, xyz):
'''
:param xyz: (n, 3) >= 0
@ -185,7 +224,6 @@ class Dataset:
return xyz_offset, valid_idxs
def getCroppedInstLabel(self, instance_label, valid_idxs):
instance_label = instance_label[valid_idxs]
j = 0
@ -195,7 +233,6 @@ class Dataset:
j += 1
return instance_label
def trainMerge(self, id):
locs = []
locs_float = []
@ -205,7 +242,7 @@ class Dataset:
instance_infos = [] # (N, 9)
instance_pointnum = [] # (total_nInst), int
instance_cls = [] # (total_nInst), long
instance_cls = [] # (total_nInst), long
batch_offsets = [0]
@ -213,7 +250,6 @@ class Dataset:
for i, idx in enumerate(id):
xyz_origin, rgb, label, instance_label = self.train_files[idx]
# jitter / flip x / rotation
xyz_middle = self.dataAugment(xyz_origin, True, True, True)
@ -237,9 +273,11 @@ class Dataset:
instance_label = self.getCroppedInstLabel(instance_label, valid_idxs)
# get instance information
inst_num, inst_infos = self.getInstanceInfo(xyz_middle, instance_label.astype(np.int32), label)
inst_info = inst_infos["instance_info"] # (n, 9), (cx, cy, cz, minx, miny, minz, maxx, maxy, maxz)
inst_pointnum = inst_infos["instance_pointnum"] # (nInst), list
inst_num, inst_infos = self.getInstanceInfo(xyz_middle, instance_label.astype(np.int32),
label)
inst_info = inst_infos[
"instance_info"] # (n, 9), (cx, cy, cz, minx, miny, minz, maxx, maxy, maxz)
inst_pointnum = inst_infos["instance_pointnum"] # (nInst), list
inst_cls = inst_infos["instance_cls"]
instance_label[np.where(instance_label != -100)] += total_inst_num
@ -248,7 +286,10 @@ class Dataset:
# merge the scene to the batch
batch_offsets.append(batch_offsets[-1] + xyz.shape[0])
locs.append(torch.cat([torch.LongTensor(xyz.shape[0], 1).fill_(i), torch.from_numpy(xyz).long()], 1))
locs.append(
torch.cat(
[torch.LongTensor(xyz.shape[0], 1).fill_(i),
torch.from_numpy(xyz).long()], 1))
locs_float.append(torch.from_numpy(xyz_middle))
feats.append(torch.from_numpy(rgb) + torch.randn(3) * 0.1)
labels.append(torch.from_numpy(label))
@ -261,26 +302,40 @@ class Dataset:
# merge all the scenes in the batch
batch_offsets = torch.tensor(batch_offsets, dtype=torch.int) # int (B+1)
locs = torch.cat(locs, 0) # long (N, 1 + 3), the batch item idx is put in locs[:, 0]
locs = torch.cat(locs, 0) # long (N, 1 + 3), the batch item idx is put in locs[:, 0]
locs_float = torch.cat(locs_float, 0).to(torch.float32) # float (N, 3)
feats = torch.cat(feats, 0) # float (N, C)
labels = torch.cat(labels, 0).long() # long (N)
instance_labels = torch.cat(instance_labels, 0).long() # long (N)
feats = torch.cat(feats, 0) # float (N, C)
labels = torch.cat(labels, 0).long() # long (N)
instance_labels = torch.cat(instance_labels, 0).long() # long (N)
instance_infos = torch.cat(instance_infos, 0).to(torch.float32) # float (N, 9) (meanxyz, minxyz, maxxyz)
instance_infos = torch.cat(instance_infos,
0).to(torch.float32) # float (N, 9) (meanxyz, minxyz, maxxyz)
instance_pointnum = torch.tensor(instance_pointnum, dtype=torch.int) # int (total_nInst)
instance_cls = torch.tensor(instance_cls, dtype=torch.long) # long (total_nInst)
instance_cls = torch.tensor(instance_cls, dtype=torch.long) # long (total_nInst)
spatial_shape = np.clip((locs.max(0)[0][1:] + 1).numpy(), self.spatial_shape[0], None) # long (3)
spatial_shape = np.clip((locs.max(0)[0][1:] + 1).numpy(), self.spatial_shape[0],
None) # long (3)
# voxelize
voxel_locs, p2v_map, v2p_map = softgroup_ops.voxelization_idx(locs, self.batch_size, self.mode)
return {'locs': locs, 'voxel_locs': voxel_locs, 'p2v_map': p2v_map, 'v2p_map': v2p_map,
'locs_float': locs_float, 'feats': feats, 'labels': labels, 'instance_labels': instance_labels,
'instance_info': instance_infos, 'instance_pointnum': instance_pointnum, 'instance_cls': instance_cls,
'id': id, 'offsets': batch_offsets, 'spatial_shape': spatial_shape}
voxel_locs, p2v_map, v2p_map = softgroup_ops.voxelization_idx(locs, self.batch_size,
self.mode)
return {
'locs': locs,
'voxel_locs': voxel_locs,
'p2v_map': p2v_map,
'v2p_map': v2p_map,
'locs_float': locs_float,
'feats': feats,
'labels': labels,
'instance_labels': instance_labels,
'instance_info': instance_infos,
'instance_pointnum': instance_pointnum,
'instance_cls': instance_cls,
'id': id,
'offsets': batch_offsets,
'spatial_shape': spatial_shape
}
def valMerge(self, id):
locs = []
@ -291,7 +346,7 @@ class Dataset:
instance_infos = [] # (N, 9)
instance_pointnum = [] # (total_nInst), int
instance_cls = [] # (total_nInst), long
instance_cls = [] # (total_nInst), long
batch_offsets = [0]
@ -319,8 +374,10 @@ class Dataset:
instance_label = self.getCroppedInstLabel(instance_label, valid_idxs)
# get instance information
inst_num, inst_infos = self.getInstanceInfo(xyz_middle, instance_label.astype(np.int32), label)
inst_info = inst_infos["instance_info"] # (n, 9), (cx, cy, cz, minx, miny, minz, maxx, maxy, maxz)
inst_num, inst_infos = self.getInstanceInfo(xyz_middle, instance_label.astype(np.int32),
label)
inst_info = inst_infos[
"instance_info"] # (n, 9), (cx, cy, cz, minx, miny, minz, maxx, maxy, maxz)
inst_pointnum = inst_infos["instance_pointnum"] # (nInst), list
inst_cls = inst_infos["instance_cls"]
@ -330,7 +387,10 @@ class Dataset:
# merge the scene to the batch
batch_offsets.append(batch_offsets[-1] + xyz.shape[0])
locs.append(torch.cat([torch.LongTensor(xyz.shape[0], 1).fill_(i), torch.from_numpy(xyz).long()], 1))
locs.append(
torch.cat(
[torch.LongTensor(xyz.shape[0], 1).fill_(i),
torch.from_numpy(xyz).long()], 1))
locs_float.append(torch.from_numpy(xyz_middle))
feats.append(torch.from_numpy(rgb))
labels.append(torch.from_numpy(label))
@ -343,33 +403,46 @@ class Dataset:
# merge all the scenes in the batch
batch_offsets = torch.tensor(batch_offsets, dtype=torch.int) # int (B+1)
locs = torch.cat(locs, 0) # long (N, 1 + 3), the batch item idx is put in locs[:, 0]
locs_float = torch.cat(locs_float, 0).to(torch.float32) # float (N, 3)
feats = torch.cat(feats, 0) # float (N, C)
labels = torch.cat(labels, 0).long() # long (N)
instance_labels = torch.cat(instance_labels, 0).long() # long (N)
locs = torch.cat(locs, 0) # long (N, 1 + 3), the batch item idx is put in locs[:, 0]
locs_float = torch.cat(locs_float, 0).to(torch.float32) # float (N, 3)
feats = torch.cat(feats, 0) # float (N, C)
labels = torch.cat(labels, 0).long() # long (N)
instance_labels = torch.cat(instance_labels, 0).long() # long (N)
instance_infos = torch.cat(instance_infos, 0).to(torch.float32) # float (N, 9) (meanxyz, minxyz, maxxyz)
instance_pointnum = torch.tensor(instance_pointnum, dtype=torch.int) # int (total_nInst)
instance_cls = torch.tensor(instance_cls, dtype=torch.long) # long (total_nInst)
instance_infos = torch.cat(instance_infos,
0).to(torch.float32) # float (N, 9) (meanxyz, minxyz, maxxyz)
instance_pointnum = torch.tensor(instance_pointnum, dtype=torch.int) # int (total_nInst)
instance_cls = torch.tensor(instance_cls, dtype=torch.long) # long (total_nInst)
spatial_shape = np.clip((locs.max(0)[0][1:] + 1).numpy(), self.voxel_cfg.spatial_shape[0], None) # long (3)
spatial_shape = np.clip((locs.max(0)[0][1:] + 1).numpy(), self.voxel_cfg.spatial_shape[0],
None) # long (3)
# voxelize
voxel_locs, p2v_map, v2p_map = softgroup_ops.voxelization_idx(locs, 1)
return {'locs': locs, 'voxel_locs': voxel_locs, 'p2v_map': p2v_map, 'v2p_map': v2p_map,
'locs_float': locs_float, 'feats': feats, 'labels': labels, 'instance_labels': instance_labels,
'instance_info': instance_infos, 'instance_pointnum': instance_pointnum, 'instance_cls': instance_cls,
'id': id, 'offsets': batch_offsets, 'spatial_shape': spatial_shape}
return {
'locs': locs,
'voxel_locs': voxel_locs,
'p2v_map': p2v_map,
'v2p_map': v2p_map,
'locs_float': locs_float,
'feats': feats,
'labels': labels,
'instance_labels': instance_labels,
'instance_info': instance_infos,
'instance_pointnum': instance_pointnum,
'instance_cls': instance_cls,
'id': id,
'offsets': batch_offsets,
'spatial_shape': spatial_shape
}
def testMerge(self, id):
locs = []
locs_float = []
feats = []
labels = []#
labels = [] #
batch_offsets = [0]
for i, idx in enumerate(id):
@ -394,7 +467,10 @@ class Dataset:
# merge the scene to the batch
batch_offsets.append(batch_offsets[-1] + xyz.shape[0])
locs.append(torch.cat([torch.LongTensor(xyz.shape[0], 1).fill_(i), torch.from_numpy(xyz).long()], 1))
locs.append(
torch.cat(
[torch.LongTensor(xyz.shape[0], 1).fill_(i),
torch.from_numpy(xyz).long()], 1))
locs_float.append(torch.from_numpy(xyz_middle))
feats.append(torch.from_numpy(rgb))
@ -402,30 +478,46 @@ class Dataset:
labels.append(torch.from_numpy(label))
if self.test_split == 'val':
labels = torch.cat(labels, 0).long() # long (N)
labels = torch.cat(labels, 0).long() # long (N)
# merge all the scenes in the batch
batch_offsets = torch.tensor(batch_offsets, dtype=torch.int) # int (B+1)
locs = torch.cat(locs, 0) # long (N, 1 + 3), the batch item idx is put in locs[:, 0]
locs_float = torch.cat(locs_float, 0).to(torch.float32) # float (N, 3)
feats = torch.cat(feats, 0) # float (N, C)
locs = torch.cat(locs, 0) # long (N, 1 + 3), the batch item idx is put in locs[:, 0]
locs_float = torch.cat(locs_float, 0).to(torch.float32) # float (N, 3)
feats = torch.cat(feats, 0) # float (N, C)
spatial_shape = np.clip((locs.max(0)[0][1:] + 1).numpy(), self.voxel_cfg.spatial_shape[0], None) # long (3)
spatial_shape = np.clip((locs.max(0)[0][1:] + 1).numpy(), self.voxel_cfg.spatial_shape[0],
None) # long (3)
# voxelize
voxel_locs, p2v_map, v2p_map = softgroup_ops.voxelization_idx(locs, 1) # TODO
if self.test_split == 'val':
return {'locs': locs, 'voxel_locs': voxel_locs, 'p2v_map': p2v_map, 'v2p_map': v2p_map,
'locs_float': locs_float, 'feats': feats,
'id': id, 'offsets': batch_offsets, 'spatial_shape': spatial_shape,
'labels': labels}
return {
'locs': locs,
'voxel_locs': voxel_locs,
'p2v_map': p2v_map,
'v2p_map': v2p_map,
'locs_float': locs_float,
'feats': feats,
'id': id,
'offsets': batch_offsets,
'spatial_shape': spatial_shape,
'labels': labels
}
elif self.test_split == 'test':
return {'locs': locs, 'voxel_locs': voxel_locs, 'p2v_map': p2v_map, 'v2p_map': v2p_map,
'locs_float': locs_float, 'feats': feats,
'id': id, 'offsets': batch_offsets, 'spatial_shape': spatial_shape}
return {
'locs': locs,
'voxel_locs': voxel_locs,
'p2v_map': p2v_map,
'v2p_map': v2p_map,
'locs_float': locs_float,
'feats': feats,
'id': id,
'offsets': batch_offsets,
'spatial_shape': spatial_shape
}
else:
raise ValueError('test_split must be "val" or "test"')


@ -11,37 +11,15 @@ from .util_3d import get_instances
class ScanNetEval(object):
valid_class_labels = [
'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator',
'shower curtain', 'toilet', 'sink', 'bathtub', 'otherfurniture'
]
valid_class_ids = np.array(
[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39])
valid_class_ids = np.arange(18) + 1
# valid_class_labels = [
# "ceiling",
# "floor",
# "wall",
# "beam",
# "column",
# "window",
# "door",
# "chair",
# "table",
# "bookcase",
# "sofa",
# "board",
# "clutter"
# ]
# valid_class_ids = np.arange(13) + 1
id2label = {}
label2id = {}
for i in range(len(valid_class_ids)):
label2id[valid_class_labels[i]] = valid_class_ids[i]
id2label[valid_class_ids[i]] = valid_class_labels[i]
def __init__(self, class_labels, iou_type=None, use_label=True):
self.valid_class_labels = class_labels
self.valid_class_ids = np.arange(len(class_labels)) + 1
self.id2label = {}
self.label2id = {}
for i in range(len(self.valid_class_ids)):
self.label2id[self.valid_class_labels[i]] = self.valid_class_ids[i]
self.id2label[self.valid_class_ids[i]] = self.valid_class_labels[i]
def __init__(self, iou_type=None, use_label=True):
self.ious = np.append(np.arange(0.5, 0.95, 0.05), 0.25)
self.min_region_sizes = np.array([100])
self.distance_threshes = np.array([float('inf')])
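
For reference, that threshold sweep expands to the standard ScanNet protocol: AP averaged over IoUs 0.50-0.90 in 0.05 steps, plus separate AP@50 and AP@25 entries:

import numpy as np

ious = np.append(np.arange(0.5, 0.95, 0.05), 0.25)
# -> [0.5  0.55 0.6  0.65 0.7  0.75 0.8  0.85 0.9  0.25]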


@ -1,25 +1,26 @@
import functools
import spconv
import sys
import torch
import torch.nn as nn
import spconv
from spconv.modules import SparseModule
import functools
from collections import OrderedDict
import sys
from util import utils
from .blocks import ResidualBlock, UBlock
sys.path.append('../../')
from lib.softgroup_ops.functions import softgroup_ops
from util import utils
import torch.nn.functional as F
from .blocks import ResidualBlock, UBlock
from lib.softgroup_ops.functions import softgroup_ops # noqa
class SoftGroup(nn.Module):
def __init__(self,
channels=32,
num_blocks=7,
semantic_only=False,
semantic_classes=20,
instance_classes=18,
sem2ins_classes=[],
ignore_label=-100,
grouping_cfg=None,
instance_voxel_cfg=None,
@ -32,6 +33,7 @@ class SoftGroup(nn.Module):
self.semantic_only = semantic_only
self.semantic_classes = semantic_classes
self.instance_classes = instance_classes
self.sem2ins_classes = sem2ins_classes
self.ignore_label = ignore_label
self.grouping_cfg = grouping_cfg
self.instance_voxel_cfg = instance_voxel_cfg
@ -51,42 +53,28 @@ class SoftGroup(nn.Module):
# backbone
self.input_conv = spconv.SparseSequential(
spconv.SubMConv3d(6, channels, kernel_size=3, padding=1, bias=False, indice_key='subm1')
)
spconv.SubMConv3d(
6, channels, kernel_size=3, padding=1, bias=False, indice_key='subm1'))
block_channels = [channels * (i + 1) for i in range(num_blocks)]
self.unet = UBlock(block_channels, norm_fn, 2, block, indice_key_id=1)
self.output_layer = spconv.SparseSequential(
norm_fn(channels),
nn.ReLU()
)
self.output_layer = spconv.SparseSequential(norm_fn(channels), nn.ReLU())
# semantic segmentation branch
self.semantic_linear = nn.Sequential(
nn.Linear(channels, channels, bias=True),
norm_fn(channels),
nn.ReLU(),
nn.Linear(channels, semantic_classes)
)
nn.Linear(channels, channels, bias=True), norm_fn(channels), nn.ReLU(),
nn.Linear(channels, semantic_classes))
# center shift vector branch
self.offset_linear = nn.Sequential(
nn.Linear(channels, channels, bias=True),
norm_fn(channels),
nn.ReLU(),
nn.Linear(channels, 3, bias=True)
)
nn.Linear(channels, channels, bias=True), norm_fn(channels), nn.ReLU(),
nn.Linear(channels, 3, bias=True))
# topdown refinement path
self.intra_ins_unet = UBlock([channels, 2*channels], norm_fn, 2, block, indice_key_id=11)
self.intra_ins_outputlayer = spconv.SparseSequential(
norm_fn(channels),
nn.ReLU()
)
self.intra_ins_unet = UBlock([channels, 2 * channels], norm_fn, 2, block, indice_key_id=11)
self.intra_ins_outputlayer = spconv.SparseSequential(norm_fn(channels), nn.ReLU())
self.cls_linear = nn.Linear(channels, instance_classes + 1)
self.mask_linear = nn.Sequential(
nn.Linear(channels, channels),
nn.ReLU(),
nn.Linear(channels, instance_classes + 1))
nn.Linear(channels, channels), nn.ReLU(), nn.Linear(channels, instance_classes + 1))
self.score_linear = nn.Linear(channels, instance_classes + 1)
self.apply(self.set_bn_init)
@ -106,151 +94,51 @@ class SoftGroup(nn.Module):
m.weight.data.fill_(1.0)
m.bias.data.fill_(0.0)
def forward(self, batch, return_loss=False):
if return_loss:
return self.forward_train(batch)
else:
return self.forward_test(batch)
def forward_test(self, batch):
coords = batch['locs'].cuda() # (N, 1 + 3), long, cuda, dimension 0 for batch_idx
voxel_coords = batch['voxel_locs'].cuda() # (M, 1 + 3), long, cuda
p2v_map = batch['p2v_map'].cuda() # (N), int, cuda
v2p_map = batch['v2p_map'].cuda() # (M, 1 + maxActive), int, cuda
coords_float = batch['locs_float'].cuda() # (N, 3), float32, cuda
feats = batch['feats'].cuda() # (N, C), float32, cuda
labels = batch['labels'].cuda() # (N), long, cuda
instance_labels = batch['instance_labels'].cuda() # (N), long, cuda, 0~total_nInst, -100
instance_info = batch['instance_info'].cuda() # (N, 9), float32, cuda, (meanxyz, minxyz, maxxyz)
instance_pointnum = batch['instance_pointnum'].cuda() # (total_nInst), long, cuda
instance_cls = batch['instance_cls'].cuda() # (total_nInst), int, cuda
batch_offsets = batch['offsets'].cuda() # (B + 1), int, cuda
coords = batch['locs'].cuda()
voxel_coords = batch['voxel_locs'].cuda()
p2v_map = batch['p2v_map'].cuda()
v2p_map = batch['v2p_map'].cuda()
coords_float = batch['locs_float'].cuda()
feats = batch['feats'].cuda()
labels = batch['labels'].cuda()
instance_labels = batch['instance_labels'].cuda()
# instance_info = batch['instance_info'].cuda()
# instance_pointnum = batch['instance_pointnum'].cuda()
# instance_cls = batch['instance_cls'].cuda()
# batch_offsets = batch['offsets'].cuda()
spatial_shape = batch['spatial_shape']
feats = torch.cat((feats, coords_float), 1)
voxel_feats = softgroup_ops.voxelization(feats, v2p_map) # (M, C), float, cuda
voxel_feats = softgroup_ops.voxelization(feats, v2p_map)
if self.test_cfg.x4_split:
input = spconv.SparseConvTensor(voxel_feats, voxel_coords.int(), spatial_shape, 4)
batch_idxs = torch.zeros_like(coords[:, 0].int())
else:
input = spconv.SparseConvTensor(voxel_feats, voxel_coords.int(), spatial_shape, 1)
batch_idxs = coords[:, 0].int()
semantic_scores, pt_offsets, output_feats = self.forward_backbone(
input, p2v_map, x4_split=self.test_cfg.x4_split) # TODO check name for map
proposals_idx, proposals_offset = self.forward_grouping(
semantic_scores, pt_offsets, batch_idxs, coords_float, self.grouping_cfg)
scores_batch_idxs, cls_scores, scores, mask_scores = self.forward_instance(
semantic_scores, pt_offsets, output_feats, coords_float = self.forward_backbone(
input, p2v_map, coords_float, x4_split=self.test_cfg.x4_split) # TODO check name for map
proposals_idx, proposals_offset = self.forward_grouping(semantic_scores, pt_offsets,
batch_idxs, coords_float,
self.grouping_cfg)
scores_batch_idxs, cls_scores, iou_scores, mask_scores = self.forward_instance(
proposals_idx, proposals_offset, output_feats, coords_float)
# scores_batch_idxs, proposals_idx, proposals_offset, mask_scores = preds['proposals']
N = coords.size(0)
semantic_pred = semantic_scores.max(1)[1]
cls_scores = cls_scores.softmax(1)
slice_inds = torch.arange(cls_scores.size(0), dtype=torch.long, device=cls_scores.device)
cls_scores_new, cls_pred = cls_scores[:, :-1].max(1)
cluster_scores_list = []
clusters_list = []
cluster_semantic_id_list = []
semantic_label_idx = torch.arange(18) + 1
for i in range(18):
# arg_score = cls_pred == i
score_inds = (cls_scores[:, i] > 0.001)
cls_scores_new = cls_scores[:, i]
scores_pred = scores[slice_inds, i]
scores_pred = scores_pred.clamp(0, 1) * cls_scores_new
# scores_pred = cls_scores_new
# mask_cls_pred = cls_pred[scores_batch_idxs.long()]
mask_slice_inds = torch.arange(scores_batch_idxs.size(0), dtype=torch.long, device=scores_batch_idxs.device)
mask_scores_new = mask_scores[:, i]
# proposals_idx: (sumNPoint, 2), int, cpu, [:, 0] for cluster_id, [:, 1] for corresponding point idxs in N
# proposals_offset: (nProposal + 1), int, cpu
proposals_pred = torch.zeros((proposals_offset.shape[0] - 1, N), dtype=torch.int, device=scores_pred.device)
# (nProposal, N), int, cuda
# outlier filtering
test_mask_score_thre = -0.5 # TODO
_mask = mask_scores_new > test_mask_score_thre
proposals_pred[proposals_idx[_mask][:, 0].long(), proposals_idx[_mask][:, 1].long()] = 1
# bg filtering
# import pdb; pdb.set_trace()
# pos_inds = (cls_pred != cfg.classes - 2)
# proposals_pred = proposals_pred[pos_inds]
# scores_pred = scores_pred[pos_inds]
# cls_pred = cls_pred[pos_inds]
# import pdb; pdb.set_trace()
semantic_id = cls_scores.new_full(cls_scores_new.size(), semantic_label_idx[i], dtype=torch.long)
# semantic_id1 = torch.tensor(semantic_label_idx, device=scores_pred.device) \
# [semantic_pred[proposals_idx[:, 1][proposals_offset[:-1].long()].long()]] # (nProposal), long
# semantic_id_idx = semantic_pred[proposals_idx[:, 1][proposals_offset[:-1].long()].long()]
proposals_pred = proposals_pred[score_inds]
scores_pred = scores_pred[score_inds]
semantic_id = semantic_id[score_inds]
# score threshold
score_mask = (scores_pred > -1)
scores_pred = scores_pred[score_mask]
proposals_pred = proposals_pred[score_mask]
semantic_id = semantic_id[score_mask]
# semantic_id_idx = semantic_id_idx[score_mask]
# npoint threshold
proposals_pointnum = proposals_pred.sum(1)
npoint_mask = (proposals_pointnum >= 100)
scores_pred = scores_pred[npoint_mask]
proposals_pred = proposals_pred[npoint_mask]
semantic_id = semantic_id[npoint_mask]
clusters = proposals_pred
cluster_scores = scores_pred
cluster_semantic_id = semantic_id
clusters_list.append(clusters)
cluster_scores_list.append(cluster_scores)
cluster_semantic_id_list.append(cluster_semantic_id)
clusters = torch.cat(clusters_list).cpu().numpy()
cluster_scores = torch.cat(cluster_scores_list).cpu().numpy()
cluster_semantic_id = torch.cat(cluster_semantic_id_list).cpu().numpy()
# import pdb; pdb.set_trace()
nclusters = clusters.shape[0]
pred_instances = self.get_instances(batch['id'][0], proposals_idx, semantic_scores,
cls_scores, iou_scores, mask_scores)
gt_instances = self.get_gt_instances(labels, instance_labels)
ret = {}
det_ins = []
for i in range(nclusters):
pred = {}
pred['scan_id'] = batch['id'][0]
pred['conf'] = cluster_scores[i]
pred['label_id'] = cluster_semantic_id[i]
pred['pred_mask'] = clusters[i]
det_ins.append(pred)
labels = labels - 2 + 1
labels[labels < 0] = 0
instance_labels += 1
instance_labels[instance_labels == -99] = 0
gt_ins = labels * 1000 + instance_labels
gt_ins = gt_ins.cpu().numpy()
ret['det_ins'] = det_ins
ret['gt_ins'] = gt_ins
ret['det_ins'] = pred_instances
ret['gt_ins'] = gt_instances
return ret
def forward_backbone(self, input, input_map, x4_split=False):
def forward_backbone(self, input, input_map, coords, x4_split=False):
if x4_split:
output_feats = self.forward_4_parts(input, input_map)
output_feats = self.merge_4_parts(output_feats)
@ -263,19 +151,19 @@ class SoftGroup(nn.Module):
semantic_scores = self.semantic_linear(output_feats)
semantic_scores = semantic_scores.softmax(dim=-1)
semantic_preds = semantic_scores.max(1)[1]
pt_offsets = self.offset_linear(output_feats)
return semantic_scores, pt_offsets, output_feats
return semantic_scores, pt_offsets, output_feats, coords
def forward_4_parts(self, x, input_map):
# helper function for s3dis: divide and forward 4 parts of a scene
"""Helper function for s3dis: divide and forward 4 parts of a scene"""
outs = []
for i in range(4):
inds = x.indices[:, 0] == i
feats = x.features[inds]
coords = x.indices[inds]
coords[:, 0] = 0
x_new = spconv.SparseConvTensor(indices=coords, features=feats, spatial_shape=x.spatial_shape, batch_size=1)
x_new = spconv.SparseConvTensor(
indices=coords, features=feats, spatial_shape=x.spatial_shape, batch_size=1)
out = self.input_conv(x_new)
out = self.unet(out)
out = self.output_layer(out)
@ -284,7 +172,7 @@ class SoftGroup(nn.Module):
return outs[input_map.long()]
def merge_4_parts(self, x):
# helper function for s3dis: take output of 4 parts and merge them
"""Helper function for s3dis: take output of 4 parts and merge them"""
inds = torch.arange(x.size(0), device=x.device)
p1 = inds[::4]
p2 = inds[1::4]
@ -297,8 +185,12 @@ class SoftGroup(nn.Module):
x_new[p] = x_split[i]
return x_new
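
forward_4_parts and merge_4_parts mirror the interleaved indexing used on the data side (inds[0::4] ... inds[3::4]): the four strided slices partition the points, and writing each result back at stride 4 restores the original order. A toy round trip:

import torch

x = torch.arange(12)
pieces = [x[i::4] for i in range(4)]  # the four forwarded "parts"
merged = torch.empty_like(x)
for i, piece in enumerate(pieces):
    merged[i::4] = piece  # same scatter as merge_4_parts
assert torch.equal(merged, x)
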
def forward_grouping(self, semantic_scores, pt_offsets, batch_idxs, coords_float, grouping_cfg=None):
thr = 0.2 #TODO
def forward_grouping(self,
semantic_scores,
pt_offsets,
batch_idxs,
coords_float,
grouping_cfg=None):
proposals_idx_list = []
proposals_offset_list = []
batch_size = batch_idxs.max() + 1
@ -306,36 +198,30 @@ class SoftGroup(nn.Module):
radius = self.grouping_cfg.radius
mean_active = self.grouping_cfg.mean_active
class_numpoint_mean = torch.tensor(self.grouping_cfg.class_numpoint_mean, dtype=torch.float32)
training_mode = None # TODO remove this
class_numpoint_mean = torch.tensor(
self.grouping_cfg.class_numpoint_mean, dtype=torch.float32)
training_mode = None # TODO remove this
for class_id in range(self.semantic_classes):
# ignore "floor" and "wall"
if class_id < 2:
continue
scores = semantic_scores[:, class_id].contiguous()
object_idxs = (scores > thr).nonzero().view(-1)
object_idxs = (scores > self.grouping_cfg.score_thr).nonzero().view(-1)
if object_idxs.size(0) < 100: # TODO
continue
batch_idxs_ = batch_idxs[object_idxs]
batch_offsets_ = utils.get_batch_offsets(batch_idxs_, batch_size)
coords_ = coords_float[object_idxs]
pt_offsets_ = pt_offsets[object_idxs] # (N_fg, 3), float32
semantic_preds_cpu = semantic_preds[object_idxs].int().cpu()
idx, start_len = softgroup_ops.ballquery_batch_p(coords_ + pt_offsets_, \
batch_idxs_, batch_offsets_, radius, mean_active)
using_set_aggr = False #TODO refactor this
idx, start_len = softgroup_ops.ballquery_batch_p(coords_ + pt_offsets_, batch_idxs_,
batch_offsets_, radius, mean_active)
using_set_aggr = False # TODO refactor this
proposals_idx, proposals_offset = softgroup_ops.hierarchical_aggregation(
class_numpoint_mean, semantic_preds_cpu, (coords_ + pt_offsets_).cpu(), idx.cpu(), start_len.cpu(),
batch_idxs_.cpu(), training_mode, using_set_aggr, class_id)
class_numpoint_mean, semantic_preds_cpu, (coords_ + pt_offsets_).cpu(), idx.cpu(),
start_len.cpu(), batch_idxs_.cpu(), training_mode, using_set_aggr, class_id)
proposals_idx[:, 1] = object_idxs[proposals_idx[:, 1].long()].int()
# import pdb; pdb.set_trace()
# merge proposals
if len(proposals_offset_list) > 0:
proposals_idx[:, 0] += sum([x.size(0) for x in proposals_offset_list]) - 1
@ -350,7 +236,9 @@ class SoftGroup(nn.Module):
def forward_instance(self, proposals_idx, proposals_offset, output_feats, coords_float):
# proposals voxelization again
input_feats, inp_map = self.clusters_voxelization(proposals_idx, proposals_offset, output_feats, coords_float, **self.instance_voxel_cfg)
input_feats, inp_map = self.clusters_voxelization(proposals_idx, proposals_offset,
output_feats, coords_float,
**self.instance_voxel_cfg)
# predict instance scores
score = self.intra_ins_unet(input_feats)
@ -365,33 +253,93 @@ class SoftGroup(nn.Module):
score_feats = self.global_pool(score)
cls_scores = self.cls_linear(score_feats)
iou_scores = self.score_linear(score_feats)
return scores_batch_idxs, cls_scores, iou_scores, mask_scores
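The mask head uses the "linear first, then voxel to point" trick noted in the legacy forward_old below: the linear layer runs on the smaller voxel set and the result is gathered to points through inp_map. A toy illustration with made-up shapes:

import torch

num_voxels, num_points, C, K = 4, 10, 8, 3
mask_linear = torch.nn.Linear(C, K)
voxel_feats = torch.randn(num_voxels, C)
inp_map = torch.randint(0, num_voxels, (num_points,))
# one linear pass over 4 voxels instead of 10 points, then a cheap gather
mask_scores = mask_linear(voxel_feats)[inp_map]  # (num_points, K)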
def get_instances(self, scan_id, proposals_idx, semantic_scores, cls_scores, iou_scores,
mask_scores):
num_instances = cls_scores.size(0)
num_points = semantic_scores.size(0)
cls_scores = cls_scores.softmax(1)
semantic_pred = semantic_scores.max(1)[1]
cls_pred_list, score_pred_list, mask_pred_list = [], [], []
for i in range(self.instance_classes):
if i in self.sem2ins_classes:
cls_pred = cls_scores.new_tensor([i + 1], dtype=torch.long)
score_pred = cls_scores.new_tensor([1.], dtype=torch.float32)
mask_pred = (semantic_pred == i)[None, :].int()
else:
cls_pred = cls_scores.new_full((num_instances, ), i + 1, dtype=torch.long)
cur_cls_scores = cls_scores[:, i]
cur_iou_scores = iou_scores[:, i]
cur_mask_scores = mask_scores[:, i]
score_pred = cur_cls_scores * cur_iou_scores.clamp(0, 1)
mask_pred = torch.zeros((num_instances, num_points), dtype=torch.int, device='cuda')
mask_inds = cur_mask_scores > self.test_cfg.mask_score_thr
cur_proposals_idx = proposals_idx[mask_inds].long()
mask_pred[cur_proposals_idx[:, 0], cur_proposals_idx[:, 1]] = 1
def clusters_voxelization(self, clusters_idx, clusters_offset, feats, coords, scale, spatial_shape):
'''
:param clusters_idx: (SumNPoint, 2), int, [:, 0] for cluster_id, [:, 1] for corresponding point idxs in N, cpu
:param clusters_offset: (nCluster + 1), int, cpu
:param feats: (N, C), float, cuda
:param coords: (N, 3), float, cuda
:return:
'''
# filter low score instance
inds = cur_cls_scores > self.test_cfg.cls_score_thr
cls_pred = cls_pred[inds]
score_pred = score_pred[inds]
mask_pred = mask_pred[inds]
# filter too small instances
npoint = mask_pred.sum(1)
inds = npoint >= self.test_cfg.min_npoint
cls_pred = cls_pred[inds]
score_pred = score_pred[inds]
mask_pred = mask_pred[inds]
cls_pred_list.append(cls_pred)
score_pred_list.append(score_pred)
mask_pred_list.append(mask_pred)
cls_pred = torch.cat(cls_pred_list).cpu().numpy()
score_pred = torch.cat(score_pred_list).cpu().numpy()
mask_pred = torch.cat(mask_pred_list).cpu().numpy()
instances = []
for i in range(cls_pred.shape[0]):
pred = {}
pred['scan_id'] = scan_id
pred['label_id'] = cls_pred[i]
pred['conf'] = score_pred[i]
pred['pred_mask'] = mask_pred[i]
instances.append(pred)
return instances
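The fused confidence is the softmaxed classification score scaled by the clamped IoU regression; a worked example with two proposals and illustrative numbers:

import torch

cls_scores = torch.tensor([[0.7, 0.3], [0.2, 0.8]])   # (num_instances, classes), post-softmax
iou_scores = torch.tensor([[1.2, 0.4], [-0.1, 0.9]])  # raw IoU regressions
i = 0                                                 # class of interest
score_pred = cls_scores[:, i] * iou_scores[:, i].clamp(0, 1)
# -> tensor([0.7000, 0.0000]): a negative IoU estimate zeroes the proposal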
def get_gt_instances(self, labels, instance_labels):
"""Get gt instances for evaluation"""
# convert to evaluation format 0: ignore, 1->N: valid
label_shift = self.semantic_classes - self.instance_classes
labels = labels - label_shift + 1
labels[labels < 0] = 0
instance_labels += 1
ignore_inds = instance_labels < 0
gt_ins = labels * 1000 + instance_labels
gt_ins[ignore_inds] = 0
gt_ins = gt_ins.cpu().numpy()
return gt_ins
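A worked example of the packed encoding: class and instance id share one integer, with 0 reserved for ignored points (values are illustrative):

import torch

labels = torch.tensor([3, 3, 5, 0])             # semantic ids after the shift, 0 = stuff
instance_labels = torch.tensor([7, 7, 2, -99])  # instance ids after += 1; negative = ignore
gt_ins = labels * 1000 + instance_labels
gt_ins[instance_labels < 0] = 0
# -> tensor([3007, 3007, 5002, 0]); id // 1000 recovers the class,
#    id % 1000 recovers the instance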
def clusters_voxelization(self, clusters_idx, clusters_offset, feats, coords, scale,
spatial_shape):
c_idxs = clusters_idx[:, 1].cuda()
clusters_feats = feats[c_idxs.long()]
clusters_coords = coords[c_idxs.long()]
clusters_coords_mean = softgroup_ops.sec_mean(clusters_coords, clusters_offset.cuda()) # (nCluster, 3), float
clusters_coords_mean = torch.index_select(clusters_coords_mean, 0, clusters_idx[:, 0].cuda().long()) # (sumNPoint, 3), float
clusters_coords_mean = softgroup_ops.sec_mean(clusters_coords, clusters_offset.cuda())
clusters_coords_mean = torch.index_select(clusters_coords_mean, 0,
clusters_idx[:, 0].cuda().long())
clusters_coords -= clusters_coords_mean
clusters_coords_min = softgroup_ops.sec_min(clusters_coords, clusters_offset.cuda()) # (nCluster, 3), float
clusters_coords_max = softgroup_ops.sec_max(clusters_coords, clusters_offset.cuda()) # (nCluster, 3), float
clusters_coords_min = softgroup_ops.sec_min(clusters_coords, clusters_offset.cuda())
clusters_coords_max = softgroup_ops.sec_max(clusters_coords, clusters_offset.cuda())
clusters_scale = 1 / ((clusters_coords_max - clusters_coords_min) / spatial_shape).max(1)[0] - 0.01 # (nCluster), float
clusters_scale = 1 / (
(clusters_coords_max - clusters_coords_min) / spatial_shape).max(1)[0] - 0.01
clusters_scale = torch.clamp(clusters_scale, min=None, max=scale)
min_xyz = clusters_coords_min * clusters_scale.unsqueeze(-1) # (nCluster, 3), float
min_xyz = clusters_coords_min * clusters_scale.unsqueeze(-1)
max_xyz = clusters_coords_max * clusters_scale.unsqueeze(-1)
clusters_scale = torch.index_select(clusters_scale, 0, clusters_idx[:, 0].cuda().long())
@ -399,24 +347,25 @@ class SoftGroup(nn.Module):
clusters_coords = clusters_coords * clusters_scale.unsqueeze(-1)
range = max_xyz - min_xyz
offset = - min_xyz + torch.clamp(spatial_shape - range - 0.001, min=0) * torch.rand(3).cuda() + torch.clamp(spatial_shape - range + 0.001, max=0) * torch.rand(3).cuda()
offset = -min_xyz + torch.clamp(
spatial_shape - range - 0.001, min=0) * torch.rand(3).cuda() + torch.clamp(
spatial_shape - range + 0.001, max=0) * torch.rand(3).cuda()
offset = torch.index_select(offset, 0, clusters_idx[:, 0].cuda().long())
clusters_coords += offset
assert clusters_coords.shape.numel() == ((clusters_coords >= 0) * (clusters_coords < spatial_shape)).sum()
assert clusters_coords.shape.numel() == ((clusters_coords >= 0) *
(clusters_coords < spatial_shape)).sum()
clusters_coords = clusters_coords.long()
clusters_coords = torch.cat([clusters_idx[:, 0].view(-1, 1).long(), clusters_coords.cpu()], 1) # (sumNPoint, 1 + 3)
out_coords, inp_map, out_map = softgroup_ops.voxelization_idx(clusters_coords, int(clusters_idx[-1, 0]) + 1)
# output_coords: M * (1 + 3) long
# input_map: sumNPoint int
# output_map: M * (maxActive + 1) int
out_feats = softgroup_ops.voxelization(clusters_feats, out_map.cuda()) # (M, C), float, cuda
clusters_coords = torch.cat([clusters_idx[:, 0].view(-1, 1).long(),
clusters_coords.cpu()], 1)
out_coords, inp_map, out_map = softgroup_ops.voxelization_idx(clusters_coords,
int(clusters_idx[-1, 0]) + 1)
out_feats = softgroup_ops.voxelization(clusters_feats, out_map.cuda())
spatial_shape = [spatial_shape] * 3
voxelization_feats = spconv.SparseConvTensor(out_feats, out_coords.int().cuda(), spatial_shape, int(clusters_idx[-1, 0]) + 1)
voxelization_feats = spconv.SparseConvTensor(out_feats,
out_coords.int().cuda(), spatial_shape,
int(clusters_idx[-1, 0]) + 1)
return voxelization_feats, inp_map
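Worked numbers for the per-cluster scale: for a cluster whose longest axis spans 0.8 units, with spatial_shape = 20 and scale = 50 as in the configs above:

import torch

extent = torch.tensor([0.8])  # longest-axis extent of one cluster
spatial_shape, scale = 20, 50
clusters_scale = 1 / (extent / spatial_shape) - 0.01  # -> 24.99
clusters_scale = clusters_scale.clamp(max=scale)
print(extent * clusters_scale)  # ~19.99, i.e. just inside the 20-voxel grid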
def global_pool(self, x, expand=False):
@ -432,400 +381,3 @@ class SoftGroup(nn.Module):
x_pool_expand = x_pool[indices.long()]
x.features = torch.cat((x.features, x_pool_expand), dim=1)
return x
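A hedged pure-torch stand-in for the per-proposal global average pool (the real implementation pools sparse-conv features keyed by x.indices[:, 0]):

import torch

def global_avg_pool_sketch(feats, batch_idxs, num_proposals):
    # feats: (M, C) voxel features; batch_idxs: (M,) long, proposal id per voxel
    pooled = torch.zeros(num_proposals, feats.size(1), dtype=feats.dtype)
    pooled.index_add_(0, batch_idxs, feats)
    counts = torch.bincount(batch_idxs, minlength=num_proposals).clamp(min=1)
    return pooled / counts.unsqueeze(1).to(feats.dtype)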
def forward_old(self, input, input_map, coords, batch_idxs, batch_offsets, epoch, training_mode, gt_instances=None, split=False, semantic_only=False):
'''
:param input_map: (N), int, cuda
:param coords: (N, 3), float, cuda
:param batch_idxs: (N), int, cuda
:param batch_offsets: (B + 1), int, cuda
'''
ret = {}
if split:
output_feats = self.forward_4_parts(input, input_map)
output_feats = self.merge_4_parts(output_feats)
coords = self.merge_4_parts(coords)
else:
output = self.input_conv(input)
output = self.unet(output)
output = self.output_layer(output)
output_feats = output.features[input_map.long()]
semantic_scores = self.semantic_linear(output_feats) # (N, nClass), float
semantic_preds = semantic_scores.max(1)[1] # (N), long
pt_offsets = self.offset_linear(output_feats) # (N, 3)
ret['semantic_scores'] = semantic_scores
ret['pt_offsets'] = pt_offsets
if(epoch > self.prepare_epochs) and not semantic_only:
thr = self.cfg.score_thr
semantic_scores = semantic_scores.softmax(dim=-1)
proposals_idx_list = []
proposals_offset_list = []
cls_pred_list = []
for class_id in range(self.cfg.semantic_classes):
# ignore "floor" and "wall"
if class_id < 2:
continue
scores = semantic_scores[:, class_id].contiguous()
object_idxs = (scores > thr).nonzero().view(-1)
if object_idxs.size(0) < self.cfg.TEST_NPOINT_THRESH:
continue
batch_idxs_ = batch_idxs[object_idxs]
batch_offsets_ = utils.get_batch_offsets(batch_idxs_, self.cfg.batch_size)
coords_ = coords[object_idxs]
pt_offsets_ = pt_offsets[object_idxs] # (N_fg, 3), float32
semantic_preds_cpu = semantic_preds[object_idxs].int().cpu()
idx, start_len = softgroup_ops.ballquery_batch_p(coords_ + pt_offsets_, \
batch_idxs_, batch_offsets_, self.point_aggr_radius, self.cluster_shift_meanActive)
using_set_aggr = False #TODO refactor this
class_numpoint_mean = torch.tensor(self.cfg.class_numpoint_mean, dtype=torch.float32)
proposals_idx, proposals_offset = softgroup_ops.hierarchical_aggregation(
class_numpoint_mean, semantic_preds_cpu, (coords_ + pt_offsets_).cpu(), idx.cpu(), start_len.cpu(),
batch_idxs_.cpu(), training_mode, using_set_aggr, class_id)
proposals_idx[:, 1] = object_idxs[proposals_idx[:, 1].long()].int()
# import pdb; pdb.set_trace()
# merge proposals
cls_pred = proposals_offset.new_full((proposals_offset.size(0) - 1,), class_id)
if len(proposals_offset_list) > 0:
proposals_idx[:, 0] += sum([x.size(0) for x in proposals_offset_list]) - 1
proposals_offset += proposals_offset_list[-1][-1]
proposals_offset = proposals_offset[1:]
if proposals_idx.size(0) > 0:
proposals_idx_list.append(proposals_idx)
proposals_offset_list.append(proposals_offset)
cls_pred_list.append(cls_pred)
# add gt_instances to proposals
if gt_instances is not None:
indices = gt_instances[:, 0]
batch_counts = torch.bincount(indices)
gt_instances_offset = torch.cumsum(batch_counts, dim=0)
gt_instances_offset += proposals_offset_list[-1][-1]
gt_instances[:, 0] += sum([x.size(0) for x in proposals_offset_list]) - 1
proposals_idx_list.append(gt_instances.cpu().int())
proposals_offset_list.append(gt_instances_offset.cpu().int())
proposals_idx = torch.cat(proposals_idx_list, dim=0)
proposals_offset = torch.cat(proposals_offset_list)
cls_pred = torch.cat(cls_pred_list)
# restrict the number of training proposals to avoid OOM
max_proposal_num = getattr(self.cfg, 'max_proposal_num', 200)
if training_mode == 'train' and proposals_offset.shape[0] > max_proposal_num:
proposals_offset = proposals_offset[:max_proposal_num + 1]
proposals_idx = proposals_idx[: proposals_offset[-1]]
assert proposals_idx.shape[0] == proposals_offset[-1]
print('selected proposal num', proposals_offset.shape[0] - 1)
# proposals voxelization again
input_feats, inp_map = self.clusters_voxelization(proposals_idx, proposals_offset, output_feats, coords, self.score_spatial_shape, self.score_scale, self.score_mode)
# predict instance scores
score = self.intra_ins_unet(input_feats)
score = self.intra_ins_outputlayer(score)
# score_feats = score.features[inp_map.long()] # (sumNPoint, C)
# predict mask scores
# first linear then voxel to point, more efficient (because voxel num < point num)
mask_scores = self.mask_linear(score.features)
mask_scores = mask_scores[inp_map.long()]
scores_batch_idxs = score.indices[:, 0][inp_map.long()]
# predict instance scores
# if getattr(self.cfg, 'use_mask_filter_score_feature', False) and \
# epoch > self.cfg.use_mask_filter_score_feature_start_epoch:
# mask_index_select = torch.ones_like(mask_scores)
# mask_index_select[torch.sigmoid(mask_scores) < self.cfg.mask_filter_score_feature_thre] = 0.
# score_feats = score_feats * mask_index_select
# score_feats = softgroup_ops.roipool(score_feats, proposals_offset.cuda()) # (nProposal, C)
# score_feats = softgroup_ops.global_avg_pool(score_feats, proposals_offset.cuda())
score_feats = self.global_pool(score)
cls_scores = self.cls_linear(score_feats)
scores = self.score_linear(score_feats) # (nProposal, 1)
ret['proposal_scores'] = (scores_batch_idxs, cls_scores, scores, proposals_idx, proposals_offset, mask_scores)
return ret
def model_fn_decorator(test=False):
# config
from util.config import cfg
semantic_criterion = nn.CrossEntropyLoss(ignore_index=cfg.ignore_label).cuda()
score_criterion = nn.BCELoss(reduction='none').cuda()
def get_gt_instances(labels, instance_labels):
instance_pointnum = [] # (nInst), int
gt_cls = []
gt_instances = []
instance_num = int(instance_labels.max()) + 1
inst_count = 0
for i in range(instance_num):
inst_idx_i = (instance_labels == i).nonzero().view(-1)
cls_loc = inst_idx_i[0]
cls = labels[cls_loc]
if cls != cfg.ignore_label:
gt_cls.append(cls)
pad = torch.ones_like(inst_idx_i) * inst_count
instance = torch.stack([pad, inst_idx_i], dim=1)
gt_instances.append(instance)
inst_count += 1
gt_instances = torch.cat(gt_instances)
return gt_cls, gt_instances
def test_model_fn(batch, model, epoch, semantic_only=False):
coords = batch['locs'].cuda() # (N, 1 + 3), long, cuda, dimension 0 for batch_idx
voxel_coords = batch['voxel_locs'].cuda() # (M, 1 + 3), long, cuda
p2v_map = batch['p2v_map'].cuda() # (N), int, cuda
v2p_map = batch['v2p_map'].cuda() # (M, 1 + maxActive), int, cuda
coords_float = batch['locs_float'].cuda() # (N, 3), float32, cuda
feats = batch['feats'].cuda() # (N, C), float32, cuda
batch_offsets = batch['offsets'].cuda() # (B + 1), int, cuda
spatial_shape = batch['spatial_shape']
if cfg.use_coords:
feats = torch.cat((feats, coords_float), 1)
voxel_feats = softgroup_ops.voxelization(feats, v2p_map, cfg.mode) # (M, C), float, cuda
if cfg.dataset == 'scannetv2':
input_ = spconv.SparseConvTensor(voxel_feats, voxel_coords.int(), spatial_shape, 1)
ret = model(input_, p2v_map, coords_float, coords[:, 0].int(), batch_offsets, epoch, 'test', semantic_only=semantic_only)
elif cfg.dataset == 's3dis':
input_ = spconv.SparseConvTensor(voxel_feats, voxel_coords.int(), spatial_shape, 4)
batch_idxs = torch.zeros_like(coords[:, 0].int())
ret = model(input_, p2v_map, coords_float, batch_idxs, batch_offsets, epoch, 'test', split=True, semantic_only=semantic_only)
semantic_scores = ret['semantic_scores'] # (N, nClass) float32, cuda
pt_offsets = ret['pt_offsets'] # (N, 3), float32, cuda
if (epoch > cfg.prepare_epochs) and not semantic_only:
scores_batch_idxs, cls_scores, scores, proposals_idx, proposals_offset, mask_scores = ret['proposal_scores']
# preds
with torch.no_grad():
preds = {}
preds['semantic'] = semantic_scores
preds['pt_offsets'] = pt_offsets
if (epoch > cfg.prepare_epochs) and not semantic_only:
preds['score'] = scores
preds['cls_score'] = cls_scores
preds['proposals'] = (scores_batch_idxs, proposals_idx, proposals_offset, mask_scores)
return preds
def model_fn(batch, model, epoch, semantic_only=False):
# batch {'locs': locs, 'voxel_locs': voxel_locs, 'p2v_map': p2v_map, 'v2p_map': v2p_map,
# 'locs_float': locs_float, 'feats': feats, 'labels': labels, 'instance_labels': instance_labels,
# 'instance_info': instance_infos, 'instance_pointnum': instance_pointnum,
# 'id': tbl, 'offsets': batch_offsets, 'spatial_shape': spatial_shape}
coords = batch['locs'].cuda() # (N, 1 + 3), long, cuda, dimension 0 for batch_idx
voxel_coords = batch['voxel_locs'].cuda() # (M, 1 + 3), long, cuda
p2v_map = batch['p2v_map'].cuda() # (N), int, cuda
v2p_map = batch['v2p_map'].cuda() # (M, 1 + maxActive), int, cuda
coords_float = batch['locs_float'].cuda() # (N, 3), float32, cuda
feats = batch['feats'].cuda() # (N, C), float32, cuda
labels = batch['labels'].cuda() # (N), long, cuda
instance_labels = batch['instance_labels'].cuda() # (N), long, cuda, 0~total_nInst, -100
instance_info = batch['instance_info'].cuda() # (N, 9), float32, cuda, (meanxyz, minxyz, maxxyz)
instance_pointnum = batch['instance_pointnum'].cuda() # (total_nInst), long, cuda
instance_cls = batch['instance_cls'].cuda() # (total_nInst), int, cuda
batch_offsets = batch['offsets'].cuda() # (B + 1), int, cuda
spatial_shape = batch['spatial_shape']
if cfg.use_coords:
feats = torch.cat((feats, coords_float), 1)
voxel_feats = softgroup_ops.voxelization(feats, v2p_map, cfg.mode) # (M, C), float, cuda
input_ = spconv.SparseConvTensor(voxel_feats, voxel_coords.int(), spatial_shape, cfg.batch_size)
ret = model(input_, p2v_map, coords_float, coords[:, 0].int(), batch_offsets, epoch, 'train', semantic_only=semantic_only)
semantic_scores = ret['semantic_scores'] # (N, nClass) float32, cuda
pt_offsets = ret['pt_offsets'] # (N, 3), float32, cuda
if(epoch > cfg.prepare_epochs) and not semantic_only:
scores_batch_idxs, cls_scores, scores, proposals_idx, proposals_offset, mask_scores = ret['proposal_scores']
# scores: (nProposal, 1) float, cuda
# proposals_idx: (sumNPoint, 2), int, cpu, [:, 0] for cluster_id, [:, 1] for corresponding point idxs in N
# proposals_offset: (nProposal + 1), int, cpu
# mask_scores: (sumNPoint, 1), float, cuda
loss_inp = {}
loss_inp['semantic_scores'] = (semantic_scores, labels)
loss_inp['pt_offsets'] = (pt_offsets, coords_float, instance_info, instance_labels)
if(epoch > cfg.prepare_epochs) and not semantic_only:
loss_inp['proposal_scores'] = (scores_batch_idxs, cls_scores, scores, proposals_idx, proposals_offset, instance_pointnum, instance_cls, mask_scores)
loss, loss_out = loss_fn(loss_inp, epoch, semantic_only=semantic_only)
# accuracy / visual_dict / meter_dict
with torch.no_grad():
preds = {}
preds['semantic'] = semantic_scores
preds['pt_offsets'] = pt_offsets
if(epoch > cfg.prepare_epochs) and not semantic_only:
preds['score'] = scores
preds['proposals'] = (proposals_idx, proposals_offset)
visual_dict = {}
visual_dict['loss'] = loss
for k, v in loss_out.items():
visual_dict[k] = v[0]
meter_dict = {}
meter_dict['loss'] = (loss.item(), coords.shape[0])
for k, v in loss_out.items():
meter_dict[k] = (float(v[0]), v[1])
return loss, preds, visual_dict, meter_dict
def loss_fn(loss_inp, epoch, semantic_only=False):
loss_out = {}
'''semantic loss'''
semantic_scores, semantic_labels = loss_inp['semantic_scores']
# semantic_scores: (N, nClass), float32, cuda
# semantic_labels: (N), long, cuda
semantic_loss = semantic_criterion(semantic_scores, semantic_labels)
loss_out['semantic_loss'] = (semantic_loss, semantic_scores.shape[0])
'''offset loss'''
pt_offsets, coords, instance_info, instance_labels = loss_inp['pt_offsets']
# pt_offsets: (N, 3), float, cuda
# coords: (N, 3), float32
# instance_info: (N, 9), float32 tensor (meanxyz, minxyz, maxxyz)
# instance_labels: (N), long
gt_offsets = instance_info[:, 0:3] - coords # (N, 3)
pt_diff = pt_offsets - gt_offsets # (N, 3)
pt_dist = torch.sum(torch.abs(pt_diff), dim=-1) # (N)
valid = (instance_labels != cfg.ignore_label).float()
offset_norm_loss = torch.sum(pt_dist * valid) / (torch.sum(valid) + 1e-6)
loss_out['offset_norm_loss'] = (offset_norm_loss, valid.sum())
if (epoch > cfg.prepare_epochs) and not semantic_only:
'''score and mask loss'''
scores_batch_idxs, cls_scores, scores, proposals_idx, proposals_offset, instance_pointnum, instance_cls, mask_scores = loss_inp['proposal_scores']
# scores: (nProposal, 1), float32
# proposals_idx: (sumNPoint, 2), int, cpu, [:, 0] for cluster_id, [:, 1] for corresponding point idxs in N
# proposals_offset: (nProposal + 1), int, cpu
# instance_pointnum: (total_nInst), int
# prepare to compute iou and mask target
proposals_idx = proposals_idx[:, 1].cuda()
proposals_offset = proposals_offset.cuda()
# get iou and calculate mask label and mask loss
# mask_scores_sigmoid = torch.sigmoid(mask_scores)
# if getattr(cfg, 'cal_iou_based_on_mask', False) \
# and (epoch > cfg.cal_iou_based_on_mask_start_epoch):
# ious, mask_label = softgroup_ops.cal_iou_and_masklabel(proposals_idx[:, 1].cuda(), \
# proposals_offset.cuda(), instance_labels, instance_cls, instance_pointnum, mask_scores_sigmoid.detach(), 1)
# else:
# ious, mask_label = softgroup_ops.cal_iou_and_masklabel(proposals_idx[:, 1].cuda(), \
# proposals_offset.cuda(), instance_labels, instance_cls, instance_pointnum, mask_scores_sigmoid.detach(), 0)
# cal iou of clustered instance
ious_on_cluster = softgroup_ops.get_mask_iou_on_cluster(proposals_idx,
proposals_offset, instance_labels, instance_pointnum)
# filter out stuff instance
fg_inds = (instance_cls != cfg.ignore_label)
fg_instance_cls = instance_cls[fg_inds]
fg_ious_on_cluster = ious_on_cluster[:, fg_inds]
# overlap > thr on fg instances are positive samples
max_iou, gt_inds = fg_ious_on_cluster.max(1)
pos_inds = max_iou >= cfg.iou_thr # this value should match thr in get_mask_label.cu
pos_gt_inds = gt_inds[pos_inds]
# compute cls loss. follow detection convention: 0 -> K - 1 are fg, K is bg
labels = fg_instance_cls.new_full((fg_ious_on_cluster.size(0), ), cfg.classes)
labels[pos_inds] = fg_instance_cls[pos_gt_inds]
cls_loss = F.cross_entropy(cls_scores, labels)
loss_out['cls_loss'] = (cls_loss, labels.size(0))
# compute mask loss
mask_cls_label = labels[scores_batch_idxs.long()]
slice_inds = torch.arange(0, mask_cls_label.size(0), dtype=torch.long, device=mask_cls_label.device)
mask_scores_sigmoid_slice = mask_scores.sigmoid()[slice_inds, mask_cls_label]
# if getattr(cfg, 'cal_iou_based_on_mask', False) \
# and (epoch > cfg.cal_iou_based_on_mask_start_epoch):
# ious = softgroup_ops.get_mask_iou_on_pred(proposals_idx,
# proposals_offset, instance_labels, instance_pointnum, mask_scores_sigmoid_slice.detach())
# else:
# ious = ious_on_cluster
mask_label = softgroup_ops.get_mask_label(proposals_idx, proposals_offset, instance_labels, instance_cls, instance_pointnum, ious_on_cluster, cfg.iou_thr)
mask_label_weight = (mask_label != -1).float()
mask_label[mask_label==-1.] = 0.5 # any value is ok
mask_loss = F.binary_cross_entropy(mask_scores_sigmoid_slice, mask_label, weight=mask_label_weight, reduction='sum')
mask_loss /= (mask_label_weight.sum() + 1)
loss_out['mask_loss'] = (mask_loss, mask_label_weight.sum())
# mask_loss = torch.nn.functional.binary_cross_entropy(mask_scores_sigmoid, mask_label, weight=mask_label_weight, reduction='none')
# mask_loss = mask_loss.mean()
# loss_out['mask_loss'] = (mask_loss, mask_label_weight.sum())
# compute mask score loss
ious = softgroup_ops.get_mask_iou_on_pred(proposals_idx,
proposals_offset, instance_labels, instance_pointnum, mask_scores_sigmoid_slice.detach())
fg_ious = ious[:, fg_inds]
gt_ious, _ = fg_ious.max(1) # gt_ious: (nProposal,), float
# gt_scores = get_segmented_scores(gt_ious, cfg.fg_thresh, cfg.bg_thresh)
slice_inds = torch.arange(0, labels.size(0), dtype=torch.long, device=labels.device)
score_weight = (labels < cfg.classes).float()
score_slice = scores[slice_inds, labels]
score_loss = F.mse_loss(score_slice, gt_ious, reduction='none')
score_loss = (score_loss * score_weight).sum() / (score_weight.sum() + 1)
loss_out['score_loss'] = (score_loss, score_weight.sum())
'''total loss'''
loss = cfg.loss_weight[0] * semantic_loss + cfg.loss_weight[1] * offset_norm_loss
if(epoch > cfg.prepare_epochs) and not semantic_only:
loss += (cfg.loss_weight[2] * cls_loss)
loss += (cfg.loss_weight[3] * mask_loss)
loss += (cfg.loss_weight[4] * score_loss)
return loss, loss_out
if test:
fn = test_model_fn
else:
fn = model_fn
return fn
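The offset and mask terms in loss_fn are masked means; a minimal numeric check of the masked L1 offset loss, with the same 1e-6 stabiliser:

import torch

pt_offsets = torch.tensor([[0.1, 0.0, 0.0], [1.0, 0.0, 0.0]])
gt_offsets = torch.zeros_like(pt_offsets)
valid = torch.tensor([1.0, 0.0])  # second point carries the ignore label
pt_dist = (pt_offsets - gt_offsets).abs().sum(-1)
offset_norm_loss = (pt_dist * valid).sum() / (valid.sum() + 1e-6)
# -> 0.1: the invalid point contributes nothing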

283
test.py
View File

@ -1,19 +1,15 @@
import torch
import time
import argparse
import numpy as np
import random
import os
# from util.config import cfg
# cfg.task = 'test'
from util.log import logger
import util.utils as utils
import util.eval as eval
import torch
import yaml
import argparse
from model.softgroup import SoftGroup
from munch import Munch
from tqdm import tqdm
import util.utils as utils
from evaluation import ScanNetEval
from model.softgroup import SoftGroup
def get_args():
parser = argparse.ArgumentParser('SoftGroup')
@ -22,223 +18,6 @@ def get_args():
args = parser.parse_args()
return args
def init():
global result_dir
result_dir = os.path.join(cfg.exp_path, 'result', cfg.split)
backup_dir = os.path.join(result_dir, 'backup_files')
os.makedirs(backup_dir, exist_ok=True)
os.makedirs(os.path.join(result_dir, 'predicted_masks'), exist_ok=True)
os.system('cp test.py {}'.format(backup_dir))
os.system('cp {} {}'.format(cfg.model_dir, backup_dir))
os.system('cp {} {}'.format(cfg.dataset_dir, backup_dir))
os.system('cp {} {}'.format(cfg.config, backup_dir))
global semantic_label_idx
semantic_label_idx = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39]
logger.info(cfg)
random.seed(cfg.test_seed)
np.random.seed(cfg.test_seed)
torch.manual_seed(cfg.test_seed)
torch.cuda.manual_seed_all(cfg.test_seed)
def test(model, model_fn, data_name, epoch):
logger.info('>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>')
if cfg.dataset == 'scannetv2':
if data_name == 'scannet':
from data.scannetv2_inst import Dataset
dataset = Dataset(test=True)
dataset.testLoader()
else:
print("Error: no data loader - " + data_name)
exit(0)
dataloader = dataset.test_data_loader
total = 0
with torch.no_grad():
model = model.eval()
total_end1 = 0.
matches = {}
for i, batch in enumerate(dataloader):
# inference
start1 = time.time()
preds = model_fn(batch, model, epoch)
end1 = time.time() - start1
# decode results for evaluation
N = batch['feats'].shape[0]
test_scene_name = dataset.test_file_names[int(batch['id'][0])].split('/')[-1][:12]
semantic_scores = preds['semantic'] # (N, nClass=20) float32, cuda
semantic_pred = semantic_scores.max(1)[1] # (N) long, cuda
pt_offsets = preds['pt_offsets'] # (N, 3), float32, cuda
if (epoch > cfg.prepare_epochs):
scores = preds['score'] # (nProposal, 1) float, cuda
# scores_pred = torch.sigmoid(scores.view(-1))
scores_batch_idxs, proposals_idx, proposals_offset, mask_scores = preds['proposals']
cls_scores = preds['cls_score'].softmax(1)
slice_inds = torch.arange(cls_scores.size(0), dtype=torch.long, device=cls_scores.device)
cls_scores_new, cls_pred = cls_scores[:, :-1].max(1)
cluster_scores_list = []
clusters_list = []
cluster_semantic_id_list = []
# import pdb; pdb.set_trace()
for i in range(18):
# arg_score = cls_pred == i
score_inds = (cls_scores[:, i] > 0.001)
cls_scores_new = cls_scores[:, i]
scores_pred = scores[slice_inds, i]
scores_pred = scores_pred.clamp(0, 1) * cls_scores_new
# scores_pred = cls_scores_new
# mask_cls_pred = cls_pred[scores_batch_idxs.long()]
mask_slice_inds = torch.arange(scores_batch_idxs.size(0), dtype=torch.long, device=scores_batch_idxs.device)
mask_scores_new = mask_scores[:, i]
# proposals_idx: (sumNPoint, 2), int, cpu, [:, 0] for cluster_id, [:, 1] for corresponding point idxs in N
# proposals_offset: (nProposal + 1), int, cpu
proposals_pred = torch.zeros((proposals_offset.shape[0] - 1, N), dtype=torch.int, device=scores_pred.device)
# (nProposal, N), int, cuda
# outlier filtering
test_mask_score_thre = getattr(cfg, 'test_mask_score_thre', -0.5)
_mask = mask_scores_new > test_mask_score_thre
proposals_pred[proposals_idx[_mask][:, 0].long(), proposals_idx[_mask][:, 1].long()] = 1
# bg filtering
# import pdb; pdb.set_trace()
# pos_inds = (cls_pred != cfg.classes - 2)
# proposals_pred = proposals_pred[pos_inds]
# scores_pred = scores_pred[pos_inds]
# cls_pred = cls_pred[pos_inds]
# import pdb; pdb.set_trace()
semantic_id = cls_scores.new_full(cls_scores_new.size(), semantic_label_idx[i + 2], dtype=torch.long)
semantic_id1 = torch.tensor(semantic_label_idx, device=scores_pred.device) \
[semantic_pred[proposals_idx[:, 1][proposals_offset[:-1].long()].long()]] # (nProposal), long
# semantic_id_idx = semantic_pred[proposals_idx[:, 1][proposals_offset[:-1].long()].long()]
proposals_pred = proposals_pred[score_inds]
scores_pred = scores_pred[score_inds]
semantic_id = semantic_id[score_inds]
# score threshold
score_mask = (scores_pred > cfg.TEST_SCORE_THRESH)
scores_pred = scores_pred[score_mask]
proposals_pred = proposals_pred[score_mask]
semantic_id = semantic_id[score_mask]
# semantic_id_idx = semantic_id_idx[score_mask]
# npoint threshold
proposals_pointnum = proposals_pred.sum(1)
npoint_mask = (proposals_pointnum >= cfg.TEST_NPOINT_THRESH)
scores_pred = scores_pred[npoint_mask]
proposals_pred = proposals_pred[npoint_mask]
semantic_id = semantic_id[npoint_mask]
# nms (no need)
if getattr(cfg, 'using_NMS', False):
if semantic_id.shape[0] == 0:
pick_idxs = np.empty(0)
else:
proposals_pred_f = proposals_pred.float() # (nProposal, N), float, cuda
intersection = torch.mm(proposals_pred_f, proposals_pred_f.t()) # (nProposal, nProposal), float, cuda
proposals_pointnum = proposals_pred_f.sum(1) # (nProposal), float, cuda
proposals_pn_h = proposals_pointnum.unsqueeze(-1).repeat(1, proposals_pointnum.shape[0])
proposals_pn_v = proposals_pointnum.unsqueeze(0).repeat(proposals_pointnum.shape[0], 1)
cross_ious = intersection / (proposals_pn_h + proposals_pn_v - intersection)
pick_idxs = non_max_suppression(cross_ious.cpu().numpy(), scores_pred.cpu().numpy(), cfg.TEST_NMS_THRESH)
# int, (nCluster, N)
clusters = proposals_pred[pick_idxs]
cluster_scores = scores_pred[pick_idxs]
cluster_semantic_id = semantic_id[pick_idxs]
else:
clusters = proposals_pred
cluster_scores = scores_pred
cluster_semantic_id = semantic_id
clusters_list.append(clusters)
cluster_scores_list.append(cluster_scores)
cluster_semantic_id_list.append(cluster_semantic_id)
clusters = torch.cat(clusters_list)
cluster_scores = torch.cat(cluster_scores_list)
cluster_semantic_id = torch.cat(cluster_semantic_id_list)
# import pdb; pdb.set_trace()
nclusters = clusters.shape[0]
# prepare for evaluation
if cfg.eval:
pred_info = {}
pred_info['conf'] = cluster_scores.cpu().numpy()
pred_info['label_id'] = cluster_semantic_id.cpu().numpy()
pred_info['mask'] = clusters.cpu().numpy()
gt_file = os.path.join(cfg.data_root, cfg.dataset, cfg.split + '_gt', test_scene_name + '.txt')
gt2pred, pred2gt = eval.assign_instances_for_scan(test_scene_name, pred_info, gt_file)
matches[test_scene_name] = {}
matches[test_scene_name]['gt'] = gt2pred
matches[test_scene_name]['pred'] = pred2gt
if cfg.split == 'val':
matches[test_scene_name]['seg_gt'] = batch['labels']
matches[test_scene_name]['seg_pred'] = semantic_pred
# break
# save files
if cfg.save_semantic:
os.makedirs(os.path.join(result_dir, 'semantic'), exist_ok=True)
semantic_np = semantic_pred.cpu().numpy()
np.save(os.path.join(result_dir, 'semantic', test_scene_name + '.npy'), semantic_np)
if cfg.save_pt_offsets:
os.makedirs(os.path.join(result_dir, 'coords_offsets'), exist_ok=True)
pt_offsets_np = pt_offsets.cpu().numpy()
coords_np = batch['locs_float'].numpy()
coords_offsets = np.concatenate((coords_np, pt_offsets_np), 1) # (N, 6)
np.save(os.path.join(result_dir, 'coords_offsets', test_scene_name + '.npy'), coords_offsets)
if(epoch > cfg.prepare_epochs and cfg.save_instance):
f = open(os.path.join(result_dir, test_scene_name + '.txt'), 'w')
for proposal_id in range(nclusters):
clusters_i = clusters[proposal_id].cpu().numpy() # (N)
semantic_label = cluster_semantic_id[proposal_id]
score = cluster_scores[proposal_id]
f.write('predicted_masks/{}_{:03d}.txt {} {:.4f}'.format( \
test_scene_name, proposal_id, semantic_label, score))
if proposal_id < nclusters - 1:
f.write('\n')
np.savetxt(os.path.join(result_dir, 'predicted_masks', test_scene_name + '_%03d.txt' % (proposal_id)), clusters_i, fmt='%d')
f.close()
logger.info("instance iter: {}/{} point_num: {} ncluster: {} inference time: {:.2f}s".format( \
batch['id'][0] + 1, len(dataset.test_files), N, nclusters, end1))
total_end1 += end1
# import pdb; pdb.set_trace()
# break
# evaluation
if cfg.eval:
ap_scores = eval.evaluate_matches(matches)
avgs = eval.compute_averages(ap_scores)
eval.print_results(avgs)
logger.info("whole set inference time: {:.2f}s, latency per frame: {:.2f}ms".format(total_end1, total_end1 / len(dataloader) * 1000))
# evaluate semantic segmentation accuracy and mIoU
if cfg.split == 'val':
seg_accuracy = evaluate_semantic_segmantation_accuracy(matches)
logger.info("semantic_segmantation_accuracy: {:.4f}".format(seg_accuracy))
miou = evaluate_semantic_segmantation_miou(matches)
logger.info("semantic_segmantation_mIoU: {:.4f}".format(miou))
def evaluate_semantic_segmantation_accuracy(matches):
seg_gt_list = []
@ -254,6 +33,7 @@ def evaluate_semantic_segmantation_accuracy(matches):
seg_accuracy = correct.float() / whole.float()
return seg_accuracy
def evaluate_semantic_segmantation_miou(matches):
seg_gt_list = []
seg_pred_list = []
@ -269,7 +49,7 @@ def evaluate_semantic_segmantation_miou(matches):
iou_list = []
for _index in seg_gt_all.unique():
if _index != -100:
intersection = ((seg_gt_all == _index) & (seg_pred_all == _index)).sum()
intersection = ((seg_gt_all == _index) & (seg_pred_all == _index)).sum()
union = ((seg_gt_all == _index) | (seg_pred_all == _index)).sum()
iou = intersection.float() / union
iou_list.append(iou)
@ -277,70 +57,33 @@ def evaluate_semantic_segmantation_miou(matches):
miou = iou_tensor.mean()
return miou
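A worked example of the per-class IoU loop: with gt [0, 0, 1, 1] and pred [0, 1, 1, 1], class 0 scores 1/2 and class 1 scores 2/3, so mIoU ≈ 0.583:

import torch

seg_gt_all = torch.tensor([0, 0, 1, 1])
seg_pred_all = torch.tensor([0, 1, 1, 1])
ious = []
for c in seg_gt_all.unique():
    inter = ((seg_gt_all == c) & (seg_pred_all == c)).sum()
    union = ((seg_gt_all == c) | (seg_pred_all == c)).sum()
    ious.append(inter.float() / union)
print(torch.stack(ious).mean())  # -> 0.5833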
def non_max_suppression(ious, scores, threshold):
ixs = scores.argsort()[::-1]
pick = []
while len(ixs) > 0:
i = ixs[0]
pick.append(i)
iou = ious[i, ixs[1:]]
remove_ixs = np.where(iou > threshold)[0] + 1
ixs = np.delete(ixs, remove_ixs)
ixs = np.delete(ixs, 0)
return np.array(pick, dtype=np.int32)
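A usage sketch with toy masks, building the pairwise mask-IoU matrix the same way as the using_NMS branch above before calling the helper:

import numpy as np

masks = np.array([[1, 1, 0, 0], [1, 1, 1, 0], [0, 0, 0, 1]], dtype=np.float32)
scores = np.array([0.9, 0.8, 0.7])
inter = masks @ masks.T
areas = masks.sum(1)
ious = inter / (areas[:, None] + areas[None, :] - inter)
keep = non_max_suppression(ious, scores, threshold=0.3)
# -> keeps masks 0 and 2; mask 1 overlaps mask 0 with IoU 2/3 > 0.3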
if __name__ == '__main__':
test_seed = 567
random.seed(test_seed)
np.random.seed(test_seed)
torch.manual_seed(test_seed)
torch.cuda.manual_seed_all(test_seed)
args = get_args()
cfg = Munch.fromDict(yaml.safe_load(open(args.config, 'r')))
torch.backends.cudnn.enabled = False
# init()
# exp_name = cfg.config.split('/')[-1][:-5]
# model_name = exp_name.split('_')[0]
# data_name = exp_name.split('_')[-1]
# logger.info('=> creating model ...')
# logger.info('Classes: {}'.format(cfg.classes))
model = SoftGroup(**cfg.model)
logger.info(f'Load state dict from {args.checkpoint}')
print(f'Load state dict from {args.checkpoint}')
model = utils.load_checkpoint(model, args.checkpoint)
model.cuda()
# load model
# utils.checkpoint_restore(cfg, model, None, cfg.exp_path, cfg.config.split('/')[-1][:-5],
# use_cuda, cfg.test_epoch, dist=False, f=cfg.pretrain)
# resume from the latest epoch, or specify the epoch to restore
# evaluate
logger.info('>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>')
from data.scannetv2_inst import Dataset
dataset = Dataset(**cfg.data.test)
dataset.valLoader()
dataloader = dataset.val_data_loader
total = 0
all_preds, all_gts = [], []
with torch.no_grad():
model = model.eval()
total_end1 = 0.
matches = {}
for i, batch in enumerate(dataloader):
print(i)
for i, batch in tqdm(enumerate(dataloader), total=len(dataloader)):
ret = model(batch)
all_preds.append(ret['det_ins'])
all_gts.append(ret['gt_ins'])
from evaluation import ScanNetEval
scannet_eval = ScanNetEval()
scannet_eval = ScanNetEval(dataset.CLASSES)
scannet_eval.evaluate(all_preds, all_gts)

util/utils.py
View File

@ -134,7 +134,7 @@ def checkpoint_save(model, optimizer, exp_path, exp_name, epoch, save_freq=16, u
def load_checkpoint(model, checkpoint, strict=False):
state_dict = torch.load(checkpoint)
model.load_state_dict(state_dict['net'], strict=False)
model.load_state_dict(state_dict['net'], strict=strict)
return model
def load_model_param(model, pretrained_dict, prefix=""):