diff --git a/config/softgroup_fold5_backbone_s3dis.yaml b/config/softgroup_fold5_backbone_s3dis.yaml index beea456..79fe6ff 100644 --- a/config/softgroup_fold5_backbone_s3dis.yaml +++ b/config/softgroup_fold5_backbone_s3dis.yaml @@ -1,81 +1,61 @@ -GENERAL: - task: train # train, test - manual_seed: 123 - model_dir: model/softgroup/softgroup.py - dataset_dir: data/scannetv2_inst.py - -DATA: - data_root: dataset - dataset: s3dis - filename_suffix: _inst_nostuff.pth - train_areas: ['Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_6'] - test_area: 'Area_5' - train_repeats: 5 - +model: + channels: 32 + num_blocks: 7 semantic_classes: 13 - classes: 13 - ignore_label: -100 - - input_channel: 3 - scale: 50 # voxel_size = 1 / scale, scale 50 -> voxel_size 0.02m - batch_size: 4 - full_scale: [128, 512] - max_npoint: 250000 - mode: 4 # 4=mean - -STRUCTURE: - model_name: softgroup - width: 32 - block_residual: True - block_reps: 2 - use_coords: True + instance_classes: 13 + sem2ins_classes: [0, 1] semantic_only: True + ignore_label: -100 + grouping_cfg: + score_thr: 0.2 + radius: 0.04 + mean_active: 300 + class_numpoint_mean: [1823, 7457, 6189, 7424, 34229, 1724, 5439, + 6016, 39796, 5279, 5092, 12210, 10225] + instance_voxel_cfg: + scale: 50 + spatial_shape: 20 + test_cfg: + x4_split: True + cls_score_thr: 0.001 + mask_score_thr: -0.5 + min_npoint: 100 + fixed_modules: [] -TRAIN: - epochs: 30 - train_workers: 4 # data loader workers - optim: Adam # Adam or SGD +data: + train: + type: 's3dis' + data_root: 'dataset/s3dis/preprocess' + prefix: ['Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_6'] + suffix: '_inst_nostuff.pth' + repeat: 5 + training: True + voxel_cfg: + scale: 50 + spatial_shape: [128, 512] + max_npoint: 250000 + min_npoint: 5000 + test: + type: 's3dis' + data_root: 'dataset/s3dis/preprocess' + prefix: 'Area_5' + suffix: '_inst_nostuff.pth' + training: False + voxel_cfg: + scale: 50 + spatial_shape: [128, 512] + max_npoint: 250000 + min_npoint: 5000 + dataloader: + batch_size: 4 + num_workers: 4 + +optimizer: + type: 'Adam' lr: 0.001 - step_epoch: 0 - multiplier: 0.5 - momentum: 0.9 - weight_decay: 0.0001 - save_freq: 4 # also eval_freq - loss_weight: [1.0, 1.0, 1.0, 1.0, 1.0] # semantic_loss, offset_norm_loss, cls_loss, mask_loss, score_loss - fg_thresh: 1. - bg_thresh: 0. - score_scale: 50 # the minimal voxel size is 2cm - score_fullscale: 20 - score_mode: 4 # mean - pretrain_path: 'hais_ckpt.pth' - pretrain_module: ['input_conv', 'unet', 'output_layer'] - fix_module: [] - - point_aggr_radius: 0.04 - cluster_shift_meanActive: 300 - prepare_epochs: 10 - - max_proposal_num: 300 - max_clusters: 100 - - iou_thr: 0.5 - score_thr: 0.2 - -TEST: - split: val - test_epoch: 100 - test_workers: 1 - test_seed: 567 - - using_NMS: False - TEST_NMS_THRESH: 0.3 - TEST_SCORE_THRESH: -1 - TEST_NPOINT_THRESH: 100 - - eval: True - save_semantic: False - save_pt_offsets: False - save_instance: False - - test_mask_score_thre: -0.5 # bias fg << bg +epochs: 30 # actual epochs = 30 * repeat +step_epoch: 0 +save_freq: 2 +pretrain: 'hais_ckpt.pth' +work_dir: 'work_dirs/softgroup_s3dis_backbone' diff --git a/config/softgroup_fold5_default_s3dis.yaml b/config/softgroup_fold5_default_s3dis.yaml index 77383c1..574f6d5 100644 --- a/config/softgroup_fold5_default_s3dis.yaml +++ b/config/softgroup_fold5_default_s3dis.yaml @@ -15,12 +15,14 @@ model: instance_voxel_cfg: scale: 50 spatial_shape: 20 + train_cfg: + pos_iou_thr: 0.5 test_cfg: x4_split: True cls_score_thr: 0.001 mask_score_thr: -0.5 min_npoint: 100 - fixed_modules: [] + fixed_modules: ['input_conv', 'unet', 'output_layer', 'semantic_linear', 'offset_linear'] data: train: @@ -54,8 +56,8 @@ optimizer: type: 'Adam' lr: 0.001 -epochs: 512 -step_epoch: 100 -save_freq: 8 -pretrain: 'hais_ckpt.pth' -work_dir: 'work_dirs/softgroup_scannet' +epochs: 30 # actual epochs = 30 * repeat +step_epoch: 0 +save_freq: 2 +pretrain: 'exp/s3dis/softgroup/softgroup_fold5_s3dis/softgroup_fold5_s3dis-000000030.pth' +work_dir: 'work_dirs/softgroup_s3dis' diff --git a/model/softgroup.py b/model/softgroup.py index 69dff48..641faf9 100644 --- a/model/softgroup.py +++ b/model/softgroup.py @@ -3,6 +3,7 @@ import spconv import sys import torch import torch.nn as nn +import torch.nn.functional as F from util import utils from .blocks import ResidualBlock, UBlock @@ -24,6 +25,7 @@ class SoftGroup(nn.Module): ignore_label=-100, grouping_cfg=None, instance_voxel_cfg=None, + train_cfg=None, test_cfg=None, fixed_modules=[]): super().__init__() @@ -36,17 +38,9 @@ class SoftGroup(nn.Module): self.ignore_label = ignore_label self.grouping_cfg = grouping_cfg self.instance_voxel_cfg = instance_voxel_cfg + self.train_cfg = train_cfg self.test_cfg = test_cfg - # self.score_scale = cfg.score_scale - # self.score_spatial_shape = cfg.score_spatial_shape - # self.score_mode = cfg.score_mode - - # self.prepare_epochs = cfg.prepare_epochs - # self.pretrain_path = cfg.pretrain_path - # self.pretrain_module = cfg.pretrain_module - # self.fix_module = cfg.fix_module - block = ResidualBlock norm_fn = functools.partial(nn.BatchNorm1d, eps=1e-4, momentum=0.1) @@ -69,19 +63,22 @@ class SoftGroup(nn.Module): nn.Linear(channels, 3, bias=True)) # topdown refinement path - self.intra_ins_unet = UBlock([channels, 2 * channels], norm_fn, 2, block, indice_key_id=11) - self.intra_ins_outputlayer = spconv.SparseSequential(norm_fn(channels), nn.ReLU()) - self.cls_linear = nn.Linear(channels, instance_classes + 1) - self.mask_linear = nn.Sequential( - nn.Linear(channels, channels), nn.ReLU(), nn.Linear(channels, instance_classes + 1)) - self.score_linear = nn.Linear(channels, instance_classes + 1) + if not semantic_only: + self.intra_ins_unet = UBlock([channels, 2 * channels], + norm_fn, + 2, + block, + indice_key_id=11) + self.intra_ins_outputlayer = spconv.SparseSequential(norm_fn(channels), nn.ReLU()) + self.cls_linear = nn.Linear(channels, instance_classes + 1) + self.mask_linear = nn.Sequential( + nn.Linear(channels, channels), nn.ReLU(), nn.Linear(channels, instance_classes + 1)) + self.iou_score_linear = nn.Linear(channels, instance_classes + 1) - self.semantic_loss = nn.CrossEntropyLoss(ignore_index=ignore_label) - self.offset_loss = nn.L1Loss(reduction='sum') + nn.init.normal_(self.iou_score_linear.weight, 0, 0.01) + nn.init.constant_(self.iou_score_linear.bias, 0) self.apply(self.set_bn_init) - nn.init.normal_(self.score_linear.weight, 0, 0.01) - nn.init.constant_(self.score_linear.bias, 0) for mod in fixed_modules: mod = getattr(self, mod) @@ -96,6 +93,9 @@ class SoftGroup(nn.Module): m.weight.data.fill_(1.0) m.bias.data.fill_(0.0) + def init_weights(self): + pass + def forward(self, batch, return_loss=False): if return_loss: return self.forward_train(batch) @@ -111,9 +111,8 @@ class SoftGroup(nn.Module): feats = batch['feats'].cuda() semantic_labels = batch['labels'].cuda() instance_labels = batch['instance_labels'].cuda() - # instance_pointnum = batch['instance_pointnum'].cuda() - # instance_cls = batch['instance_cls'].cuda() - # batch_offsets = batch['offsets'].cuda() + instance_pointnum = batch['instance_pointnum'].cuda() + instance_cls = batch['instance_cls'].cuda() pt_offset_labels = batch['pt_offset_labels'].cuda() spatial_shape = batch['spatial_shape'] batch_size = batch['batch_size'] @@ -124,9 +123,25 @@ class SoftGroup(nn.Module): input = spconv.SparseConvTensor(voxel_feats, voxel_coords.int(), spatial_shape, batch_size) semantic_scores, pt_offsets, output_feats, coords_float = self.forward_backbone( input, v2p_map, coords_float) + + # point wise losses point_wise_loss = self.point_wise_loss(semantic_scores, pt_offsets, semantic_labels, instance_labels, pt_offset_labels) losses.update(point_wise_loss) + + # instance losses + if not self.semantic_only: + proposals_idx, proposals_offset = self.forward_grouping(semantic_scores, pt_offsets, + batch_idxs, coords_float, + self.grouping_cfg) + instance_batch_idxs, cls_scores, iou_scores, mask_scores = self.forward_instance( + proposals_idx, proposals_offset, output_feats, coords_float) + instance_loss = self.instance_loss(cls_scores, mask_scores, iou_scores, proposals_idx, + proposals_offset, instance_labels, instance_pointnum, + instance_cls, instance_batch_idxs) + losses.update(instance_loss) + + # parse loss loss = sum(v[0] for v in losses.values()) losses['loss'] = (loss, batch_idxs.size(0)) return loss, losses @@ -134,18 +149,74 @@ class SoftGroup(nn.Module): def point_wise_loss(self, semantic_scores, pt_offsets, semantic_labels, instance_labels, pt_offset_labels): losses = {} - semantic_loss = self.semantic_loss(semantic_scores, semantic_labels) + semantic_loss = F.cross_entropy( + semantic_scores, semantic_labels, ignore_index=self.ignore_label) losses['semantic_loss'] = (semantic_loss, semantic_scores.size(0)) pos_inds = instance_labels != self.ignore_label if pos_inds.sum() == 0: offset_loss = 0 * pt_offsets.sum() else: - offset_loss = self.offset_loss(pt_offsets[pos_inds], - pt_offset_labels[pos_inds]) / pos_inds.sum() + offset_loss = F.l1_loss( + pt_offsets[pos_inds], pt_offset_labels[pos_inds], reduction='sum') / pos_inds.sum() losses['offset_loss'] = (offset_loss, pos_inds.sum()) return losses + def instance_loss(self, cls_scores, mask_scores, iou_scores, proposals_idx, proposals_offset, + instance_labels, instance_pointnum, instance_cls, instance_batch_idxs): + losses = {} + proposals_idx = proposals_idx[:, 1].cuda() + proposals_offset = proposals_offset.cuda() + + # cal iou of clustered instance + ious_on_cluster = softgroup_ops.get_mask_iou_on_cluster(proposals_idx, proposals_offset, + instance_labels, instance_pointnum) + + # filter out background instances + fg_inds = (instance_cls != self.ignore_label) + fg_instance_cls = instance_cls[fg_inds] + fg_ious_on_cluster = ious_on_cluster[:, fg_inds] + + # overlap > thr on fg instances are positive samples + max_iou, gt_inds = fg_ious_on_cluster.max(1) + pos_inds = max_iou >= self.train_cfg.pos_iou_thr + pos_gt_inds = gt_inds[pos_inds] + + # compute cls loss. follow detection convention: 0 -> K - 1 are fg, K is bg + labels = fg_instance_cls.new_full((fg_ious_on_cluster.size(0), ), self.instance_classes) + labels[pos_inds] = fg_instance_cls[pos_gt_inds] + cls_loss = F.cross_entropy(cls_scores, labels) + losses['cls_loss'] = (cls_loss, labels.size(0)) + + # compute mask loss + mask_cls_label = labels[instance_batch_idxs.long()] + slice_inds = torch.arange( + 0, mask_cls_label.size(0), dtype=torch.long, device=mask_cls_label.device) + mask_scores_sigmoid_slice = mask_scores.sigmoid()[slice_inds, mask_cls_label] + mask_label = softgroup_ops.get_mask_label(proposals_idx, proposals_offset, instance_labels, + instance_cls, instance_pointnum, ious_on_cluster, + self.train_cfg.pos_iou_thr) + mask_label_weight = (mask_label != -1).float() + mask_label[mask_label == -1.] = 0.5 # any value is ok + mask_loss = F.binary_cross_entropy( + mask_scores_sigmoid_slice, mask_label, weight=mask_label_weight, reduction='sum') + mask_loss /= (mask_label_weight.sum() + 1) + losses['mask_loss'] = (mask_loss, mask_label_weight.sum()) + + # compute iou score loss + ious = softgroup_ops.get_mask_iou_on_pred(proposals_idx, proposals_offset, instance_labels, + instance_pointnum, + mask_scores_sigmoid_slice.detach()) + fg_ious = ious[:, fg_inds] + gt_ious, _ = fg_ious.max(1) + slice_inds = torch.arange(0, labels.size(0), dtype=torch.long, device=labels.device) + iou_score_weight = (labels < self.instance_classes).float() + iou_score_slice = iou_scores[slice_inds, labels] + iou_score_loss = F.mse_loss(iou_score_slice, gt_ious, reduction='none') + iou_score_loss = (iou_score_loss * iou_score_weight).sum() / (iou_score_weight.sum() + 1) + losses['iou_score_loss'] = (iou_score_loss, iou_score_weight.sum()) + return losses + def forward_test(self, batch): batch_idxs = batch['batch_idxs'].cuda() voxel_coords = batch['voxel_locs'].cuda() @@ -155,9 +226,6 @@ class SoftGroup(nn.Module): feats = batch['feats'].cuda() labels = batch['labels'].cuda() instance_labels = batch['instance_labels'].cuda() - # instance_pointnum = batch['instance_pointnum'].cuda() - # instance_cls = batch['instance_cls'].cuda() - # batch_offsets = batch['offsets'].cuda() spatial_shape = batch['spatial_shape'] batch_size = batch['batch_size'] @@ -280,22 +348,20 @@ class SoftGroup(nn.Module): input_feats, inp_map = self.clusters_voxelization(proposals_idx, proposals_offset, output_feats, coords_float, **self.instance_voxel_cfg) - - # predict instance scores - score = self.intra_ins_unet(input_feats) - score = self.intra_ins_outputlayer(score) + feats = self.intra_ins_unet(input_feats) + feats = self.intra_ins_outputlayer(feats) # predict mask scores - mask_scores = self.mask_linear(score.features) + mask_scores = self.mask_linear(feats.features) mask_scores = mask_scores[inp_map.long()] - scores_batch_idxs = score.indices[:, 0][inp_map.long()] + instance_batch_idxs = feats.indices[:, 0][inp_map.long()] - # predict instance scores - score_feats = self.global_pool(score) - cls_scores = self.cls_linear(score_feats) - iou_scores = self.score_linear(score_feats) + # predict instance cls and iou scores + feats = self.global_pool(feats) + cls_scores = self.cls_linear(feats) + iou_scores = self.iou_score_linear(feats) - return scores_batch_idxs, cls_scores, iou_scores, mask_scores + return instance_batch_idxs, cls_scores, iou_scores, mask_scores def get_instances(self, scan_id, proposals_idx, semantic_scores, cls_scores, iou_scores, mask_scores):