From 0a17ca2cd149fa1931d6fcf3699030ffe3fd04d7 Mon Sep 17 00:00:00 2001 From: Thang Vu Date: Fri, 8 Apr 2022 09:06:37 +0000 Subject: [PATCH] refactor bfs_cluster --- config/softgroup_fold5_default_s3dis.yaml | 3 + lib/softgroup_ops/functions/softgroup_ops.py | 13 +- .../src/bfs_cluster/bfs_cluster.cpp | 66 +++-- .../src/bfs_cluster/bfs_cluster.h | 7 +- lib/softgroup_ops/src/cuda.cu | 6 +- .../hierarchical_aggregation.cpp | 210 --------------- .../hierarchical_aggregation.cu | 254 ------------------ .../hierarchical_aggregation.h | 31 --- lib/softgroup_ops/src/softgroup_api.cpp | 4 - lib/softgroup_ops/src/softgroup_ops.cpp | 7 +- lib/softgroup_ops/src/softgroup_ops.h | 9 +- model/softgroup.py | 21 +- 12 files changed, 59 insertions(+), 572 deletions(-) delete mode 100644 lib/softgroup_ops/src/hierarchical_aggregation/hierarchical_aggregation.cpp delete mode 100644 lib/softgroup_ops/src/hierarchical_aggregation/hierarchical_aggregation.cu delete mode 100644 lib/softgroup_ops/src/hierarchical_aggregation/hierarchical_aggregation.h diff --git a/config/softgroup_fold5_default_s3dis.yaml b/config/softgroup_fold5_default_s3dis.yaml index 574f6d5..4388845 100644 --- a/config/softgroup_fold5_default_s3dis.yaml +++ b/config/softgroup_fold5_default_s3dis.yaml @@ -12,10 +12,13 @@ model: mean_active: 300 class_numpoint_mean: [1823, 7457, 6189, 7424, 34229, 1724, 5439, 6016, 39796, 5279, 5092, 12210, 10225] + npoint_thr: 0.05 # absolute if class_numpoint == -1, relative if class_numpoint != -1 + ignore_classes: [0, 1] instance_voxel_cfg: scale: 50 spatial_shape: 20 train_cfg: + max_proposal_num: 200 pos_iou_thr: 0.5 test_cfg: x4_split: True diff --git a/lib/softgroup_ops/functions/softgroup_ops.py b/lib/softgroup_ops/functions/softgroup_ops.py index cc612eb..174f3cc 100644 --- a/lib/softgroup_ops/functions/softgroup_ops.py +++ b/lib/softgroup_ops/functions/softgroup_ops.py @@ -19,7 +19,6 @@ class HierarchicalAggregation(Function): ''' N = start_len.size(0) - assert cluster_numpoint_mean.is_contiguous() assert semantic_label.is_contiguous() assert coord_shift.is_contiguous() assert ball_query_idxs.is_contiguous() @@ -329,10 +328,9 @@ ballquery_batch_p = BallQueryBatchP.apply class BFSCluster(Function): @staticmethod - def forward(ctx, semantic_label, ball_query_idxs, start_len, threshold): + def forward(ctx, cluster_numpoint_mean, ball_query_idxs, start_len, threshold, class_id): ''' :param ctx: - :param semantic_label: (N), int :param ball_query_idxs: (nActive), int :param start_len: (N, 2), int :return: cluster_idxs: int (sumNPoint, 2), dim 0 for cluster_id, dim 1 for corresponding point idxs in N @@ -340,15 +338,14 @@ class BFSCluster(Function): ''' N = start_len.size(0) - - assert semantic_label.is_contiguous() + assert cluster_numpoint_mean.is_contiguous() assert ball_query_idxs.is_contiguous() assert start_len.is_contiguous() - cluster_idxs = semantic_label.new() - cluster_offsets = semantic_label.new() + cluster_idxs = ball_query_idxs.new() + cluster_offsets = ball_query_idxs.new() - SOFTGROUP_OP.bfs_cluster(semantic_label, ball_query_idxs, start_len, cluster_idxs, cluster_offsets, N, threshold) + SOFTGROUP_OP.bfs_cluster(cluster_numpoint_mean, ball_query_idxs, start_len, cluster_idxs, cluster_offsets, N, threshold, class_id) return cluster_idxs, cluster_offsets diff --git a/lib/softgroup_ops/src/bfs_cluster/bfs_cluster.cpp b/lib/softgroup_ops/src/bfs_cluster/bfs_cluster.cpp index f9f0421..dab9eca 100644 --- a/lib/softgroup_ops/src/bfs_cluster/bfs_cluster.cpp +++ b/lib/softgroup_ops/src/bfs_cluster/bfs_cluster.cpp @@ -2,12 +2,13 @@ Ball Query with BatchIdx & Clustering Algorithm Written by Li Jiang All Rights Reserved 2020. + +Modified by Thang Vu - Remove semantic label in clustering */ #include "bfs_cluster.h" -/* ================================== ballquery_batch_p - * ================================== */ +/* =================== ballquery_batch_p================================= */ // input xyz: (n, 3) float // input batch_idxs: (n) int // input batch_offsets: (B+1) int, batch_offsets[-1] @@ -29,10 +30,8 @@ int ballquery_batch_p(at::Tensor xyz_tensor, at::Tensor batch_idxs_tensor, return cumsum; } -/* ================================== bfs_cluster - * ================================== */ -ConnectedComponent find_cc(Int idx, int *semantic_label, Int *ball_query_idxs, - int *start_len, int *visited) { +ConnectedComponent find_cc(Int idx, Int *ball_query_idxs, int *start_len, + int *visited) { ConnectedComponent cc; cc.addPoint(idx); visited[idx] = 1; @@ -46,47 +45,46 @@ ConnectedComponent find_cc(Int idx, int *semantic_label, Int *ball_query_idxs, Q.pop(); int start = start_len[cur * 2]; int len = start_len[cur * 2 + 1]; - int label_cur = semantic_label[cur]; for (Int i = start; i < start + len; i++) { Int idx_i = ball_query_idxs[i]; - if (semantic_label[idx_i] != label_cur) - continue; if (visited[idx_i] == 1) continue; - cc.addPoint(idx_i); visited[idx_i] = 1; - Q.push(idx_i); } } return cc; } -// input: semantic_label, int, N -// input: ball_query_idxs, Int, (nActive) -// input: start_len, int, (N, 2) -// output: clusters, CCs -int get_clusters(int *semantic_label, Int *ball_query_idxs, int *start_len, - const Int nPoint, int threshold, - ConnectedComponents &clusters) { - int visited[nPoint] = {0}; - +int get_clusters(float *class_numpoint_mean, int *ball_query_idxs, + int *start_len, const int nPoint, float threshold, + ConnectedComponents &clusters, const int class_id) { + int *visited = new int[nPoint]{0}; + float _class_numpoint_mean, thr; int sumNPoint = 0; - for (Int i = 0; i < nPoint; i++) { + + for (int i = 0; i < nPoint; i++) { if (visited[i] == 0) { - ConnectedComponent CC = - find_cc(i, semantic_label, ball_query_idxs, start_len, visited); - if ((int)CC.pt_idxs.size() >= threshold) { + ConnectedComponent CC = find_cc(i, ball_query_idxs, start_len, visited); + _class_numpoint_mean = class_numpoint_mean[class_id]; + + // if _class_num_point_mean is not defined (-1) directly use threshold + if (_class_numpoint_mean == -1) { + thr = threshold; + } else { + thr = threshold * _class_numpoint_mean; + } + if ((int)CC.pt_idxs.size() >= thr) { clusters.push_back(CC); sumNPoint += (int)CC.pt_idxs.size(); } } } - return sumNPoint; } +// convert from ConnectedComponents to (idxs, offsets) representation void fill_cluster_idxs_(ConnectedComponents &CCs, int *cluster_idxs, int *cluster_offsets) { for (int i = 0; i < (int)CCs.size(); i++) { @@ -99,33 +97,29 @@ void fill_cluster_idxs_(ConnectedComponents &CCs, int *cluster_idxs, } } -// input: semantic_label, int, N +// input: class_numpoint_mean_tensor // input: ball_query_idxs, int, (nActive) // input: start_len, int, (N, 2) // output: cluster_idxs, int (sumNPoint, 2), dim 0 for cluster_id, dim 1 for // corresponding point idxs in N // output: cluster_offsets, int (nCluster + 1) -void bfs_cluster(at::Tensor semantic_label_tensor, +void bfs_cluster(at::Tensor class_numpoint_mean_tensor, at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor, at::Tensor cluster_idxs_tensor, at::Tensor cluster_offsets_tensor, const int N, - int threshold) { - int *semantic_label = semantic_label_tensor.data(); + float threshold, const int class_id) { + float *class_numpoint_mean = class_numpoint_mean_tensor.data(); Int *ball_query_idxs = ball_query_idxs_tensor.data(); int *start_len = start_len_tensor.data(); - ConnectedComponents CCs; - int sumNPoint = get_clusters(semantic_label, ball_query_idxs, start_len, N, - threshold, CCs); - + int sumNPoint = get_clusters(class_numpoint_mean, ball_query_idxs, start_len, + N, threshold, CCs, class_id); int nCluster = (int)CCs.size(); cluster_idxs_tensor.resize_({sumNPoint, 2}); cluster_offsets_tensor.resize_({nCluster + 1}); cluster_idxs_tensor.zero_(); cluster_offsets_tensor.zero_(); - int *cluster_idxs = cluster_idxs_tensor.data(); int *cluster_offsets = cluster_offsets_tensor.data(); - fill_cluster_idxs_(CCs, cluster_idxs, cluster_offsets); -} \ No newline at end of file +} diff --git a/lib/softgroup_ops/src/bfs_cluster/bfs_cluster.h b/lib/softgroup_ops/src/bfs_cluster/bfs_cluster.h index 4d7087c..37ca6b7 100644 --- a/lib/softgroup_ops/src/bfs_cluster/bfs_cluster.h +++ b/lib/softgroup_ops/src/bfs_cluster/bfs_cluster.h @@ -21,9 +21,10 @@ int ballquery_batch_p_cuda(int n, int meanActive, float radius, const int *batch_offsets, int *idx, int *start_len, cudaStream_t stream); -void bfs_cluster(at::Tensor semantic_label_tensor, +void bfs_cluster(at::Tensor class_numpoint_mean_tensor, at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor, at::Tensor cluster_idxs_tensor, - at::Tensor cluster_offsets_tensor, const int N, int threshold); + at::Tensor cluster_offsets_tensor, const int N, + float threshold, const int class_id); -#endif // BFS_CLUSTER_H \ No newline at end of file +#endif // BFS_CLUSTER_H diff --git a/lib/softgroup_ops/src/cuda.cu b/lib/softgroup_ops/src/cuda.cu index 63eeaf4..b611e88 100644 --- a/lib/softgroup_ops/src/cuda.cu +++ b/lib/softgroup_ops/src/cuda.cu @@ -1,10 +1,8 @@ #include "datatype/datatype.h" #include -#include "cal_iou_and_masklabel/cal_iou_and_masklabel.cu" -#include "hierarchical_aggregation/hierarchical_aggregation.cu" - #include "bfs_cluster/bfs_cluster.cu" +#include "cal_iou_and_masklabel/cal_iou_and_masklabel.cu" #include "get_iou/get_iou.cu" #include "roipool/roipool.cu" #include "sec_mean/sec_mean.cu" @@ -17,4 +15,4 @@ template void voxelize_fp_cuda(Int nOutputRows, Int maxActive, template void voxelize_bp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, float *d_output_feats, - float *d_feats, Int *rules, bool average); \ No newline at end of file + float *d_feats, Int *rules, bool average); diff --git a/lib/softgroup_ops/src/hierarchical_aggregation/hierarchical_aggregation.cpp b/lib/softgroup_ops/src/hierarchical_aggregation/hierarchical_aggregation.cpp deleted file mode 100644 index 9c4af56..0000000 --- a/lib/softgroup_ops/src/hierarchical_aggregation/hierarchical_aggregation.cpp +++ /dev/null @@ -1,210 +0,0 @@ -#include "hierarchical_aggregation.h" -#include "time.h" - -/* ================================== hierarchical_aggregation - * ================================== */ - -// instance point num for each class, statistical data from the training set -float class_numpoint_mean_dict[20] = { - -1., -1., 3917., 12056., 2303., 8331., 3948., 3166., 5629., 11719., - 1003., 3317., 4912., 10221., 3889., 4136., 2120., 945., 3967., 2589.}; - -ConnectedComponent find_cc(int idx, int *semantic_label, float *coord_shift, - int *batch_idxs, int *ball_query_idxs, - int *start_len, int *visited) { - ConnectedComponent cc; - cc.addPoint(idx); - cc.accum_x += coord_shift[idx * 3 + 0]; - cc.accum_y += coord_shift[idx * 3 + 1]; - cc.accum_z += coord_shift[idx * 3 + 2]; - // cc.cls_label = semantic_label[idx]; // currently cc's label is the label of - // the start point, convert to float - cc.batch_idx = batch_idxs[idx]; // record batch info - visited[idx] = 1; - std::queue Q; - assert(Q.empty()); - Q.push(idx); - while (!Q.empty()) { - int cur = Q.front(); - Q.pop(); - int start = start_len[cur * 2]; - int len = start_len[cur * 2 + 1]; - // int label_cur = semantic_label[cur]; - for (int i = start; i < start + len; i++) { - int idx_i = ball_query_idxs[i]; - // if (semantic_label[idx_i] != label_cur) continue; - if (visited[idx_i] == 1) - continue; - cc.addPoint(idx_i); - cc.accum_x += coord_shift[idx_i * 3 + 0]; - cc.accum_y += coord_shift[idx_i * 3 + 1]; - cc.accum_z += coord_shift[idx_i * 3 + 2]; - visited[idx_i] = 1; - Q.push(idx_i); - } - } - return cc; -} - -// split clusters into fragment and primary based on point num -void split_clusters(float *class_numpoint_mean, int *semantic_label, - float *coord_shift, int *batch_idxs, int *ball_query_idxs, - int *start_len, const int nPoint, - ConnectedComponents &CCs_fragment, - ConnectedComponents &CCs_kept, - ConnectedComponents &CCs_primary, int *sumNPoint_fragment, - int *sumNPoint_kept, int *sumNPoint_primary, - const int class_id) { - int *visited = new int[nPoint]{0}; - // int _class_idx; - float _class_numpoint_mean, low_thre, high_thre; - - for (int i = 0; i < nPoint; i++) { - if (visited[i] == 0) { - ConnectedComponent CC = - find_cc(i, semantic_label, coord_shift, batch_idxs, ball_query_idxs, - start_len, visited); - // _class_idx = CC.cls_label; - _class_numpoint_mean = class_numpoint_mean[class_id]; - - low_thre = 0.05 * _class_numpoint_mean; - high_thre = 0.3 * _class_numpoint_mean; - - if ((int)CC.pt_idxs.size() < high_thre) { - CCs_fragment.push_back(CC); - *sumNPoint_fragment += (int)CC.pt_idxs.size(); - - // keep fragments which are large enough to be independent instances - if ((int)CC.pt_idxs.size() >= low_thre && - (int)CC.pt_idxs.size() < high_thre) { - CCs_kept.push_back(CC); - *sumNPoint_kept += (int)CC.pt_idxs.size(); - } - } else { - CCs_primary.push_back(CC); - *sumNPoint_primary += (int)CC.pt_idxs.size(); - } - } - } - return; -} - -// convert from ConnectedComponents to (idxs, offsets) representation -void fill_cluster_idxs_(ConnectedComponents &CCs, int *cluster_idxs, - int *cluster_offsets, float *cluster_centers) { - for (int i = 0; i < (int)CCs.size(); i++) { - cluster_offsets[i + 1] = cluster_offsets[i] + (int)CCs[i].pt_idxs.size(); - - cluster_centers[i * 5 + 0] = CCs[i].accum_x / (float)CCs[i].pt_idxs.size(); - cluster_centers[i * 5 + 1] = CCs[i].accum_y / (float)CCs[i].pt_idxs.size(); - cluster_centers[i * 5 + 2] = CCs[i].accum_z / (float)CCs[i].pt_idxs.size(); - cluster_centers[i * 5 + 3] = (float)CCs[i].cls_label; - cluster_centers[i * 5 + 4] = (float)CCs[i].batch_idx; - - for (int j = 0; j < (int)CCs[i].pt_idxs.size(); j++) { - int idx = CCs[i].pt_idxs[j]; - cluster_idxs[(cluster_offsets[i] + j) * 2 + 0] = i; - cluster_idxs[(cluster_offsets[i] + j) * 2 + 1] = idx; - } - } -} - -// input: semantic_label, int, (N) -// input: coord_shift, float, (N, 3) -// input: batch_idxs, int, (N) -// input: ball_query_idxs, int, (nActive) -// input: start_len, int, (N, 2) -//(fragment_idxs, fragment_offsets, fragment_centers) for fragment clusters -//(cluster_idxs_kept_tensor, cluster_offsets_kept_tensor, -//cluster_centers_kept_tensor) for keeping some fragments -//(primary_idxs_tensor, primary_offsets, primary_centers) for primary clusters -//(primary_idxs_post_tensor, primary_offsets_post_tensor) for aggregated -//clusters -void hierarchical_aggregation( - at::Tensor class_numpoint_mean_tensor, at::Tensor semantic_label_tensor, - at::Tensor coord_shift_tensor, at::Tensor batch_idxs_tensor, - at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor, - at::Tensor fragment_idxs_tensor, at::Tensor fragment_offsets_tensor, - at::Tensor fragment_centers_tensor, at::Tensor cluster_idxs_kept_tensor, - at::Tensor cluster_offsets_kept_tensor, - at::Tensor cluster_centers_kept_tensor, at::Tensor primary_idxs_tensor, - at::Tensor primary_offsets_tensor, at::Tensor primary_centers_tensor, - at::Tensor primary_idxs_post_tensor, at::Tensor primary_offsets_post_tensor, - const int N, const int training_mode_, const int using_set_aggr_, - const int class_id) { - float *class_numpoint_mean = class_numpoint_mean_tensor.data(); - int *semantic_label = semantic_label_tensor.data(); - float *coord_shift = coord_shift_tensor.data(); - int *batch_idxs = batch_idxs_tensor.data(); - int *ball_query_idxs = ball_query_idxs_tensor.data(); - int *start_len = start_len_tensor.data(); - - ConnectedComponents CCs_fragment; - ConnectedComponents CCs_kept; - ConnectedComponents CCs_primary; - - int sumNPoint_fragment = 0, sumNPoint_kept = 0, sumNPoint_primary = 0; - split_clusters(class_numpoint_mean, semantic_label, coord_shift, batch_idxs, - ball_query_idxs, start_len, N, CCs_fragment, CCs_kept, - CCs_primary, &sumNPoint_fragment, &sumNPoint_kept, - &sumNPoint_primary, class_id); - - cluster_idxs_kept_tensor.resize_({sumNPoint_kept, 2}); - cluster_offsets_kept_tensor.resize_({(int)CCs_kept.size() + 1}); - cluster_centers_kept_tensor.resize_({(int)CCs_kept.size(), 5}); - cluster_idxs_kept_tensor.zero_(); - cluster_offsets_kept_tensor.zero_(); - cluster_centers_kept_tensor.zero_(); - int *cluster_idxs_kept = cluster_idxs_kept_tensor.data(); - int *cluster_offsets_kept = cluster_offsets_kept_tensor.data(); - float *cluster_centers_kept = cluster_centers_kept_tensor.data(); - fill_cluster_idxs_(CCs_kept, cluster_idxs_kept, cluster_offsets_kept, - cluster_centers_kept); - - primary_idxs_tensor.resize_({sumNPoint_primary, 2}); - primary_offsets_tensor.resize_({(int)CCs_primary.size() + 1}); - primary_centers_tensor.resize_({(int)CCs_primary.size(), 5}); - primary_idxs_tensor.zero_(); - primary_offsets_tensor.zero_(); - primary_centers_tensor.zero_(); - int *primary_idxs = primary_idxs_tensor.data(); - int *primary_offsets = primary_offsets_tensor.data(); - float *primary_centers = primary_centers_tensor.data(); - fill_cluster_idxs_(CCs_primary, primary_idxs, primary_offsets, - primary_centers); - - if (using_set_aggr_ == 0) { // only point aggr - return; - } - - fragment_idxs_tensor.resize_({sumNPoint_fragment, 2}); - fragment_offsets_tensor.resize_({(int)CCs_fragment.size() + 1}); - fragment_centers_tensor.resize_( - {(int)CCs_fragment.size(), - 5}); //[:, -2] for cls_label, [:, -1] for batch_idx - fragment_idxs_tensor.zero_(); - fragment_offsets_tensor.zero_(); - fragment_centers_tensor.zero_(); - int *fragment_idxs = fragment_idxs_tensor.data(); - int *fragment_offsets = fragment_offsets_tensor.data(); - float *fragment_centers = fragment_centers_tensor.data(); - fill_cluster_idxs_(CCs_fragment, fragment_idxs, fragment_offsets, - fragment_centers); - - // prerare tensor for storing post-primary - primary_idxs_post_tensor.resize_( - {sumNPoint_fragment + sumNPoint_primary, - 2}); // never overflow, but need to cut off tails - primary_offsets_post_tensor.resize_({(int)CCs_primary.size() + 1}); - primary_idxs_post_tensor.zero_(); - primary_offsets_post_tensor.zero_(); - int *primary_idxs_post = primary_idxs_post_tensor.data(); - int *primary_offsets_post = primary_offsets_post_tensor.data(); - - // set aggr - hierarchical_aggregation_cuda( - sumNPoint_fragment, (int)CCs_fragment.size(), fragment_idxs, - fragment_offsets, fragment_centers, sumNPoint_primary, - (int)CCs_primary.size(), primary_idxs, primary_offsets, primary_centers, - primary_idxs_post, primary_offsets_post); -} diff --git a/lib/softgroup_ops/src/hierarchical_aggregation/hierarchical_aggregation.cu b/lib/softgroup_ops/src/hierarchical_aggregation/hierarchical_aggregation.cu deleted file mode 100644 index b5069c2..0000000 --- a/lib/softgroup_ops/src/hierarchical_aggregation/hierarchical_aggregation.cu +++ /dev/null @@ -1,254 +0,0 @@ -#include "hierarchical_aggregation.h" -#include -#include -#include -#include -#include - -#define MAX_PRIMARY_NUM 1024 -#define MAX_PER_PRIMARY_ABSORB_FRAGMENT_NUM 1024 -#define INFINITY_DIS_SQUARE 10000 -#define MAX_PER_PRIMARY_ABSORB_POINT_NUM 8192 -#define MAX_THREADS_PER_BLOCK 512 -#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) - -// input: cuda_fragment_centers (fragment_num * 5,), 5 for (x, y, z, cls_label, -// batch_idx) -// input: cuda_primary_centers (primary_num * 5,), 5 for (x, y, z, cls_label, -// batch_idx) -// input: ... -// output: cuda_primary_absorb_fragment_idx -// output: cuda_primary_absorb_fragment_cnt -__global__ void -fragment_find_primary_(int primary_num, int *cuda_primary_offsets, - float *cuda_primary_centers, int fragment_num, - int *cuda_fragment_offsets, float *cuda_fragment_centers, - int *cuda_primary_absorb_fragment_idx, - int *cuda_primary_absorb_fragment_cnt) { - - int fragment_idx = blockIdx.x * blockDim.x + threadIdx.x; - if (fragment_idx >= fragment_num) - return; - - // find the nearest primary for each fragment - float nearest_dis_square = INFINITY_DIS_SQUARE; - int nearest_idx = -1; // primary_idx - for (int i = 0; i < primary_num; i++) { - if (abs(cuda_primary_centers[i * 5 + 3] - - cuda_fragment_centers[fragment_idx * 5 + 3]) > - 0.1) { // judge same cls_label or not - continue; - } - if (abs(cuda_primary_centers[i * 5 + 4] - - cuda_fragment_centers[fragment_idx * 5 + 4]) > - 0.1) { // judge same batch_idx or not - continue; - } - float temp_dis_square = pow((cuda_primary_centers[i * 5 + 0] - - cuda_fragment_centers[fragment_idx * 5 + 0]), - 2) + - pow((cuda_primary_centers[i * 5 + 1] - - cuda_fragment_centers[fragment_idx * 5 + 1]), - 2) + - pow((cuda_primary_centers[i * 5 + 2] - - cuda_fragment_centers[fragment_idx * 5 + 2]), - 2); - if (temp_dis_square < nearest_dis_square) { - nearest_dis_square = temp_dis_square; - nearest_idx = i; - } - } - if (nearest_idx == -1) - return; // fragment not belong to any primary - - // r_size - int primary_point_num = - cuda_primary_offsets[nearest_idx + 1] - cuda_primary_offsets[nearest_idx]; - float r_size = 0.01 * sqrt(float(primary_point_num)); - - // r_cls - // instance radius for each class, statistical data from the training set - float class_radius_mean[20] = {-1., - -1., - 0.7047687683952325, - 1.1732690381942337, - 0.39644035821116036, - 1.011516629020215, - 0.7260155292902369, - 0.8674973999335017, - 0.8374931435447094, - 1.0454153869133096, - 0.32879464797430913, - 1.1954566226966346, - 0.8628817944400078, - 1.0416287916782507, - 0.6602697958671507, - 0.8541363897836871, - 0.38055290598206537, - 0.3011878752684007, - 0.7420871812436316, - 0.4474268644407741}; - int _class_idx = (int)cuda_fragment_centers[fragment_idx * 5 + 3]; - float r_cls = class_radius_mean[_class_idx] * 1.; - - // r_set - float r_set = max(r_size, r_cls); - - // judge - if (nearest_dis_square < r_set * r_set) { - int _offect = atomicAdd(cuda_primary_absorb_fragment_cnt + nearest_idx, 1); - if (_offect < MAX_PER_PRIMARY_ABSORB_FRAGMENT_NUM) - cuda_primary_absorb_fragment_idx[nearest_idx * - MAX_PER_PRIMARY_ABSORB_FRAGMENT_NUM + - _offect] = fragment_idx; - else { - ; - } - } -} - -// input: ... -// output: cuda_concat_idxs -// output: cuda_concat_point_num, -__global__ void -concat_fragments_(int *cuda_fragment_idxs, int *cuda_fragment_offsets, - int *cuda_primary_idxs, int *cuda_primary_offsets, - int *cuda_primary_absorb_fragment_idx, - int *cuda_primary_absorb_fragment_cnt, int *cuda_concat_idxs, - int *cuda_concat_point_num, int primary_num) { - - int primary_idx = blockIdx.x; - if (primary_idx >= primary_num) - return; - - int _accu_offset = 0; // unit is point - for (int i = 0; i < cuda_primary_absorb_fragment_cnt[primary_idx] && - i < MAX_PER_PRIMARY_ABSORB_FRAGMENT_NUM; - i++) { - int idx = cuda_primary_absorb_fragment_idx - [primary_idx * MAX_PER_PRIMARY_ABSORB_FRAGMENT_NUM + i]; - for (int j = cuda_fragment_offsets[idx]; j < cuda_fragment_offsets[idx + 1]; - j++) { - if (_accu_offset < MAX_PER_PRIMARY_ABSORB_POINT_NUM) { - cuda_concat_idxs[primary_idx * MAX_PER_PRIMARY_ABSORB_POINT_NUM * 2 + - _accu_offset * 2 + 0] = primary_idx; - cuda_concat_idxs[primary_idx * MAX_PER_PRIMARY_ABSORB_POINT_NUM * 2 + - _accu_offset * 2 + 1] = cuda_fragment_idxs[j * 2 + 1]; - _accu_offset++; - } else { - ; - } - } - } - cuda_concat_point_num[primary_idx] = _accu_offset; -} - -void hierarchical_aggregation_cuda( - int fragment_total_point_num, int fragment_num, int *fragment_idxs, - int *fragment_offsets, float *fragment_centers, int primary_total_point_num, - int primary_num, int *primary_idxs, int *primary_offsets, - float *primary_centers, int *primary_idxs_post, int *primary_offsets_post) { - - if (primary_num == 0) { - return; - } - // on devices, allocate and copy memory - int *cuda_fragment_idxs; - int *cuda_fragment_offsets; - float *cuda_fragment_centers; - cudaMalloc((void **)&cuda_fragment_idxs, - fragment_total_point_num * 2 * sizeof(int) + - sizeof(int)); // prevent alloc 0 space - cudaMalloc((void **)&cuda_fragment_offsets, (fragment_num + 1) * sizeof(int)); - cudaMalloc((void **)&cuda_fragment_centers, - fragment_num * 5 * sizeof(float) + - sizeof(float)); // prevent alloc 0 space - cudaMemcpy(cuda_fragment_idxs, fragment_idxs, - fragment_total_point_num * 2 * sizeof(int), - cudaMemcpyHostToDevice); - cudaMemcpy(cuda_fragment_offsets, fragment_offsets, - (fragment_num + 1) * sizeof(int), cudaMemcpyHostToDevice); - cudaMemcpy(cuda_fragment_centers, fragment_centers, - fragment_num * 5 * sizeof(float), cudaMemcpyHostToDevice); - - int *cuda_primary_idxs; - int *cuda_primary_offsets; - float *cuda_primary_centers; - cudaMalloc((void **)&cuda_primary_idxs, - primary_total_point_num * 2 * sizeof(int) + - sizeof(int)); // prevent alloc 0 space - cudaMalloc((void **)&cuda_primary_offsets, (primary_num + 1) * sizeof(int)); - cudaMalloc((void **)&cuda_primary_centers, - primary_num * 5 * sizeof(float) + - sizeof(float)); // prevent alloc 0 space - cudaMemcpy(cuda_primary_idxs, primary_idxs, - primary_total_point_num * 2 * sizeof(int), cudaMemcpyHostToDevice); - cudaMemcpy(cuda_primary_offsets, primary_offsets, - (primary_num + 1) * sizeof(int), cudaMemcpyHostToDevice); - cudaMemcpy(cuda_primary_centers, primary_centers, - primary_num * 5 * sizeof(float), cudaMemcpyHostToDevice); - cudaDeviceSynchronize(); - - // // for each fragment, find its primary - int *cuda_primary_absorb_fragment_idx; // array for saving the fragment idxs - int *cuda_primary_absorb_fragment_cnt; // array for saving the fragment nums - cudaMalloc((void **)&cuda_primary_absorb_fragment_idx, - primary_num * MAX_PER_PRIMARY_ABSORB_FRAGMENT_NUM * sizeof(int) + - sizeof(int)); - cudaMalloc((void **)&cuda_primary_absorb_fragment_cnt, - primary_num * sizeof(int) + sizeof(int)); - if (fragment_num != 0) - fragment_find_primary_<<>>( - primary_num, cuda_primary_offsets, cuda_primary_centers, fragment_num, - cuda_fragment_offsets, cuda_fragment_centers, - cuda_primary_absorb_fragment_idx, cuda_primary_absorb_fragment_cnt); - cudaDeviceSynchronize(); - - // concatenate fragments belonging to the same primary - int *cuda_concat_idxs; - int *cuda_concat_point_num; - cudaMalloc((void **)&cuda_concat_idxs, - primary_num * MAX_PER_PRIMARY_ABSORB_POINT_NUM * 2 * sizeof(int) + - sizeof(int)); - cudaMalloc((void **)&cuda_concat_point_num, - primary_num * sizeof(int) + sizeof(int)); - assert(primary_num <= MAX_PRIMARY_NUM); - concat_fragments_<<>>( - cuda_fragment_idxs, cuda_fragment_offsets, cuda_primary_idxs, - cuda_primary_offsets, cuda_primary_absorb_fragment_idx, - cuda_primary_absorb_fragment_cnt, cuda_concat_idxs, cuda_concat_point_num, - primary_num); - cudaDeviceSynchronize(); - - // merge primary instances and fragments - int *concat_point_num = new int[primary_num + 1]; // allocate on host - cudaMemcpy(concat_point_num, cuda_concat_point_num, primary_num * sizeof(int), - cudaMemcpyDeviceToHost); - int _accu_offset = 0; - for (int i = 0; i < primary_num; i++) { - // add primary instances - cudaMemcpy(primary_idxs_post + _accu_offset * 2, - cuda_primary_idxs + primary_offsets[i] * 2, - (primary_offsets[i + 1] - primary_offsets[i]) * 2 * sizeof(int), - cudaMemcpyDeviceToHost); - _accu_offset += (primary_offsets[i + 1] - primary_offsets[i]); - - // add absorbed fragments - cudaMemcpy(primary_idxs_post + _accu_offset * 2, - cuda_concat_idxs + i * MAX_PER_PRIMARY_ABSORB_POINT_NUM * 2, - concat_point_num[i] * 2 * sizeof(int), cudaMemcpyDeviceToHost); - _accu_offset += concat_point_num[i]; - - // writing offsets - primary_offsets_post[i + 1] = _accu_offset; - } - cudaDeviceSynchronize(); - - cudaError_t err; - err = cudaGetLastError(); - if (cudaSuccess != err) { - fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); - exit(-1); - } -} \ No newline at end of file diff --git a/lib/softgroup_ops/src/hierarchical_aggregation/hierarchical_aggregation.h b/lib/softgroup_ops/src/hierarchical_aggregation/hierarchical_aggregation.h deleted file mode 100644 index 62d729c..0000000 --- a/lib/softgroup_ops/src/hierarchical_aggregation/hierarchical_aggregation.h +++ /dev/null @@ -1,31 +0,0 @@ -/* -Hierarchichal Aggregation Algorithm -*/ - -#ifndef HIERARCHICAL_AGGREGATION_H -#define HIERARCHICAL_AGGREGATION_H -#include -#include -#include - -#include "../datatype/datatype.h" - -void hierarchical_aggregation( - at::Tensor class_numpoint_mean_tensor, at::Tensor semantic_label_tensor, - at::Tensor coord_shift_tensor, at::Tensor batch_idxs_tensor, - at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor, - at::Tensor fragment_idxs_tensor, at::Tensor fragment_offsets_tensor, - at::Tensor fragment_centers_tensor, at::Tensor cluster_idxs_kept_tensor, - at::Tensor cluster_offsets_kept_tensor, - at::Tensor cluster_centers_kept_tensor, at::Tensor primary_idxs_tensor, - at::Tensor primary_offsets_tensor, at::Tensor primary_centers_tensor, - at::Tensor primary_idxs_post_tensor, at::Tensor primary_offsets_post_tensor, - const int N, const int training_mode_, const int using_set_aggr_, - const int class_id); - -void hierarchical_aggregation_cuda( - int fragment_total_point_num, int fragment_num, int *fragment_idxs, - int *fragment_offsets, float *fragment_centers, int primary_total_point_num, - int primary_num, int *primary_idxs, int *primary_offsets, - float *primary_centers, int *primary_idxs_post, int *primary_offsets_post); -#endif // HIERARCHICAL_AGGREGATION_H diff --git a/lib/softgroup_ops/src/softgroup_api.cpp b/lib/softgroup_ops/src/softgroup_api.cpp index 1baf15b..5e05f7d 100644 --- a/lib/softgroup_ops/src/softgroup_api.cpp +++ b/lib/softgroup_ops/src/softgroup_api.cpp @@ -5,10 +5,6 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("hierarchical_aggregation", &hierarchical_aggregation, - "hierarchical_aggregation"); - // m.def("cal_iou_and_masklabel", &cal_iou_and_masklabel, - // "cal_iou_and_masklabel"); m.def("get_mask_iou_on_cluster", &get_mask_iou_on_cluster, "get_mask_iou_on_cluster"); m.def("get_mask_iou_on_pred", &get_mask_iou_on_pred, "get_mask_iou_on_pred"); diff --git a/lib/softgroup_ops/src/softgroup_ops.cpp b/lib/softgroup_ops/src/softgroup_ops.cpp index 91d2939..dc71a19 100644 --- a/lib/softgroup_ops/src/softgroup_ops.cpp +++ b/lib/softgroup_ops/src/softgroup_ops.cpp @@ -2,12 +2,9 @@ #include #include -#include "datatype/datatype.cpp" - -#include "cal_iou_and_masklabel/cal_iou_and_masklabel.cpp" -#include "hierarchical_aggregation/hierarchical_aggregation.cpp" - #include "bfs_cluster/bfs_cluster.cpp" +#include "cal_iou_and_masklabel/cal_iou_and_masklabel.cpp" +#include "datatype/datatype.cpp" #include "get_iou/get_iou.cpp" #include "roipool/roipool.cpp" #include "sec_mean/sec_mean.cpp" diff --git a/lib/softgroup_ops/src/softgroup_ops.h b/lib/softgroup_ops/src/softgroup_ops.h index af1ca76..0cd0349 100644 --- a/lib/softgroup_ops/src/softgroup_ops.h +++ b/lib/softgroup_ops/src/softgroup_ops.h @@ -1,11 +1,8 @@ #ifndef HAIS_H #define HAIS_H -#include "datatype/datatype.h" - -#include "cal_iou_and_masklabel/cal_iou_and_masklabel.h" -#include "hierarchical_aggregation/hierarchical_aggregation.h" - #include "bfs_cluster/bfs_cluster.h" +#include "cal_iou_and_masklabel/cal_iou_and_masklabel.h" +#include "datatype/datatype.h" #include "get_iou/get_iou.h" #include "roipool/roipool.h" #include "sec_mean/sec_mean.h" @@ -37,4 +34,4 @@ void point_recover_bp_feat(/* cuda float N*C */ at::Tensor d_output_feats, /* cuda Int M*(maxActive+1) */ at::Tensor idx_map, Int nActive, Int maxActive, Int nPlane); -#endif // HAIS_H \ No newline at end of file +#endif // HAIS_H diff --git a/model/softgroup.py b/model/softgroup.py index 641faf9..4766d61 100644 --- a/model/softgroup.py +++ b/model/softgroup.py @@ -134,6 +134,10 @@ class SoftGroup(nn.Module): proposals_idx, proposals_offset = self.forward_grouping(semantic_scores, pt_offsets, batch_idxs, coords_float, self.grouping_cfg) + if proposals_offset.shape[0] > self.train_cfg.max_proposal_num: + proposals_offset = proposals_offset[:self.train_cfg.max_proposal_num + 1] + proposals_idx = proposals_idx[:proposals_offset[-1]] + assert proposals_idx.shape[0] == proposals_offset[-1] instance_batch_idxs, cls_scores, iou_scores, mask_scores = self.forward_instance( proposals_idx, proposals_offset, output_feats, coords_float) instance_loss = self.instance_loss(cls_scores, mask_scores, iou_scores, proposals_idx, @@ -303,32 +307,27 @@ class SoftGroup(nn.Module): proposals_offset_list = [] batch_size = batch_idxs.max() + 1 semantic_scores = semantic_scores.softmax(dim=-1) - semantic_preds = semantic_scores.max(1)[1] # TODO remove this radius = self.grouping_cfg.radius mean_active = self.grouping_cfg.mean_active + npoint_thr = self.grouping_cfg.npoint_thr class_numpoint_mean = torch.tensor( self.grouping_cfg.class_numpoint_mean, dtype=torch.float32) - training_mode = None # TODO remove this for class_id in range(self.semantic_classes): - # ignore "floor" and "wall" - if class_id < 2: + if class_id in self.grouping_cfg.ignore_classes: continue scores = semantic_scores[:, class_id].contiguous() object_idxs = (scores > self.grouping_cfg.score_thr).nonzero().view(-1) - if object_idxs.size(0) < 100: # TODO + if object_idxs.size(0) < self.test_cfg.min_npoint: continue batch_idxs_ = batch_idxs[object_idxs] batch_offsets_ = utils.get_batch_offsets(batch_idxs_, batch_size) coords_ = coords_float[object_idxs] - pt_offsets_ = pt_offsets[object_idxs] # (N_fg, 3), float32 - semantic_preds_cpu = semantic_preds[object_idxs].int().cpu() + pt_offsets_ = pt_offsets[object_idxs] idx, start_len = softgroup_ops.ballquery_batch_p(coords_ + pt_offsets_, batch_idxs_, batch_offsets_, radius, mean_active) - using_set_aggr = False # TODO refactor this - proposals_idx, proposals_offset = softgroup_ops.hierarchical_aggregation( - class_numpoint_mean, semantic_preds_cpu, (coords_ + pt_offsets_).cpu(), idx.cpu(), - start_len.cpu(), batch_idxs_.cpu(), training_mode, using_set_aggr, class_id) + proposals_idx, proposals_offset = softgroup_ops.bfs_cluster( + class_numpoint_mean, idx.cpu(), start_len.cpu(), npoint_thr, class_id) proposals_idx[:, 1] = object_idxs[proposals_idx[:, 1].long()].int() # merge proposals