mirror of
https://github.com/botastic/SoftGroup.git
synced 2026-07-01 16:00:06 +00:00
refactor bfs_cluster
This commit is contained in:
@@ -12,10 +12,13 @@ model:
|
||||
mean_active: 300
|
||||
class_numpoint_mean: [1823, 7457, 6189, 7424, 34229, 1724, 5439,
|
||||
6016, 39796, 5279, 5092, 12210, 10225]
|
||||
npoint_thr: 0.05 # absolute point count if the class's class_numpoint_mean entry is -1, otherwise a fraction of that class_numpoint_mean entry
|
||||
ignore_classes: [0, 1]
|
||||
instance_voxel_cfg:
|
||||
scale: 50
|
||||
spatial_shape: 20
|
||||
train_cfg:
|
||||
max_proposal_num: 200
|
||||
pos_iou_thr: 0.5
|
||||
test_cfg:
|
||||
x4_split: True
|
||||
|
||||
@@ -19,7 +19,6 @@ class HierarchicalAggregation(Function):
|
||||
'''
|
||||
N = start_len.size(0)
|
||||
|
||||
assert cluster_numpoint_mean.is_contiguous()
|
||||
assert semantic_label.is_contiguous()
|
||||
assert coord_shift.is_contiguous()
|
||||
assert ball_query_idxs.is_contiguous()
|
||||
@@ -329,10 +328,9 @@ ballquery_batch_p = BallQueryBatchP.apply
|
||||
|
||||
class BFSCluster(Function):
|
||||
@staticmethod
|
||||
def forward(ctx, semantic_label, ball_query_idxs, start_len, threshold):
|
||||
def forward(ctx, cluster_numpoint_mean, ball_query_idxs, start_len, threshold, class_id):
|
||||
'''
|
||||
:param ctx:
|
||||
:param semantic_label: (N), int
|
||||
:param ball_query_idxs: (nActive), int
|
||||
:param start_len: (N, 2), int
|
||||
:return: cluster_idxs: int (sumNPoint, 2), dim 0 for cluster_id, dim 1 for corresponding point idxs in N
|
||||
@@ -340,15 +338,14 @@ class BFSCluster(Function):
|
||||
'''
|
||||
|
||||
N = start_len.size(0)
|
||||
|
||||
assert semantic_label.is_contiguous()
|
||||
assert cluster_numpoint_mean.is_contiguous()
|
||||
assert ball_query_idxs.is_contiguous()
|
||||
assert start_len.is_contiguous()
|
||||
|
||||
cluster_idxs = semantic_label.new()
|
||||
cluster_offsets = semantic_label.new()
|
||||
cluster_idxs = ball_query_idxs.new()
|
||||
cluster_offsets = ball_query_idxs.new()
|
||||
|
||||
SOFTGROUP_OP.bfs_cluster(semantic_label, ball_query_idxs, start_len, cluster_idxs, cluster_offsets, N, threshold)
|
||||
SOFTGROUP_OP.bfs_cluster(cluster_numpoint_mean, ball_query_idxs, start_len, cluster_idxs, cluster_offsets, N, threshold, class_id)
|
||||
|
||||
return cluster_idxs, cluster_offsets
|
||||
|
||||
|
||||
@@ -2,12 +2,13 @@
|
||||
Ball Query with BatchIdx & Clustering Algorithm
|
||||
Written by Li Jiang
|
||||
All Rights Reserved 2020.
|
||||
|
||||
Modified by Thang Vu - Remove semantic label in clustering
|
||||
*/
|
||||
|
||||
#include "bfs_cluster.h"
|
||||
|
||||
/* ================================== ballquery_batch_p
|
||||
* ================================== */
|
||||
/* =================== ballquery_batch_p================================= */
|
||||
// input xyz: (n, 3) float
|
||||
// input batch_idxs: (n) int
|
||||
// input batch_offsets: (B+1) int, batch_offsets[-1]
|
||||
@@ -29,10 +30,8 @@ int ballquery_batch_p(at::Tensor xyz_tensor, at::Tensor batch_idxs_tensor,
|
||||
return cumsum;
|
||||
}
|
||||
|
||||
/* ================================== bfs_cluster
|
||||
* ================================== */
|
||||
ConnectedComponent find_cc(Int idx, int *semantic_label, Int *ball_query_idxs,
|
||||
int *start_len, int *visited) {
|
||||
ConnectedComponent find_cc(Int idx, Int *ball_query_idxs, int *start_len,
|
||||
int *visited) {
|
||||
ConnectedComponent cc;
|
||||
cc.addPoint(idx);
|
||||
visited[idx] = 1;
|
||||
@@ -46,47 +45,46 @@ ConnectedComponent find_cc(Int idx, int *semantic_label, Int *ball_query_idxs,
|
||||
Q.pop();
|
||||
int start = start_len[cur * 2];
|
||||
int len = start_len[cur * 2 + 1];
|
||||
int label_cur = semantic_label[cur];
|
||||
for (Int i = start; i < start + len; i++) {
|
||||
Int idx_i = ball_query_idxs[i];
|
||||
if (semantic_label[idx_i] != label_cur)
|
||||
continue;
|
||||
if (visited[idx_i] == 1)
|
||||
continue;
|
||||
|
||||
cc.addPoint(idx_i);
|
||||
visited[idx_i] = 1;
|
||||
|
||||
Q.push(idx_i);
|
||||
}
|
||||
}
|
||||
return cc;
|
||||
}
|
||||
|
||||
// input: semantic_label, int, N
|
||||
// input: ball_query_idxs, Int, (nActive)
|
||||
// input: start_len, int, (N, 2)
|
||||
// output: clusters, CCs
|
||||
int get_clusters(int *semantic_label, Int *ball_query_idxs, int *start_len,
|
||||
const Int nPoint, int threshold,
|
||||
ConnectedComponents &clusters) {
|
||||
int visited[nPoint] = {0};
|
||||
|
||||
int get_clusters(float *class_numpoint_mean, int *ball_query_idxs,
|
||||
int *start_len, const int nPoint, float threshold,
|
||||
ConnectedComponents &clusters, const int class_id) {
|
||||
int *visited = new int[nPoint]{0};
|
||||
float _class_numpoint_mean, thr;
|
||||
int sumNPoint = 0;
|
||||
for (Int i = 0; i < nPoint; i++) {
|
||||
|
||||
for (int i = 0; i < nPoint; i++) {
|
||||
if (visited[i] == 0) {
|
||||
ConnectedComponent CC =
|
||||
find_cc(i, semantic_label, ball_query_idxs, start_len, visited);
|
||||
if ((int)CC.pt_idxs.size() >= threshold) {
|
||||
ConnectedComponent CC = find_cc(i, ball_query_idxs, start_len, visited);
|
||||
_class_numpoint_mean = class_numpoint_mean[class_id];
|
||||
|
||||
// if _class_numpoint_mean is not defined (-1), use threshold directly
|
||||
if (_class_numpoint_mean == -1) {
|
||||
thr = threshold;
|
||||
} else {
|
||||
thr = threshold * _class_numpoint_mean;
|
||||
}
|
||||
if ((int)CC.pt_idxs.size() >= thr) {
|
||||
clusters.push_back(CC);
|
||||
sumNPoint += (int)CC.pt_idxs.size();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sumNPoint;
|
||||
}
|
||||
|
||||
// convert from ConnectedComponents to (idxs, offsets) representation
|
||||
void fill_cluster_idxs_(ConnectedComponents &CCs, int *cluster_idxs,
|
||||
int *cluster_offsets) {
|
||||
for (int i = 0; i < (int)CCs.size(); i++) {
|
||||
@@ -99,33 +97,29 @@ void fill_cluster_idxs_(ConnectedComponents &CCs, int *cluster_idxs,
|
||||
}
|
||||
}
|
||||
|
||||
// input: semantic_label, int, N
|
||||
// input: class_numpoint_mean_tensor
|
||||
// input: ball_query_idxs, int, (nActive)
|
||||
// input: start_len, int, (N, 2)
|
||||
// output: cluster_idxs, int (sumNPoint, 2), dim 0 for cluster_id, dim 1 for
|
||||
// corresponding point idxs in N
|
||||
// output: cluster_offsets, int (nCluster + 1)
|
||||
void bfs_cluster(at::Tensor semantic_label_tensor,
|
||||
void bfs_cluster(at::Tensor class_numpoint_mean_tensor,
|
||||
at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor,
|
||||
at::Tensor cluster_idxs_tensor,
|
||||
at::Tensor cluster_offsets_tensor, const int N,
|
||||
int threshold) {
|
||||
int *semantic_label = semantic_label_tensor.data<int>();
|
||||
float threshold, const int class_id) {
|
||||
float *class_numpoint_mean = class_numpoint_mean_tensor.data<float>();
|
||||
Int *ball_query_idxs = ball_query_idxs_tensor.data<Int>();
|
||||
int *start_len = start_len_tensor.data<int>();
|
||||
|
||||
ConnectedComponents CCs;
|
||||
int sumNPoint = get_clusters(semantic_label, ball_query_idxs, start_len, N,
|
||||
threshold, CCs);
|
||||
|
||||
int sumNPoint = get_clusters(class_numpoint_mean, ball_query_idxs, start_len,
|
||||
N, threshold, CCs, class_id);
|
||||
int nCluster = (int)CCs.size();
|
||||
cluster_idxs_tensor.resize_({sumNPoint, 2});
|
||||
cluster_offsets_tensor.resize_({nCluster + 1});
|
||||
cluster_idxs_tensor.zero_();
|
||||
cluster_offsets_tensor.zero_();
|
||||
|
||||
int *cluster_idxs = cluster_idxs_tensor.data<int>();
|
||||
int *cluster_offsets = cluster_offsets_tensor.data<int>();
|
||||
|
||||
fill_cluster_idxs_(CCs, cluster_idxs, cluster_offsets);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,9 +21,10 @@ int ballquery_batch_p_cuda(int n, int meanActive, float radius,
|
||||
const int *batch_offsets, int *idx, int *start_len,
|
||||
cudaStream_t stream);
|
||||
|
||||
void bfs_cluster(at::Tensor semantic_label_tensor,
|
||||
void bfs_cluster(at::Tensor class_numpoint_mean_tensor,
|
||||
at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor,
|
||||
at::Tensor cluster_idxs_tensor,
|
||||
at::Tensor cluster_offsets_tensor, const int N, int threshold);
|
||||
at::Tensor cluster_offsets_tensor, const int N,
|
||||
float threshold, const int class_id);
|
||||
|
||||
#endif // BFS_CLUSTER_H
|
||||
#endif // BFS_CLUSTER_H
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
#include "datatype/datatype.h"
|
||||
#include <ATen/ATen.h>
|
||||
|
||||
#include "cal_iou_and_masklabel/cal_iou_and_masklabel.cu"
|
||||
#include "hierarchical_aggregation/hierarchical_aggregation.cu"
|
||||
|
||||
#include "bfs_cluster/bfs_cluster.cu"
|
||||
#include "cal_iou_and_masklabel/cal_iou_and_masklabel.cu"
|
||||
#include "get_iou/get_iou.cu"
|
||||
#include "roipool/roipool.cu"
|
||||
#include "sec_mean/sec_mean.cu"
|
||||
@@ -17,4 +15,4 @@ template void voxelize_fp_cuda<float>(Int nOutputRows, Int maxActive,
|
||||
|
||||
template void voxelize_bp_cuda<float>(Int nOutputRows, Int maxActive,
|
||||
Int nPlanes, float *d_output_feats,
|
||||
float *d_feats, Int *rules, bool average);
|
||||
float *d_feats, Int *rules, bool average);
|
||||
|
||||
@@ -1,210 +0,0 @@
|
||||
#include "hierarchical_aggregation.h"
|
||||
#include "time.h"
|
||||
|
||||
/* ================================== hierarchical_aggregation
|
||||
* ================================== */
|
||||
|
||||
// instance point num for each class, statistical data from the training set
|
||||
float class_numpoint_mean_dict[20] = {
|
||||
-1., -1., 3917., 12056., 2303., 8331., 3948., 3166., 5629., 11719.,
|
||||
1003., 3317., 4912., 10221., 3889., 4136., 2120., 945., 3967., 2589.};
|
||||
|
||||
ConnectedComponent find_cc(int idx, int *semantic_label, float *coord_shift,
|
||||
int *batch_idxs, int *ball_query_idxs,
|
||||
int *start_len, int *visited) {
|
||||
ConnectedComponent cc;
|
||||
cc.addPoint(idx);
|
||||
cc.accum_x += coord_shift[idx * 3 + 0];
|
||||
cc.accum_y += coord_shift[idx * 3 + 1];
|
||||
cc.accum_z += coord_shift[idx * 3 + 2];
|
||||
// cc.cls_label = semantic_label[idx]; // currently cc's label is the label of
|
||||
// the start point, convert to float
|
||||
cc.batch_idx = batch_idxs[idx]; // record batch info
|
||||
visited[idx] = 1;
|
||||
std::queue<int> Q;
|
||||
assert(Q.empty());
|
||||
Q.push(idx);
|
||||
while (!Q.empty()) {
|
||||
int cur = Q.front();
|
||||
Q.pop();
|
||||
int start = start_len[cur * 2];
|
||||
int len = start_len[cur * 2 + 1];
|
||||
// int label_cur = semantic_label[cur];
|
||||
for (int i = start; i < start + len; i++) {
|
||||
int idx_i = ball_query_idxs[i];
|
||||
// if (semantic_label[idx_i] != label_cur) continue;
|
||||
if (visited[idx_i] == 1)
|
||||
continue;
|
||||
cc.addPoint(idx_i);
|
||||
cc.accum_x += coord_shift[idx_i * 3 + 0];
|
||||
cc.accum_y += coord_shift[idx_i * 3 + 1];
|
||||
cc.accum_z += coord_shift[idx_i * 3 + 2];
|
||||
visited[idx_i] = 1;
|
||||
Q.push(idx_i);
|
||||
}
|
||||
}
|
||||
return cc;
|
||||
}
|
||||
|
||||
// split clusters into fragment and primary based on point num
|
||||
void split_clusters(float *class_numpoint_mean, int *semantic_label,
|
||||
float *coord_shift, int *batch_idxs, int *ball_query_idxs,
|
||||
int *start_len, const int nPoint,
|
||||
ConnectedComponents &CCs_fragment,
|
||||
ConnectedComponents &CCs_kept,
|
||||
ConnectedComponents &CCs_primary, int *sumNPoint_fragment,
|
||||
int *sumNPoint_kept, int *sumNPoint_primary,
|
||||
const int class_id) {
|
||||
int *visited = new int[nPoint]{0};
|
||||
// int _class_idx;
|
||||
float _class_numpoint_mean, low_thre, high_thre;
|
||||
|
||||
for (int i = 0; i < nPoint; i++) {
|
||||
if (visited[i] == 0) {
|
||||
ConnectedComponent CC =
|
||||
find_cc(i, semantic_label, coord_shift, batch_idxs, ball_query_idxs,
|
||||
start_len, visited);
|
||||
// _class_idx = CC.cls_label;
|
||||
_class_numpoint_mean = class_numpoint_mean[class_id];
|
||||
|
||||
low_thre = 0.05 * _class_numpoint_mean;
|
||||
high_thre = 0.3 * _class_numpoint_mean;
|
||||
|
||||
if ((int)CC.pt_idxs.size() < high_thre) {
|
||||
CCs_fragment.push_back(CC);
|
||||
*sumNPoint_fragment += (int)CC.pt_idxs.size();
|
||||
|
||||
// keep fragments which are large enough to be independent instances
|
||||
if ((int)CC.pt_idxs.size() >= low_thre &&
|
||||
(int)CC.pt_idxs.size() < high_thre) {
|
||||
CCs_kept.push_back(CC);
|
||||
*sumNPoint_kept += (int)CC.pt_idxs.size();
|
||||
}
|
||||
} else {
|
||||
CCs_primary.push_back(CC);
|
||||
*sumNPoint_primary += (int)CC.pt_idxs.size();
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// convert from ConnectedComponents to (idxs, offsets) representation
|
||||
void fill_cluster_idxs_(ConnectedComponents &CCs, int *cluster_idxs,
|
||||
int *cluster_offsets, float *cluster_centers) {
|
||||
for (int i = 0; i < (int)CCs.size(); i++) {
|
||||
cluster_offsets[i + 1] = cluster_offsets[i] + (int)CCs[i].pt_idxs.size();
|
||||
|
||||
cluster_centers[i * 5 + 0] = CCs[i].accum_x / (float)CCs[i].pt_idxs.size();
|
||||
cluster_centers[i * 5 + 1] = CCs[i].accum_y / (float)CCs[i].pt_idxs.size();
|
||||
cluster_centers[i * 5 + 2] = CCs[i].accum_z / (float)CCs[i].pt_idxs.size();
|
||||
cluster_centers[i * 5 + 3] = (float)CCs[i].cls_label;
|
||||
cluster_centers[i * 5 + 4] = (float)CCs[i].batch_idx;
|
||||
|
||||
for (int j = 0; j < (int)CCs[i].pt_idxs.size(); j++) {
|
||||
int idx = CCs[i].pt_idxs[j];
|
||||
cluster_idxs[(cluster_offsets[i] + j) * 2 + 0] = i;
|
||||
cluster_idxs[(cluster_offsets[i] + j) * 2 + 1] = idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// input: semantic_label, int, (N)
|
||||
// input: coord_shift, float, (N, 3)
|
||||
// input: batch_idxs, int, (N)
|
||||
// input: ball_query_idxs, int, (nActive)
|
||||
// input: start_len, int, (N, 2)
|
||||
//(fragment_idxs, fragment_offsets, fragment_centers) for fragment clusters
|
||||
//(cluster_idxs_kept_tensor, cluster_offsets_kept_tensor,
|
||||
//cluster_centers_kept_tensor) for keeping some fragments
|
||||
//(primary_idxs_tensor, primary_offsets, primary_centers) for primary clusters
|
||||
//(primary_idxs_post_tensor, primary_offsets_post_tensor) for aggregated
|
||||
//clusters
|
||||
void hierarchical_aggregation(
|
||||
at::Tensor class_numpoint_mean_tensor, at::Tensor semantic_label_tensor,
|
||||
at::Tensor coord_shift_tensor, at::Tensor batch_idxs_tensor,
|
||||
at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor,
|
||||
at::Tensor fragment_idxs_tensor, at::Tensor fragment_offsets_tensor,
|
||||
at::Tensor fragment_centers_tensor, at::Tensor cluster_idxs_kept_tensor,
|
||||
at::Tensor cluster_offsets_kept_tensor,
|
||||
at::Tensor cluster_centers_kept_tensor, at::Tensor primary_idxs_tensor,
|
||||
at::Tensor primary_offsets_tensor, at::Tensor primary_centers_tensor,
|
||||
at::Tensor primary_idxs_post_tensor, at::Tensor primary_offsets_post_tensor,
|
||||
const int N, const int training_mode_, const int using_set_aggr_,
|
||||
const int class_id) {
|
||||
float *class_numpoint_mean = class_numpoint_mean_tensor.data<float>();
|
||||
int *semantic_label = semantic_label_tensor.data<int>();
|
||||
float *coord_shift = coord_shift_tensor.data<float>();
|
||||
int *batch_idxs = batch_idxs_tensor.data<int>();
|
||||
int *ball_query_idxs = ball_query_idxs_tensor.data<int>();
|
||||
int *start_len = start_len_tensor.data<int>();
|
||||
|
||||
ConnectedComponents CCs_fragment;
|
||||
ConnectedComponents CCs_kept;
|
||||
ConnectedComponents CCs_primary;
|
||||
|
||||
int sumNPoint_fragment = 0, sumNPoint_kept = 0, sumNPoint_primary = 0;
|
||||
split_clusters(class_numpoint_mean, semantic_label, coord_shift, batch_idxs,
|
||||
ball_query_idxs, start_len, N, CCs_fragment, CCs_kept,
|
||||
CCs_primary, &sumNPoint_fragment, &sumNPoint_kept,
|
||||
&sumNPoint_primary, class_id);
|
||||
|
||||
cluster_idxs_kept_tensor.resize_({sumNPoint_kept, 2});
|
||||
cluster_offsets_kept_tensor.resize_({(int)CCs_kept.size() + 1});
|
||||
cluster_centers_kept_tensor.resize_({(int)CCs_kept.size(), 5});
|
||||
cluster_idxs_kept_tensor.zero_();
|
||||
cluster_offsets_kept_tensor.zero_();
|
||||
cluster_centers_kept_tensor.zero_();
|
||||
int *cluster_idxs_kept = cluster_idxs_kept_tensor.data<int>();
|
||||
int *cluster_offsets_kept = cluster_offsets_kept_tensor.data<int>();
|
||||
float *cluster_centers_kept = cluster_centers_kept_tensor.data<float>();
|
||||
fill_cluster_idxs_(CCs_kept, cluster_idxs_kept, cluster_offsets_kept,
|
||||
cluster_centers_kept);
|
||||
|
||||
primary_idxs_tensor.resize_({sumNPoint_primary, 2});
|
||||
primary_offsets_tensor.resize_({(int)CCs_primary.size() + 1});
|
||||
primary_centers_tensor.resize_({(int)CCs_primary.size(), 5});
|
||||
primary_idxs_tensor.zero_();
|
||||
primary_offsets_tensor.zero_();
|
||||
primary_centers_tensor.zero_();
|
||||
int *primary_idxs = primary_idxs_tensor.data<int>();
|
||||
int *primary_offsets = primary_offsets_tensor.data<int>();
|
||||
float *primary_centers = primary_centers_tensor.data<float>();
|
||||
fill_cluster_idxs_(CCs_primary, primary_idxs, primary_offsets,
|
||||
primary_centers);
|
||||
|
||||
if (using_set_aggr_ == 0) { // only point aggr
|
||||
return;
|
||||
}
|
||||
|
||||
fragment_idxs_tensor.resize_({sumNPoint_fragment, 2});
|
||||
fragment_offsets_tensor.resize_({(int)CCs_fragment.size() + 1});
|
||||
fragment_centers_tensor.resize_(
|
||||
{(int)CCs_fragment.size(),
|
||||
5}); //[:, -2] for cls_label, [:, -1] for batch_idx
|
||||
fragment_idxs_tensor.zero_();
|
||||
fragment_offsets_tensor.zero_();
|
||||
fragment_centers_tensor.zero_();
|
||||
int *fragment_idxs = fragment_idxs_tensor.data<int>();
|
||||
int *fragment_offsets = fragment_offsets_tensor.data<int>();
|
||||
float *fragment_centers = fragment_centers_tensor.data<float>();
|
||||
fill_cluster_idxs_(CCs_fragment, fragment_idxs, fragment_offsets,
|
||||
fragment_centers);
|
||||
|
||||
// prepare tensor for storing post-primary
|
||||
primary_idxs_post_tensor.resize_(
|
||||
{sumNPoint_fragment + sumNPoint_primary,
|
||||
2}); // never overflow, but need to cut off tails
|
||||
primary_offsets_post_tensor.resize_({(int)CCs_primary.size() + 1});
|
||||
primary_idxs_post_tensor.zero_();
|
||||
primary_offsets_post_tensor.zero_();
|
||||
int *primary_idxs_post = primary_idxs_post_tensor.data<int>();
|
||||
int *primary_offsets_post = primary_offsets_post_tensor.data<int>();
|
||||
|
||||
// set aggr
|
||||
hierarchical_aggregation_cuda(
|
||||
sumNPoint_fragment, (int)CCs_fragment.size(), fragment_idxs,
|
||||
fragment_offsets, fragment_centers, sumNPoint_primary,
|
||||
(int)CCs_primary.size(), primary_idxs, primary_offsets, primary_centers,
|
||||
primary_idxs_post, primary_offsets_post);
|
||||
}
|
||||
@@ -1,254 +0,0 @@
|
||||
#include "hierarchical_aggregation.h"
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
#define MAX_PRIMARY_NUM 1024
|
||||
#define MAX_PER_PRIMARY_ABSORB_FRAGMENT_NUM 1024
|
||||
#define INFINITY_DIS_SQUARE 10000
|
||||
#define MAX_PER_PRIMARY_ABSORB_POINT_NUM 8192
|
||||
#define MAX_THREADS_PER_BLOCK 512
|
||||
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
|
||||
|
||||
// input: cuda_fragment_centers (fragment_num * 5,), 5 for (x, y, z, cls_label,
|
||||
// batch_idx)
|
||||
// input: cuda_primary_centers (primary_num * 5,), 5 for (x, y, z, cls_label,
|
||||
// batch_idx)
|
||||
// input: ...
|
||||
// output: cuda_primary_absorb_fragment_idx
|
||||
// output: cuda_primary_absorb_fragment_cnt
|
||||
__global__ void
|
||||
fragment_find_primary_(int primary_num, int *cuda_primary_offsets,
|
||||
float *cuda_primary_centers, int fragment_num,
|
||||
int *cuda_fragment_offsets, float *cuda_fragment_centers,
|
||||
int *cuda_primary_absorb_fragment_idx,
|
||||
int *cuda_primary_absorb_fragment_cnt) {
|
||||
|
||||
int fragment_idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (fragment_idx >= fragment_num)
|
||||
return;
|
||||
|
||||
// find the nearest primary for each fragment
|
||||
float nearest_dis_square = INFINITY_DIS_SQUARE;
|
||||
int nearest_idx = -1; // primary_idx
|
||||
for (int i = 0; i < primary_num; i++) {
|
||||
if (abs(cuda_primary_centers[i * 5 + 3] -
|
||||
cuda_fragment_centers[fragment_idx * 5 + 3]) >
|
||||
0.1) { // judge same cls_label or not
|
||||
continue;
|
||||
}
|
||||
if (abs(cuda_primary_centers[i * 5 + 4] -
|
||||
cuda_fragment_centers[fragment_idx * 5 + 4]) >
|
||||
0.1) { // judge same batch_idx or not
|
||||
continue;
|
||||
}
|
||||
float temp_dis_square = pow((cuda_primary_centers[i * 5 + 0] -
|
||||
cuda_fragment_centers[fragment_idx * 5 + 0]),
|
||||
2) +
|
||||
pow((cuda_primary_centers[i * 5 + 1] -
|
||||
cuda_fragment_centers[fragment_idx * 5 + 1]),
|
||||
2) +
|
||||
pow((cuda_primary_centers[i * 5 + 2] -
|
||||
cuda_fragment_centers[fragment_idx * 5 + 2]),
|
||||
2);
|
||||
if (temp_dis_square < nearest_dis_square) {
|
||||
nearest_dis_square = temp_dis_square;
|
||||
nearest_idx = i;
|
||||
}
|
||||
}
|
||||
if (nearest_idx == -1)
|
||||
return; // fragment not belong to any primary
|
||||
|
||||
// r_size
|
||||
int primary_point_num =
|
||||
cuda_primary_offsets[nearest_idx + 1] - cuda_primary_offsets[nearest_idx];
|
||||
float r_size = 0.01 * sqrt(float(primary_point_num));
|
||||
|
||||
// r_cls
|
||||
// instance radius for each class, statistical data from the training set
|
||||
float class_radius_mean[20] = {-1.,
|
||||
-1.,
|
||||
0.7047687683952325,
|
||||
1.1732690381942337,
|
||||
0.39644035821116036,
|
||||
1.011516629020215,
|
||||
0.7260155292902369,
|
||||
0.8674973999335017,
|
||||
0.8374931435447094,
|
||||
1.0454153869133096,
|
||||
0.32879464797430913,
|
||||
1.1954566226966346,
|
||||
0.8628817944400078,
|
||||
1.0416287916782507,
|
||||
0.6602697958671507,
|
||||
0.8541363897836871,
|
||||
0.38055290598206537,
|
||||
0.3011878752684007,
|
||||
0.7420871812436316,
|
||||
0.4474268644407741};
|
||||
int _class_idx = (int)cuda_fragment_centers[fragment_idx * 5 + 3];
|
||||
float r_cls = class_radius_mean[_class_idx] * 1.;
|
||||
|
||||
// r_set
|
||||
float r_set = max(r_size, r_cls);
|
||||
|
||||
// judge
|
||||
if (nearest_dis_square < r_set * r_set) {
|
||||
int _offect = atomicAdd(cuda_primary_absorb_fragment_cnt + nearest_idx, 1);
|
||||
if (_offect < MAX_PER_PRIMARY_ABSORB_FRAGMENT_NUM)
|
||||
cuda_primary_absorb_fragment_idx[nearest_idx *
|
||||
MAX_PER_PRIMARY_ABSORB_FRAGMENT_NUM +
|
||||
_offect] = fragment_idx;
|
||||
else {
|
||||
;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// input: ...
|
||||
// output: cuda_concat_idxs
|
||||
// output: cuda_concat_point_num,
|
||||
__global__ void
|
||||
concat_fragments_(int *cuda_fragment_idxs, int *cuda_fragment_offsets,
|
||||
int *cuda_primary_idxs, int *cuda_primary_offsets,
|
||||
int *cuda_primary_absorb_fragment_idx,
|
||||
int *cuda_primary_absorb_fragment_cnt, int *cuda_concat_idxs,
|
||||
int *cuda_concat_point_num, int primary_num) {
|
||||
|
||||
int primary_idx = blockIdx.x;
|
||||
if (primary_idx >= primary_num)
|
||||
return;
|
||||
|
||||
int _accu_offset = 0; // unit is point
|
||||
for (int i = 0; i < cuda_primary_absorb_fragment_cnt[primary_idx] &&
|
||||
i < MAX_PER_PRIMARY_ABSORB_FRAGMENT_NUM;
|
||||
i++) {
|
||||
int idx = cuda_primary_absorb_fragment_idx
|
||||
[primary_idx * MAX_PER_PRIMARY_ABSORB_FRAGMENT_NUM + i];
|
||||
for (int j = cuda_fragment_offsets[idx]; j < cuda_fragment_offsets[idx + 1];
|
||||
j++) {
|
||||
if (_accu_offset < MAX_PER_PRIMARY_ABSORB_POINT_NUM) {
|
||||
cuda_concat_idxs[primary_idx * MAX_PER_PRIMARY_ABSORB_POINT_NUM * 2 +
|
||||
_accu_offset * 2 + 0] = primary_idx;
|
||||
cuda_concat_idxs[primary_idx * MAX_PER_PRIMARY_ABSORB_POINT_NUM * 2 +
|
||||
_accu_offset * 2 + 1] = cuda_fragment_idxs[j * 2 + 1];
|
||||
_accu_offset++;
|
||||
} else {
|
||||
;
|
||||
}
|
||||
}
|
||||
}
|
||||
cuda_concat_point_num[primary_idx] = _accu_offset;
|
||||
}
|
||||
|
||||
void hierarchical_aggregation_cuda(
|
||||
int fragment_total_point_num, int fragment_num, int *fragment_idxs,
|
||||
int *fragment_offsets, float *fragment_centers, int primary_total_point_num,
|
||||
int primary_num, int *primary_idxs, int *primary_offsets,
|
||||
float *primary_centers, int *primary_idxs_post, int *primary_offsets_post) {
|
||||
|
||||
if (primary_num == 0) {
|
||||
return;
|
||||
}
|
||||
// on devices, allocate and copy memory
|
||||
int *cuda_fragment_idxs;
|
||||
int *cuda_fragment_offsets;
|
||||
float *cuda_fragment_centers;
|
||||
cudaMalloc((void **)&cuda_fragment_idxs,
|
||||
fragment_total_point_num * 2 * sizeof(int) +
|
||||
sizeof(int)); // prevent alloc 0 space
|
||||
cudaMalloc((void **)&cuda_fragment_offsets, (fragment_num + 1) * sizeof(int));
|
||||
cudaMalloc((void **)&cuda_fragment_centers,
|
||||
fragment_num * 5 * sizeof(float) +
|
||||
sizeof(float)); // prevent alloc 0 space
|
||||
cudaMemcpy(cuda_fragment_idxs, fragment_idxs,
|
||||
fragment_total_point_num * 2 * sizeof(int),
|
||||
cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(cuda_fragment_offsets, fragment_offsets,
|
||||
(fragment_num + 1) * sizeof(int), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(cuda_fragment_centers, fragment_centers,
|
||||
fragment_num * 5 * sizeof(float), cudaMemcpyHostToDevice);
|
||||
|
||||
int *cuda_primary_idxs;
|
||||
int *cuda_primary_offsets;
|
||||
float *cuda_primary_centers;
|
||||
cudaMalloc((void **)&cuda_primary_idxs,
|
||||
primary_total_point_num * 2 * sizeof(int) +
|
||||
sizeof(int)); // prevent alloc 0 space
|
||||
cudaMalloc((void **)&cuda_primary_offsets, (primary_num + 1) * sizeof(int));
|
||||
cudaMalloc((void **)&cuda_primary_centers,
|
||||
primary_num * 5 * sizeof(float) +
|
||||
sizeof(float)); // prevent alloc 0 space
|
||||
cudaMemcpy(cuda_primary_idxs, primary_idxs,
|
||||
primary_total_point_num * 2 * sizeof(int), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(cuda_primary_offsets, primary_offsets,
|
||||
(primary_num + 1) * sizeof(int), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(cuda_primary_centers, primary_centers,
|
||||
primary_num * 5 * sizeof(float), cudaMemcpyHostToDevice);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
// for each fragment, find its primary
|
||||
int *cuda_primary_absorb_fragment_idx; // array for saving the fragment idxs
|
||||
int *cuda_primary_absorb_fragment_cnt; // array for saving the fragment nums
|
||||
cudaMalloc((void **)&cuda_primary_absorb_fragment_idx,
|
||||
primary_num * MAX_PER_PRIMARY_ABSORB_FRAGMENT_NUM * sizeof(int) +
|
||||
sizeof(int));
|
||||
cudaMalloc((void **)&cuda_primary_absorb_fragment_cnt,
|
||||
primary_num * sizeof(int) + sizeof(int));
|
||||
if (fragment_num != 0)
|
||||
fragment_find_primary_<<<int(DIVUP(fragment_num, MAX_THREADS_PER_BLOCK)),
|
||||
(int)MAX_THREADS_PER_BLOCK>>>(
|
||||
primary_num, cuda_primary_offsets, cuda_primary_centers, fragment_num,
|
||||
cuda_fragment_offsets, cuda_fragment_centers,
|
||||
cuda_primary_absorb_fragment_idx, cuda_primary_absorb_fragment_cnt);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
// concatenate fragments belonging to the same primary
|
||||
int *cuda_concat_idxs;
|
||||
int *cuda_concat_point_num;
|
||||
cudaMalloc((void **)&cuda_concat_idxs,
|
||||
primary_num * MAX_PER_PRIMARY_ABSORB_POINT_NUM * 2 * sizeof(int) +
|
||||
sizeof(int));
|
||||
cudaMalloc((void **)&cuda_concat_point_num,
|
||||
primary_num * sizeof(int) + sizeof(int));
|
||||
assert(primary_num <= MAX_PRIMARY_NUM);
|
||||
concat_fragments_<<<primary_num, (int)1>>>(
|
||||
cuda_fragment_idxs, cuda_fragment_offsets, cuda_primary_idxs,
|
||||
cuda_primary_offsets, cuda_primary_absorb_fragment_idx,
|
||||
cuda_primary_absorb_fragment_cnt, cuda_concat_idxs, cuda_concat_point_num,
|
||||
primary_num);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
// merge primary instances and fragments
|
||||
int *concat_point_num = new int[primary_num + 1]; // allocate on host
|
||||
cudaMemcpy(concat_point_num, cuda_concat_point_num, primary_num * sizeof(int),
|
||||
cudaMemcpyDeviceToHost);
|
||||
int _accu_offset = 0;
|
||||
for (int i = 0; i < primary_num; i++) {
|
||||
// add primary instances
|
||||
cudaMemcpy(primary_idxs_post + _accu_offset * 2,
|
||||
cuda_primary_idxs + primary_offsets[i] * 2,
|
||||
(primary_offsets[i + 1] - primary_offsets[i]) * 2 * sizeof(int),
|
||||
cudaMemcpyDeviceToHost);
|
||||
_accu_offset += (primary_offsets[i + 1] - primary_offsets[i]);
|
||||
|
||||
// add absorbed fragments
|
||||
cudaMemcpy(primary_idxs_post + _accu_offset * 2,
|
||||
cuda_concat_idxs + i * MAX_PER_PRIMARY_ABSORB_POINT_NUM * 2,
|
||||
concat_point_num[i] * 2 * sizeof(int), cudaMemcpyDeviceToHost);
|
||||
_accu_offset += concat_point_num[i];
|
||||
|
||||
// writing offsets
|
||||
primary_offsets_post[i + 1] = _accu_offset;
|
||||
}
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
cudaError_t err;
|
||||
err = cudaGetLastError();
|
||||
if (cudaSuccess != err) {
|
||||
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
/*
|
||||
Hierarchical Aggregation Algorithm
|
||||
*/
|
||||
|
||||
#ifndef HIERARCHICAL_AGGREGATION_H
|
||||
#define HIERARCHICAL_AGGREGATION_H
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
#include <THC/THC.h>
|
||||
#include <torch/serialize/tensor.h>
|
||||
|
||||
#include "../datatype/datatype.h"
|
||||
|
||||
void hierarchical_aggregation(
|
||||
at::Tensor class_numpoint_mean_tensor, at::Tensor semantic_label_tensor,
|
||||
at::Tensor coord_shift_tensor, at::Tensor batch_idxs_tensor,
|
||||
at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor,
|
||||
at::Tensor fragment_idxs_tensor, at::Tensor fragment_offsets_tensor,
|
||||
at::Tensor fragment_centers_tensor, at::Tensor cluster_idxs_kept_tensor,
|
||||
at::Tensor cluster_offsets_kept_tensor,
|
||||
at::Tensor cluster_centers_kept_tensor, at::Tensor primary_idxs_tensor,
|
||||
at::Tensor primary_offsets_tensor, at::Tensor primary_centers_tensor,
|
||||
at::Tensor primary_idxs_post_tensor, at::Tensor primary_offsets_post_tensor,
|
||||
const int N, const int training_mode_, const int using_set_aggr_,
|
||||
const int class_id);
|
||||
|
||||
void hierarchical_aggregation_cuda(
|
||||
int fragment_total_point_num, int fragment_num, int *fragment_idxs,
|
||||
int *fragment_offsets, float *fragment_centers, int primary_total_point_num,
|
||||
int primary_num, int *primary_idxs, int *primary_offsets,
|
||||
float *primary_centers, int *primary_idxs_post, int *primary_offsets_post);
|
||||
#endif // HIERARCHICAL_AGGREGATION_H
|
||||
@@ -5,10 +5,6 @@
|
||||
|
||||
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
|
||||
|
||||
m.def("hierarchical_aggregation", &hierarchical_aggregation,
|
||||
"hierarchical_aggregation");
|
||||
// m.def("cal_iou_and_masklabel", &cal_iou_and_masklabel,
|
||||
// "cal_iou_and_masklabel");
|
||||
m.def("get_mask_iou_on_cluster", &get_mask_iou_on_cluster,
|
||||
"get_mask_iou_on_cluster");
|
||||
m.def("get_mask_iou_on_pred", &get_mask_iou_on_pred, "get_mask_iou_on_pred");
|
||||
|
||||
@@ -2,12 +2,9 @@
|
||||
#include <cuda_runtime.h>
|
||||
#include <torch/extension.h>
|
||||
|
||||
#include "datatype/datatype.cpp"
|
||||
|
||||
#include "cal_iou_and_masklabel/cal_iou_and_masklabel.cpp"
|
||||
#include "hierarchical_aggregation/hierarchical_aggregation.cpp"
|
||||
|
||||
#include "bfs_cluster/bfs_cluster.cpp"
|
||||
#include "cal_iou_and_masklabel/cal_iou_and_masklabel.cpp"
|
||||
#include "datatype/datatype.cpp"
|
||||
#include "get_iou/get_iou.cpp"
|
||||
#include "roipool/roipool.cpp"
|
||||
#include "sec_mean/sec_mean.cpp"
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
#ifndef HAIS_H
|
||||
#define HAIS_H
|
||||
#include "datatype/datatype.h"
|
||||
|
||||
#include "cal_iou_and_masklabel/cal_iou_and_masklabel.h"
|
||||
#include "hierarchical_aggregation/hierarchical_aggregation.h"
|
||||
|
||||
#include "bfs_cluster/bfs_cluster.h"
|
||||
#include "cal_iou_and_masklabel/cal_iou_and_masklabel.h"
|
||||
#include "datatype/datatype.h"
|
||||
#include "get_iou/get_iou.h"
|
||||
#include "roipool/roipool.h"
|
||||
#include "sec_mean/sec_mean.h"
|
||||
@@ -37,4 +34,4 @@ void point_recover_bp_feat(/* cuda float N*C */ at::Tensor d_output_feats,
|
||||
/* cuda Int M*(maxActive+1) */ at::Tensor idx_map,
|
||||
Int nActive, Int maxActive, Int nPlane);
|
||||
|
||||
#endif // HAIS_H
|
||||
#endif // HAIS_H
|
||||
|
||||
@@ -134,6 +134,10 @@ class SoftGroup(nn.Module):
|
||||
proposals_idx, proposals_offset = self.forward_grouping(semantic_scores, pt_offsets,
|
||||
batch_idxs, coords_float,
|
||||
self.grouping_cfg)
|
||||
if proposals_offset.shape[0] > self.train_cfg.max_proposal_num:
|
||||
proposals_offset = proposals_offset[:self.train_cfg.max_proposal_num + 1]
|
||||
proposals_idx = proposals_idx[:proposals_offset[-1]]
|
||||
assert proposals_idx.shape[0] == proposals_offset[-1]
|
||||
instance_batch_idxs, cls_scores, iou_scores, mask_scores = self.forward_instance(
|
||||
proposals_idx, proposals_offset, output_feats, coords_float)
|
||||
instance_loss = self.instance_loss(cls_scores, mask_scores, iou_scores, proposals_idx,
|
||||
@@ -303,32 +307,27 @@ class SoftGroup(nn.Module):
|
||||
proposals_offset_list = []
|
||||
batch_size = batch_idxs.max() + 1
|
||||
semantic_scores = semantic_scores.softmax(dim=-1)
|
||||
semantic_preds = semantic_scores.max(1)[1] # TODO remove this
|
||||
|
||||
radius = self.grouping_cfg.radius
|
||||
mean_active = self.grouping_cfg.mean_active
|
||||
npoint_thr = self.grouping_cfg.npoint_thr
|
||||
class_numpoint_mean = torch.tensor(
|
||||
self.grouping_cfg.class_numpoint_mean, dtype=torch.float32)
|
||||
training_mode = None # TODO remove this
|
||||
for class_id in range(self.semantic_classes):
|
||||
# ignore "floor" and "wall"
|
||||
if class_id < 2:
|
||||
if class_id in self.grouping_cfg.ignore_classes:
|
||||
continue
|
||||
scores = semantic_scores[:, class_id].contiguous()
|
||||
object_idxs = (scores > self.grouping_cfg.score_thr).nonzero().view(-1)
|
||||
if object_idxs.size(0) < 100: # TODO
|
||||
if object_idxs.size(0) < self.test_cfg.min_npoint:
|
||||
continue
|
||||
batch_idxs_ = batch_idxs[object_idxs]
|
||||
batch_offsets_ = utils.get_batch_offsets(batch_idxs_, batch_size)
|
||||
coords_ = coords_float[object_idxs]
|
||||
pt_offsets_ = pt_offsets[object_idxs] # (N_fg, 3), float32
|
||||
semantic_preds_cpu = semantic_preds[object_idxs].int().cpu()
|
||||
pt_offsets_ = pt_offsets[object_idxs]
|
||||
idx, start_len = softgroup_ops.ballquery_batch_p(coords_ + pt_offsets_, batch_idxs_,
|
||||
batch_offsets_, radius, mean_active)
|
||||
using_set_aggr = False # TODO refactor this
|
||||
proposals_idx, proposals_offset = softgroup_ops.hierarchical_aggregation(
|
||||
class_numpoint_mean, semantic_preds_cpu, (coords_ + pt_offsets_).cpu(), idx.cpu(),
|
||||
start_len.cpu(), batch_idxs_.cpu(), training_mode, using_set_aggr, class_id)
|
||||
proposals_idx, proposals_offset = softgroup_ops.bfs_cluster(
|
||||
class_numpoint_mean, idx.cpu(), start_len.cpu(), npoint_thr, class_id)
|
||||
proposals_idx[:, 1] = object_idxs[proposals_idx[:, 1].long()].int()
|
||||
|
||||
# merge proposals
|
||||
|
||||
Reference in New Issue
Block a user