merged camera

2025-10-16 11:45:42 +00:00 · 2021-02-07 17:40:13 +01:00 · 2021-02-07 17:40:13 +01:00 · 26401f959e
commit 26401f959e
parent ca3b4e7167
6 changed files with 96 additions and 76 deletions
--- a/config.yaml
+++ b/config.yaml
@ -23,7 +23,7 @@ pose:
  lr: 0.01
  optimizer: Adam # currently supported Adam, LBFGS
  iterations: 100
-  useCameraIntrinsics: false
+  useCameraIntrinsics: true
  bodyMeanLoss:
    enabled: false
    weight: 0.1
--- a/example_fit.py
+++ b/example_fit.py
@ -42,16 +42,6 @@ camera = TorchCameraEstimate(
 # render camera to the scene
 camera.setup_visualization(r.init_keypoints, r.keypoints)
 # run camera optimizer
 cam, cam_trans, cam_int, cam_params = SimpleCamera.from_estimation_cam(
    camera,
    dtype=dtype,
    device=device,
 )
 # apply transform to scene
 r.set_group_pose("body", cam_trans.cpu().numpy())
 # train for pose
 train_pose_with_conf(
@ -59,7 +49,7 @@ train_pose_with_conf(
    model=model,
    keypoints=keypoints,
    keypoint_conf=conf,
-    camera=cam,
+    camera=camera,
    renderer=r,
    device=device,
 )
--- a/example_pose.py
+++ b/example_pose.py
@ -3,12 +3,8 @@ import numpy as np
 # local imports
 from renderer import DefaultRenderer
 from train_pose import train_pose_with_conf
 from modules.camera import SimpleCamera
 from model import SMPLyModel
-from utils.general import load_config, setup_training
+from utils.general import load_config
 from camera_estimation import TorchCameraEstimate
 from dataset import SMPLyDataset
 # this a simple pose playground with a async renderer for quick prototyping
--- a/modules/angle_clip.py
+++ b/modules/angle_clip.py
@ -2,12 +2,13 @@ import torch
 import torch.nn as nn
 import numpy as np
 class AngleClipper(nn.Module):
    def __init__(
        self,
        device=torch.device('cpu'),
        dtype=torch.float32,
-        angle_idx=[24, 10 , 9],
+        angle_idx=[24, 10, 9],
        # directions=[-1, 1, 1, 1],
        weights=[1.0, 1.0, 1.0]
    ):
@ -35,8 +36,7 @@ class AngleClipper(nn.Module):
        angles = pose[:, self.angle_idx]
-        penalty = angles[torch.abs(angles) > self.limit] 
+        penalty = angles[torch.abs(angles) > self.limit]
        # get relevant angles
        return penalty.pow(2).sum() * 0.01
--- a/modules/camera.py
+++ b/modules/camera.py
@ -8,34 +8,63 @@ from model import *
 from dataset import *
-class SimpleCamera(nn.Module):
+class TransformCamera(nn.Module):
    def __init__(
        self,
        transform_mat: torch.Tensor,
        dtype=torch.float32,
        device=None,
        transform_mat=None,
        camera_intrinsics=None,
        camera_trans_rot=None
    ):
-        super(SimpleCamera, self).__init__()
+        super(TransformCamera, self).__init__()
-        self.hasTransform = False
+
        self.hasCameraTransform = False
        self.dtype = dtype
        self.device = device
        self.model_type = "smplx"
-        if camera_intrinsics is not None:
+        self.register_buffer("trans", transform_mat.to(
-            self.hasCameraTransform = True
+            device=device, dtype=dtype))
            self.register_buffer("cam_int", camera_intrinsics)
            self.register_buffer("cam_trans_rot", camera_trans_rot)
            self.register_buffer("trans", transform_mat)
            # self.register_buffer("disp_trans", camera_trans_rot)
        elif transform_mat is not None:
            self.hasTransform = True
            self.register_buffer("trans", transform_mat)
            # self.register_buffer("disp_trans", transform_mat)
-    def from_estimation_cam(cam: TorchCameraEstimate, device=None, dtype=None):
+    def forward(self, points):
        proj_points = self.trans @ points.reshape(-1, 4, 1)
        proj_points = proj_points.reshape(1, -1, 4)[:, :, :2] * 1
        proj_points = F.pad(proj_points, (0, 1, 0, 0), value=0)
        return proj_points
 class IntrinsicsCamera(nn.Module):
    def __init__(
        self,
        transform_mat: torch.Tensor,
        camera_intrinsics: torch.Tensor,
        camera_trans_rot: torch.Tensor,
        dtype=torch.float32,
        device=None
    ):
        super(IntrinsicsCamera, self).__init__()
        self.dtype = dtype
        self.device = device
        self.register_buffer("cam_int", camera_intrinsics.to(
            device=device, dtype=dtype))
        self.register_buffer("cam_trans_rot", camera_trans_rot.to(
            device=device, dtype=dtype))
        self.register_buffer("trans", transform_mat.to(
            device=device, dtype=dtype))
    def forward(self, points):
        proj_points = self.cam_int[:3, :3] @ self.cam_trans_rot[:3,
                                                                :] @ self.trans @ points.reshape(-1, 4, 1)
        result = proj_points.squeeze(2)
        denomiator = torch.zeros(points.shape[1], 3)
        for i in range(points.shape[1]):
            denomiator[i, :] = result[i, 2]
        result = result/denomiator
        result[:, 2] = 0
        return result
 class SimpleCamera(nn.Module):
    def from_estimation_cam(cam: TorchCameraEstimate, use_intrinsics=False, device=None, dtype=None):
        """utility to create camera module from estimation camera
        Args:
@ -44,29 +73,21 @@ class SimpleCamera(nn.Module):
        cam_trans, cam_int, cam_params = cam.get_results(
            device=device, dtype=dtype)
-        return SimpleCamera(
+        cam_layer = None
            dtype,
            device,
            transform_mat=cam_trans,
              camera_intrinsics=cam_int, camera_trans_rot=cam_params
        ), cam_trans, cam_int, cam_params
-    def forward(self, points):
+        if use_intrinsics:
-        if self.hasTransform:
+            cam_layer = IntrinsicsCamera(
-            proj_points = self.trans @ points.reshape(-1, 4, 1)
+                transform_mat=cam_trans,
-            proj_points = proj_points.reshape(1, -1, 4)[:, :, :2] * 1
+                camera_intrinsics=cam_int,
-            proj_points = F.pad(proj_points, (0, 1, 0, 0), value=0)
+                camera_trans_rot=cam_params,
-            return proj_points
+                device=device,
-        if self.hasCameraTransform:
+                dtype=dtype,
-            proj_points = self.cam_int[:3, :3] @ self.cam_trans_rot[:3,
+            )
-                                                                    :] @ self.trans @ points.reshape(-1, 4, 1)
+        else:
-            result = proj_points.squeeze(2)
+            cam_layer = TransformCamera(
-            denomiator = torch.zeros(points.shape[1], 3)
+                transform_mat=cam_trans,
-            for i in range(points.shape[1]):
+                device=device,
-                denomiator[i, :] = result[i, 2]
+                dtype=dtype,
-            result = result/denomiator
+            )
            result[:, 2] = 0
            return result
-        # scale = (points[:, :, 2] / self.z_scale)
+        return cam_layer, cam_trans, cam_int, cam_params
        # print(points.shape, scale.shape)
--- a/train_pose.py
+++ b/train_pose.py
@ -1,3 +1,4 @@
 from camera_estimation import TorchCameraEstimate
 from modules.angle_clip import AngleClipper
 from modules.angle import AnglePriorsLoss
 import smplx
@ -41,7 +42,7 @@ def train_pose(
    print("[pose] starting training")
    print("[pose] dtype=", dtype)
-    loss_layer = torch.nn.MSELoss().to(device=device, dtype=dtype) #MSELoss()
+    loss_layer = torch.nn.MSELoss().to(device=device, dtype=dtype)  # MSELoss()
    clip_loss_layer = AngleClipper().to(device=device, dtype=dtype)
@ -91,10 +92,9 @@ def train_pose(
        pose_extra = None
        # if useBodyPrior:
-            # body = vposer_layer()
+        # body = vposer_layer()
-            # poZ = body.poZ_body
+        # poZ = body.poZ_body
-            # pose_extra = body.pose_body
+        # pose_extra = body.pose_body
        # return joints based on current model state
        body_joints, cur_pose = pose_layer()
@ -115,12 +115,13 @@ def train_pose(
        body_mean_loss = 0.0
        if body_mean_loss:
            body_mean_loss = (cur_pose -
-                           body_mean_pose).pow(2).sum() * body_mean_weight
+                              body_mean_pose).pow(2).sum() * body_mean_weight
        body_prior_loss = 0.0
        if useBodyPrior:
            # apply pose prior loss.
-            body_prior_loss = latent_body.pose_body.pow(2).sum() * body_prior_weight
+            body_prior_loss = latent_body.pose_body.pow(
                2).sum() * body_prior_weight
        angle_prior_loss = 0.0
        if useAnglePrior:
@ -130,11 +131,11 @@ def train_pose(
        angle_sum_loss = 0.0
        if use_angle_sum_loss:
-            angle_sum_loss = clip_loss_layer(cur_pose) * angle_sum_weight
+            angle_sum_loss = clip_loss_layer(cur_pose)  # * angle_sum_weight
        loss = loss + body_mean_loss + body_prior_loss + angle_prior_loss + angle_sum_loss
-        return loss 
+        return loss
    def optim_closure():
        if torch.is_grad_enabled():
@ -191,32 +192,44 @@ def train_pose(
    pbar.close()
    print("Final result:", loss.item())
-    return pose_layer.cur_out
+    return pose_layer.cur_out, best_pose
 def train_pose_with_conf(
    config,
    camera: TorchCameraEstimate,
    model: smplx.SMPL,
    keypoints,
    keypoint_conf,
    camera: SimpleCamera,
    device=torch.device('cpu'),
    dtype=torch.float32,
    renderer: Renderer = None,
 ):
    # configure PyTorch device and format
-    dtype = torch.float64
+    # dtype = torch.float64
    if 'device' in config['pose'] is not None:
        device = torch.device(config['pose']['device'])
    else:
        device = torch.device('cpu')
    # create camera module
    pose_camera, cam_trans, cam_int, cam_params = SimpleCamera.from_estimation_cam(
        cam=camera,
        use_intrinsics=config['pose']['useCameraIntrinsics'],
        dtype=dtype,
        device=device,
    )
    # apply transform to scene
    if renderer is not None:
        renderer.set_group_pose("body", cam_trans.cpu().numpy())
    return train_pose(
        model=model.to(dtype=dtype),
        keypoints=keypoints,
        keypoint_conf=keypoint_conf,
-        camera=camera,
+        camera=pose_camera,
        device=device,
        dtype=dtype,
        renderer=renderer,