making camera work again

Wlad 2021-02-26 12:50:38 +01:00
parent eb1b5876a8
commit 36cd2c6648
13 changed files with 236 additions and 40 deletions

.gitignore (vendored): 4 changes

@@ -101,4 +101,6 @@ vposer_v1_0
results/
output/
tests/
-samples/video*
+samples/
+raw/
+presentation/


@@ -33,7 +33,7 @@ This should copy and rename the SMPL model to the correct folders. Either way th
To use `bodyPrior` in the configuration please download VPoser and place it into the `./vposer_v1_0` directory in the project root. VPoser can be downloaded from [this link](https://psfiles.is.tuebingen.mpg.de/downloads/smplx/vposer_v1_0-zip) after creating an account with SMPL-X.
### Mesh intersection
To use `intersectLoss` in the configuration please pull the [github](https://github.com/gosticks/torch-mesh-isect) repo. This repo is patched to run on newer versions of PyTorch.
Note: It only runs on Linux-based operating systems. We had trouble getting it to work on Windows.
### Conda Environment

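For reference, a minimal pre-flight check that the VPoser weights are where the README above expects them. This is a sketch, not part of the commit, and the repo's actual loading code may differ:

```python
from pathlib import Path

# The README expects the unpacked VPoser weights at ./vposer_v1_0 (project root).
vposer_dir = Path("./vposer_v1_0")
if not vposer_dir.is_dir():
    raise FileNotFoundError(
        "Download VPoser from the SMPL-X site (account required) and "
        "unpack it to ./vposer_v1_0 before enabling bodyPrior in the config.")
```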

@@ -19,8 +19,8 @@ camera:
patience: 10
optimizer: Adam
orientation:
-lr: 0.03
-optimizer: LBFGS
+lr: 0.5
+optimizer: Adam
iterations: 5
joint_names: ["hip-left", "hip-right", "shoulder-left", "shoulder-right"] # joints to be used for optimization
pose:
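The `orientation` stage now uses `Adam` with `lr: 0.5` instead of `LBFGS`. A hedged sketch of how such a config entry can be resolved to a `torch.optim` instance, mirroring the `optimizer(parameters, learning_rate)` call in `train_orient` further down; the `getattr` lookup is an assumption, not necessarily the repo's actual helper:

```python
import torch.optim as optim

def optimizer_from_conf(conf, parameters):
    # conf is e.g. {"optimizer": "Adam", "lr": 0.5}
    opt_cls = getattr(optim, conf["optimizer"])  # optim.Adam or optim.LBFGS
    return opt_cls(parameters, lr=conf["lr"])
```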

example_fit_camera.py (new file): 84 lines

@@ -0,0 +1,84 @@
import pickle
import time
from train import create_animation
from dataset import SMPLyDataset
from model import *
from utils.general import *
from renderer import *
from utils.general import rename_files, get_new_filename
START_IDX = 1 # starting index of the frame to optimize for
FINISH_IDX = 60 # choose a big number to optimize for all frames in samples directory
# if False, only run already saved animation without optimization
RUN_OPTIMIZATION = True
result_image = []
idx = START_IDX
device = torch.device('cpu')
dtype = torch.float32
config = load_config()
dataset = SMPLyDataset.from_config(config)
model = SMPLyModel.model_from_conf(config)
# Rename files in samples directory to uniform format
if config['data']['renameFiles']:
rename_files(config['data']['rootDir'] + "/")
'''
Optimization part without visualization
'''
if RUN_OPTIMIZATION:
model_outs, filename = create_animation(
dataset,
config,
START_IDX,
FINISH_IDX,
verbose=False,
offscreen=True,
save_to_file=True,
interpolate=False
)
def replay_animation(file, start_frame=0, end_frame=None, with_background=False, fps=30, interpolated=False):
r = Renderer()
r.start()
model_anim = SMPLyModel.model_from_conf(config)
with open(file, "rb") as fp:
results = pickle.load(fp)
if end_frame is None:
end_frame = len(results)
for model, camera_transform in results[start_frame:end_frame]:
if interpolated:
vertices = model
else:
vertices = model.vertices
r.render_model_geometry(
faces=model_anim.faces,
vertices=vertices,
pose=camera_transform
)
time.sleep(1 / fps)
'''
Play the animation.
'''
if RUN_OPTIMIZATION:
anim_file = filename
else:
results_dir = config['output']['rootDir']
result_prefix = config['output']['prefix']
anim_file = results_dir + result_prefix + "0.pkl"
replay_animation(anim_file, interpolated=True)
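The pickle consumed by `replay_animation` holds one `(model output or vertex array, camera transform)` pair per frame, so a saved run can be inspected directly. A small sketch continuing from the variables above; the exact types stored depend on the `interpolate` flag, and the 4x4 shape of the camera transform is an assumption:

```python
import pickle

with open(anim_file, "rb") as fp:
    results = pickle.load(fp)
print(len(results), "frames")  # one (model, camera_transform) pair per frame
model_out, cam_transform = results[0]
print(type(model_out), getattr(cam_transform, "shape", None))
```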


@@ -9,7 +9,7 @@ from renderer import *
from utils.general import rename_files, get_new_filename
START_IDX = 150 # starting index of the frame to optimize for
-FINISH_IDX = 300 # choose a big number to optimize for all frames in samples directory
+FINISH_IDX = 400 # choose a big number to optimize for all frames in samples directory
result_image = []
idx = START_IDX

examples/__init__.py (new file, empty)

modules/perspective_cam.py (new file): 84 lines

@@ -0,0 +1,84 @@
from collections import namedtuple
import torch
import torch.nn as nn
from smplx.lbs import transform_mat
class PerspectiveCamera(nn.Module):
FOCAL_LENGTH = 5000
def __init__(self, rotation=None, translation=None,
focal_length_x=None, focal_length_y=None,
batch_size=1,
center=None, dtype=torch.float32, **kwargs):
super(PerspectiveCamera, self).__init__()
self.batch_size = batch_size
self.dtype = dtype
# Make a buffer so that PyTorch does not complain when creating
# the camera matrix
self.register_buffer('zero',
torch.zeros([batch_size], dtype=dtype))
if focal_length_x is None or isinstance(focal_length_x, float):
focal_length_x = torch.full(
[batch_size],
self.FOCAL_LENGTH if focal_length_x is None else
focal_length_x,
dtype=dtype)
if focal_length_y is None or isinstance(focal_length_y, float):
focal_length_y = torch.full(
[batch_size],
self.FOCAL_LENGTH if focal_length_y is None else
focal_length_y,
dtype=dtype)
self.register_buffer('focal_length_x', focal_length_x)
self.register_buffer('focal_length_y', focal_length_y)
if center is None:
center = torch.zeros([batch_size, 2], dtype=dtype)
self.register_buffer('center', center)
if rotation is None:
rotation = torch.eye(
3, dtype=dtype).unsqueeze(dim=0).repeat(batch_size, 1, 1)
rotation = nn.Parameter(rotation, requires_grad=True)
self.register_parameter('rotation', rotation)
if translation is None:
translation = torch.zeros([batch_size, 3], dtype=dtype)
translation = nn.Parameter(translation,
requires_grad=True)
self.register_parameter('translation', translation)
def forward(self, points):
device = points.device
with torch.no_grad():
camera_mat = torch.zeros([self.batch_size, 2, 2],
dtype=self.dtype, device=points.device)
camera_mat[:, 0, 0] = self.focal_length_x
camera_mat[:, 1, 1] = self.focal_length_y
camera_transform = transform_mat(self.rotation,
self.translation.unsqueeze(dim=-1))
homog_coord = torch.ones(list(points.shape)[:-1] + [1],
dtype=points.dtype,
device=device)
# Convert the points to homogeneous coordinates
points_h = torch.cat([points, homog_coord], dim=-1)
projected_points = torch.einsum('bki,bji->bjk',
[camera_transform, points_h])
img_points = torch.div(projected_points[:, :, :2],
projected_points[:, :, 2].unsqueeze(dim=-1))
img_points = torch.einsum('bki,bji->bjk', [camera_mat, img_points]) \
+ self.center.unsqueeze(dim=1)
return img_points
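For orientation, a minimal usage sketch of the module above (not part of the commit); the image center and focal length match the values hard-coded in `train_orient` below:

```python
import torch

# Assumed shapes: input [batch, num_joints, 3] -> output [batch, num_joints, 2].
cam = PerspectiveCamera(center=torch.tensor([[1920 / 2, 1080 / 2]]),
                        focal_length_x=850.0, focal_length_y=850.0)
joints_3d = torch.rand([1, 25, 3])  # e.g. 25 OpenPose-style joints
joints_2d = cam(joints_3d)          # 2D pixel coordinates, shape [1, 25, 2]
```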


@@ -60,12 +60,12 @@ def optimize_sample(sample_index, dataset, config, device=torch.device('cpu'), d
device = torch.device('cpu')
# get camera estimation
-pose_camera, cam_trans, cam_int, cam_params = SimpleCamera.from_estimation_cam(
-cam=camera,
-use_intrinsics=config['pose']['useCameraIntrinsics'],
-dtype=dtype,
-device=device,
-)
+# pose_camera, cam_trans, cam_int, cam_params = SimpleCamera.from_estimation_cam(
+# cam=camera,
+# use_intrinsics=config['pose']['useCameraIntrinsics'],
+# dtype=dtype,
+# device=device,
+# )
params = defaultdict(
body_pose=initial_pose,
@@ -74,15 +74,17 @@
with torch.no_grad():
model(**params)
r.start()
# apply transform to scene
-if r is not None:
-r.set_group_pose("body", cam_trans.cpu().numpy())
+# if r is not None:
+#r.set_group_pose("body", cam_trans.cpu().numpy())
global_orient = train_orient_with_conf(
config=config,
model=model,
keypoints=keypoints,
-camera_layer=pose_camera,
+camera_layer=None, # pose_camera,
renderer=r,
device=device,
use_progress_bar=verbose,

train_camera.py (new file, empty)


@@ -1,4 +1,5 @@
-from utils.mapping import get_indices_by_name
+from modules.perspective_cam import PerspectiveCamera
+from utils.mapping import get_indices_by_name, opengl_to_screen_space
from modules.distance_loss import WeightedMSELoss
from modules.utils import get_loss_layers
from camera_estimation import TorchCameraEstimate
@@ -52,10 +53,18 @@ def train_orient(
)
# make sure camera module is on the correct device
-camera = camera.to(device=device, dtype=dtype)
+#camera = camera.to(device=device, dtype=dtype)
+pers_cam = PerspectiveCamera(
+dtype=dtype, device=device,
+center=torch.tensor([[1920/2, 1080/2]], dtype=dtype),
+focal_length_x=850.0,
+focal_length_y=850.0
+).to(device=device)
# setup keypoint data
keypoints = torch.tensor(keypoints).to(device=device, dtype=dtype)
+# keypoints = opengl_to_screen_space(keypoints, (1920, 1080))
+# do some janky conversion back to pixel :(
# torso indices
torso_indices = get_indices_by_name(joint_names)
@@ -67,7 +76,7 @@ def train_orient(
pose_layer = BodyPose(model, dtype=dtype, device=device,
useBodyMeanAngles=False).to(device=device, dtype=dtype)
-parameters = [model.global_orient]
+parameters = [model.global_orient, pers_cam.rotation, pers_cam.translation]
if use_progress_bar:
pbar = tqdm(total=iterations)
@@ -85,19 +94,23 @@ def train_orient(
optimizer = optimizer(parameters, learning_rate)
+print(keypoints[0][0])
+body_joints, cur_pose = pose_layer()
+body_joints = opengl_to_screen_space(body_joints.clone(), (1080, 1080))
+print(body_joints[0][0])
# prediction and loss computation closure
def predict():
# return joints based on current model state
body_joints, cur_pose = pose_layer()
+# body_joints = opengl_to_screen_space(body_joints.clone(), (1920, 1080))
# compute homogeneous coordinates and project them to 2D space
-points = tgm.convert_points_to_homogeneous(body_joints)
-points = camera(points).squeeze()
+#points = tgm.convert_points_to_homogeneous(body_joints)
+points = pers_cam(body_joints).squeeze()
+print(points[0][0])
# compute loss between 2D joint projection and OpenPose keypoints
loss = loss_layer(points[torso_indices],
-keypoints[torso_indices])
+keypoints[torso_indices][:, :2])
return loss
# main optimizer closure
@@ -112,10 +125,10 @@ def train_orient(
return loss
# camera translation
-R = camera.trans.detach().cpu().numpy().squeeze()
+#R = camera.trans.detach().cpu().numpy().squeeze()
# main optimization loop
-for t in range(iterations):
+for t in range(2000):
loss = optimizer.step(optim_closure)
# compute loss
@@ -136,12 +149,14 @@ def train_orient(
pbar.set_description("Error %f" % cur_loss)
pbar.update(1)
-if renderer is not None and render_steps:
-renderer.render_model(
-model=model,
-model_out=pose_layer.cur_out,
-transform=R
-)
+# if renderer is not None and render_steps:
+# renderer.render_model(
+# model=model,
+# model_out=pose_layer.cur_out,
+# transform=R
+# )
+print("translation", pers_cam.translation)
if use_progress_bar:
pbar.close()


@@ -106,7 +106,7 @@ def estimate_scale(joints, keypoints, pairs=[
smpl_height = np.linalg.norm(smpl_dists, axis=0).mean()
ops_height = np.linalg.norm(ops_dists, axis=0).mean()
-return cam_fy / 1080 * smpl_height / ops_height
+return smpl_height / ops_height
def estimate_focal_length(run_estimation: bool = False):
@@ -192,7 +192,7 @@ def setup_training(model, dataset, sample_index, renderer=True, offscreen=False)
est_scale = estimate_scale(joints, keypoints)
# apply scaling to keypoints
-keypoints = keypoints * est_scale
+keypoints = keypoints # * est_scale
# integrating Camera Estimation

utils/mapping.py

@@ -278,13 +278,15 @@ def openpose_to_opengl_coords(
[type]: [description]
"""
-points = np.array([
-[
-x / real_width * 2 - 1,
--y / real_height * 2 + 1,
-0
-] for (x, y, z) in input_data])
+# points = np.array([
+# [
+# x / real_width * 2 - 1,
+# -y / real_height * 2 + 1,
+# 0
+# ] for (x, y, z) in input_data])
+points = np.array(input_data)[:, :3]
+points[:, 2] = 0
conf = np.array([
z for (_, _, z) in input_data
])
@@ -292,6 +294,13 @@
return (points, conf)
+def opengl_to_screen_space(points, size):
+points[:, 0] = (points[:, 0] + 1) / 2 * size[0]
+points[:, 1] = -((points[:, 1] - 1) / 2 * size[1])
+points[:, 2] = ((points[:, 2] - 1) / 2 * size[1])
+return points
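A quick sanity check of the new helper (not part of the commit): the NDC origin should land at the image center of a 1920x1080 frame, while the z channel is a depth remap rather than a pixel coordinate:

```python
import numpy as np

p = opengl_to_screen_space(np.array([[0.0, 0.0, 0.0]]), (1920, 1080))
print(p[0])  # -> [ 960.  540. -540.]
```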
def smpl_to_openpose(print_mapping: bool = True):
"""Utility for remapping smpl mapping indices to openpose mapping indices.


@@ -93,7 +93,7 @@ def save_to_video(
r.render_model_geometry(
faces=model_anim.faces,
vertices=vertices,
-pose=cam_trans # cam_transform,
+pose=cam_transform # cam_transform,
)
frames.append(r.get_snapshot())