cleanup utils and modules

Wlad 2021-01-23 22:28:28 +01:00
parent a1ea513d04
commit a5ba89f982
7 changed files with 312 additions and 346 deletions

@@ -1,80 +1,38 @@
-import yaml
 import torch
-import math
-import time
-from math import cos, sin
-from model import *
-# from renderer import *
-from dataset import *
-from utils import get_named_joint, get_named_joints
-from collections import defaultdict
-import torch.nn.functional as F
 import torch.nn as nn
+# Camera module is heavily inspired by the SMPLify implementation
+import torch.nn.functional as F
+from math import cos, sin
+from model import *
+from dataset import *

-# class Camera(nn.Module):
-#     def __init__(
-#         self,
-#         fx=None,
-#         fy=None,
-#         center=None,
-#         plain_depth=0,
-#         dtype=torch.float32,
-#         device=None,
-#     ):
-#         super(Camera, self).__init__()
-#         self.dtype = dtype
-#         self.device = device
-#         self.register_buffer("fx", torch.Tensor([fx], device=device))
-#         self.register_buffer("fy", torch.Tensor([fy], device=device))
-#         camera_intrinsics = torch.zeros(
-#             [2, 2], dtype=dtype, device=device)
-#         camera_intrinsics[0, 0] = self.fx
-#         camera_intrinsics[1, 1] = self.fy
-#         self.register_buffer("camera_intrinsics",
-#                              torch.inverse(camera_intrinsics))
-#         self.register_buffer("center", center)
-#
-#     def forward(self, joints):
-#         # translate to homogeneous coordinates
-#         homog_coord = torch.ones(
-#             list(joints.shape)[:-1] + [1],
-#             dtype=self.dtype,
-#             device=self.device)
-#         # Convert the points to homogeneous coordinates
-#         projected_points = torch.cat([joints, homog_coord], dim=-1)
-#         img_points = torch.div(projected_points[:, :, :2],
-#                                projected_points[:, :, 2].unsqueeze(dim=-1))
-#         img_points = torch.einsum('bki,bji->bjk', [self.camera_intrinsics, img_points]) \
-#             + self.center.unsqueeze(dim=1)
-#         return img_points

-class CameraProjSimple(nn.Module):
+class SimpleCamera(nn.Module):
     def __init__(
         self,
         dtype=torch.float32,
         device=None,
-        z_scale=0.5
+        z_scale=0.5,
+        transform_mat=None,
     ):
-        super(CameraProjSimple, self).__init__()
+        super(SimpleCamera, self).__init__()
+        self.hasTransform = False
         self.dtype = dtype
         self.device = device

         zs = torch.ones(1, device=device)
         zs *= z_scale
         print(zs)
         self.register_buffer("z_scale", zs)

+        if transform_mat is not None:
+            self.hasTransform = True
+            self.register_buffer("trans", transform_mat)

     def forward(self, points):
-        proj_points = torch.mul(
-            points[:, :, :2], points[:, :, 2] / self.z_scale)
+        if self.hasTransform:
+            proj_points = points @ self.trans
         # scale = (points[:, :, 2] / self.z_scale)
         # print(points.shape, scale.shape)
+        proj_points = points[:, :, :2] * 1
+        proj_points = F.pad(proj_points, (0, 1, 0, 0), value=0)
         return proj_points
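Note that as committed, forward() unconditionally overwrites any transformed points, so the projection reduces to dropping the depth component and re-padding z with 0. A minimal usage sketch (not part of the commit; shapes assumed from the SMPL joint layout):

import torch

# Minimal usage sketch of the SimpleCamera defined above (assumed shapes,
# not part of the commit). Without transform_mat, forward() drops the
# depth component and pads the z column with zeros.
cam = SimpleCamera(dtype=torch.float32, device=None, z_scale=0.5)
joints = torch.rand(1, 25, 3)   # [batch, num_joints, xyz]
projected = cam(joints)         # [1, 25, 3], z column set to 0
print(projected.shape)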


@@ -1,4 +1,3 @@
-import math
 import torch
 import torch.nn.functional as F
 import torch.nn as nn
@@ -13,7 +12,7 @@ class Transform(nn.Module):
         self.device = device

         # init parameters
-        translation = torch.zeros(3, device=device, dtype=dtype)
+        translation = torch.rand(3, device=device, dtype=dtype)
         translation = nn.Parameter(translation, requires_grad=True)
         self.register_parameter("translation", translation)
@@ -21,50 +20,15 @@
         orientation = nn.Parameter(orientation, requires_grad=True)
         self.register_parameter("orientation", orientation)

-        # self.roll = torch.randn(
-        #     1, device=device, dtype=dtype, requires_grad=True)
-        # self.yaw = torch.randn(
-        #     1, device=device, dtype=dtype, requires_grad=True)
-        # self.pitch = torch.randn(
-        #     1, device=device, dtype=dtype, requires_grad=True)

-        # init addition buffers
-        # tensor_0 = torch.zeros(1, device=device, dtype=dtype)
-        # self.register_buffer("tensor_0", tensor_0)
-        # tensor_1 = torch.ones(1, device=device, dtype=dtype)
-        # self.register_buffer("tensor_1", tensor_1)

-    def get_transform_mat(self):
-        # tensor_1 = self.tensor_1.squeeze()
-        # tensor_0 = self.tensor_0.squeeze()
-        # roll = self.orientation[0]
-        # pitch = self.orientation[1]
-        # yaw = self.orientation[2]
-        # RX = torch.stack([
-        #     torch.stack([tensor_1, tensor_0, tensor_0]),
-        #     torch.stack([tensor_0, torch.cos(roll), -torch.sin(roll)]),
-        #     torch.stack([tensor_0, torch.sin(roll), torch.cos(roll)])]).reshape(3, 3)
-        # RY = torch.stack([
-        #     torch.stack([torch.cos(pitch), tensor_0, torch.sin(pitch)]),
-        #     torch.stack([tensor_0, tensor_1, tensor_0]),
-        #     torch.stack([-torch.sin(pitch), tensor_0, torch.cos(pitch)])]).reshape(3, 3)
-        # RZ = torch.stack([
-        #     torch.stack([torch.cos(yaw), -torch.sin(yaw), tensor_0]),
-        #     torch.stack([torch.sin(yaw), torch.cos(yaw), tensor_0]),
-        #     torch.stack([tensor_0, tensor_0, tensor_1])]).reshape(3, 3)
-        # R = torch.mm(RX, RY)
-        # R = torch.mm(R, RZ)
-        # R = torch.mm(RZ, RY)
-        # R = torch.mm(R, RX)
+    def get_transform_mat(self, with_translate=False):
         transform = tgm.angle_axis_to_rotation_matrix(self.orientation)
         # print(transform.shape)
+        if with_translate:
+            transform[:, :3, 3] = self.translation
         return transform

     def forward(self, joints):
         R = self.get_transform_mat()
-        return joints @ R + F.pad(self.translation, (0, 1), value=1)
+        translation = F.pad(self.translation, (0, 1), value=1)
+        return joints @ R + translation
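For reference, a short sketch of the rotation conversion used in get_transform_mat() above. It assumes the module imports torchgeometry as tgm, whose angle_axis_to_rotation_matrix maps a batch of axis-angle vectors to 4x4 homogeneous rotation matrices:

import torch
import torchgeometry as tgm

# Sketch of the conversion used by Transform.get_transform_mat() (assumes
# torchgeometry is installed): axis-angle in, homogeneous 4x4 matrix out.
orientation = torch.zeros(1, 3)  # axis-angle vector, batch of one
R = tgm.angle_axis_to_rotation_matrix(orientation)
print(R.shape)  # torch.Size([1, 4, 4])
# With with_translate=True the module also writes its translation
# parameter into the last column, yielding a full rigid transform.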

utils.py (deleted)

@ -1,242 +0,0 @@
from typing import List, Set, Dict, Tuple, Optional
import numpy as np
import trimesh
import pyrender
import cv2
openpose_to_smpl = np.array([
8, # hip - middle
12, # hip - right
9, # hip - left
-1, # body center (belly, not present in body_25)
13, # left knee
10, # right knee,
-1,
14, # left ankle
11, # right ankle
-1,
-1,
-1,
1, # chest
-1,
-1,
-1,
5, # left shoulder
2, # right shoulder
6, # left elbow
3, # right elbow
7, # left hand
4, # right hand
-1,
-1,
0, # head
15,
16,
17,
18,
19, # left toe
20,
21,
22, # right toe
23,
24,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
])
def get_mapping_arr(
input_format: str = "body_25",
output_format: str = "smpl",
) -> list:
# TODO: expand features as needed
# based on mappings found here
# https://github.com/ortegatron/playing_smplifyx/blob/master/smplifyx/utils.py
return openpose_to_smpl
joint_names_body_25 = {
"hip-left": 9,
"hip-right": 12,
"belly": 8,
"knee-left": 10,
"knee-right": 13,
"ankle-left": 11,
"ankle-right": 14,
"toes-left": 22,
"toes-right": 19,
"neck": 1,
"head": 0,
"shoulder-left": 2,
"shoulder-right": 5,
"elbow-left": 3,
"elbow-right": 6,
"hand-left": 4,
"hand-right": 7,
}
def get_named_joint(joints: List, name: str, type="smpl"):
"""get SMPL joint by name
Args:
joints (List): list of SMPL joints
name (str): joint to be extracted
Returns:
Tuple[float, float, float]: Coordinates of the selected joint
"""
if type == "smpl":
mapping = get_mapping_arr()
index = joint_names_body_25[name]
return joints[np.where(mapping == index)]
if type == "body_25":
return joints[joint_names_body_25[name]]
def get_named_joints(joints: List, names: List[str], type="smpl"):
return [get_named_joint(joints, name, type=type) for name in names]
def apply_mapping(
input_data: List,
mapping: list):
return [input_data[i] if i != -1 else (0, 0, 0) for i in mapping]
def openpose_to_opengl_coords(
input_data: List[Tuple[float, float]],
real_width: int,
real_height: int
) -> (List[Tuple[float, float, float]], List[float]):
"""converts a list of OpenPose 2d keypoints with confidence to a opengl coordinate system 3d point list and a confidence array
Args:
input_data (List[Tuple[float, float]]): [description]
real_width (int): OpenPose input image/data width
real_height (int): OpenPose input image/data height
Returns:
[type]: [description]
"""
points = np.array([
[
x / real_width * 2 - 1,
-y / real_height * 2 + 1,
0
] for (x, y, z) in input_data])
conf = np.array([
z for (_, _, z) in input_data
])
return (points, conf)
def render_model(
scene,
model,
model_out,
color=[0.3, 0.3, 0.3, 0.8],
name=None,
replace=False,
):
vertices = model_out.vertices.detach().cpu().numpy().squeeze()
# set vertex colors, maybe use this to highlight accuracies
vertex_colors = np.ones([vertices.shape[0], 4]) * color
# triangulate vertex mesh
tri_mesh = trimesh.Trimesh(vertices, model.faces,
vertex_colors=vertex_colors)
mesh = pyrender.Mesh.from_trimesh(tri_mesh)
if name is not None and replace:
for node in scene.get_nodes(name=name):
scene.remove_node(node)
return scene.add(mesh, name=name)
def render_points(scene, points, radius=0.005, color=[0.0, 0.0, 1.0, 1.0], name=None):
sm = trimesh.creation.uv_sphere(radius=radius)
sm.visual.vertex_colors = color
tfs = np.tile(np.eye(4), (len(points), 1, 1))
tfs[:, :3, 3] = points
pcl = pyrender.Mesh.from_trimesh(sm, poses=tfs)
# return the render scsene node
return scene.add(pcl, name=name)
def estimate_scale(joints, keypoints, pairs=[
("shoulder-right", "hip-right"),
("shoulder-left", "hip-left")
], cam_fy=1):
"""estimate image depth based on the height changes due to perspective.
This method only provides a rough estimate by computing shoulder to hip distances
between SMPL joints and OpenPose keypoints.
Args:
joints ([type]): List of all SMPL joints
keypoints ([type]): List of all OpenPose keypoints
cam_fy (int, optional): Camera Y focal length. Defaults to 1.
"""
# store distance vectors
smpl_dists = []
ops_dists = []
for (j1, j2) in pairs:
smpl_joints = get_named_joints(joints, [j1, j2])
ops_keyp = get_named_joints(keypoints, [j1, j2])
smpl_dists.append(smpl_joints[0] - smpl_joints[1])
ops_dists.append(ops_keyp[0] - ops_keyp[1])
smpl_height = np.linalg.norm(smpl_dists, axis=1).mean()
ops_height = np.linalg.norm(ops_dists, axis=1).mean()
return cam_fy * smpl_height / ops_height
def estimate_focal_length(run_estimation: bool = False):
"""
Estimate focal length by selecting a region of image whose real width is known.
Executed once to compute a camera intrinsics matrix.
For now, focal length = 1000
:return: focal_length
"""
# TODO: adjust known distances with more precise values if this method works
if run_estimation:
image = cv2.imread('samples/001.jpg')
cv2.imshow("image", image)
marker = cv2.selectROI("image", image, fromCenter=False, showCrosshair=True)
# width of the selected region (object) in the image
region_width = marker[2]
# known real distance from the camera to the object
known_distance = 200
# known real width of the object
known_width = 50
focal_length = (region_width * known_distance) / known_width
print("Focal length:", focal_length)
else:
focal_length = 1000
return focal_length

utils/__init__.py (new, empty file)

utils/general.py (new file)

@@ -0,0 +1,79 @@
from typing import List, Set, Dict, Tuple, Optional

from utils.mapping import get_named_joints
import numpy as np
import cv2
import yaml


def load_config():
    with open('./config.yaml') as file:
        # The FullLoader parameter handles the conversion from YAML
        # scalar values to the Python dictionary format
        config = yaml.load(file, Loader=yaml.FullLoader)
    return config


def estimate_scale(joints, keypoints, pairs=[
    ("shoulder-right", "hip-right"),
    ("shoulder-left", "hip-left")
], cam_fy=1):
    """Estimate image depth based on the height changes due to perspective.

    This method only provides a rough estimate by computing shoulder-to-hip
    distances between SMPL joints and OpenPose keypoints.

    Args:
        joints: list of all SMPL joints
        keypoints: list of all OpenPose keypoints
        cam_fy (int, optional): camera Y focal length. Defaults to 1.
    """
    # store distance vectors
    smpl_dists = []
    ops_dists = []

    for (j1, j2) in pairs:
        smpl_joints = get_named_joints(joints, [j1, j2])
        ops_keyp = get_named_joints(keypoints, [j1, j2])
        smpl_dists.append(smpl_joints[0] - smpl_joints[1])
        ops_dists.append(ops_keyp[0] - ops_keyp[1])

    smpl_height = np.linalg.norm(smpl_dists, axis=1).mean()
    ops_height = np.linalg.norm(ops_dists, axis=1).mean()

    return cam_fy * smpl_height / ops_height


def estimate_focal_length(run_estimation: bool = False):
    """Estimate the focal length by selecting a region of the image whose real
    width is known. Executed once to compute a camera intrinsics matrix.
    For now the focal length defaults to 1000.

    :return: focal_length
    """
    # TODO: adjust known distances with more precise values if this method works
    if run_estimation:
        image = cv2.imread('samples/001.jpg')
        cv2.imshow("image", image)
        marker = cv2.selectROI(
            "image", image, fromCenter=False, showCrosshair=True)

        # width of the selected region (object) in the image
        region_width = marker[2]
        # known real distance from the camera to the object
        known_distance = 200
        # known real width of the object
        known_width = 50

        focal_length = (region_width * known_distance) / known_width
        print("Focal length:", focal_length)
    else:
        focal_length = 1000

    return focal_length
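The estimation above is the standard pinhole similar-triangles relation focal_length = region_width * known_distance / known_width. A worked example with an assumed pixel width (the 200 and 50 constants come from the code above):

# Worked example of the similar-triangles relation used above; the pixel
# width is an assumed, illustrative value.
region_width = 250    # pixels the object spans in the image (assumed)
known_distance = 200  # known real distance from the camera to the object
known_width = 50      # known real width of the object
print(region_width * known_distance / known_width)  # 1000.0, the fallback value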

utils/mapping.py (new file)

@@ -0,0 +1,142 @@
from typing import List, Set, Dict, Tuple, Optional

import numpy as np
from trimesh.triangles import normals

openpose_to_smpl = np.array([
    8,   # hip - middle
    12,  # hip - right
    9,   # hip - left
    -1,  # body center (belly, not present in body_25)
    13,  # left knee
    10,  # right knee
    -1,
    14,  # left ankle
    11,  # right ankle
    -1,
    -1,
    -1,
    1,   # chest
    -1,
    -1,
    -1,
    5,   # left shoulder
    2,   # right shoulder
    6,   # left elbow
    3,   # right elbow
    7,   # left hand
    4,   # right hand
    -1,
    -1,
    0,   # head
    15,
    16,
    17,
    18,
    19,  # left toe
    20,
    21,
    22,  # right toe
    23,
    24,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
])


def get_mapping_arr(
    input_format: str = "body_25",
    output_format: str = "smpl",
) -> list:
    # TODO: expand features as needed
    # based on the mappings found here:
    # https://github.com/ortegatron/playing_smplifyx/blob/master/smplifyx/utils.py
    return openpose_to_smpl


joint_names_body_25 = {
    "hip-left": 9,
    "hip-right": 12,
    "belly": 8,
    "knee-left": 10,
    "knee-right": 13,
    "ankle-left": 11,
    "ankle-right": 14,
    "toes-left": 22,
    "toes-right": 19,
    "neck": 1,
    "head": 0,
    "shoulder-left": 2,
    "shoulder-right": 5,
    "elbow-left": 3,
    "elbow-right": 6,
    "hand-left": 4,
    "hand-right": 7,
}


def get_named_joint(joints: List, name: str, type="smpl"):
    """Get a SMPL joint by name.

    Args:
        joints (List): list of SMPL joints
        name (str): name of the joint to be extracted

    Returns:
        Tuple[float, float, float]: coordinates of the selected joint
    """
    if type == "smpl":
        mapping = get_mapping_arr()
        index = joint_names_body_25[name]
        return joints[np.where(mapping == index)]
    if type == "body_25":
        return joints[joint_names_body_25[name]]


def get_named_joints(joints: List, names: List[str], type="smpl"):
    return [get_named_joint(joints, name, type=type) for name in names]


def apply_mapping(
        input_data: List,
        mapping: list):
    return [input_data[i] if i != -1 else (0, 0, 0) for i in mapping]


def openpose_to_opengl_coords(
    input_data: List[Tuple[float, float, float]],
    real_width: int,
    real_height: int
) -> Tuple[List[Tuple[float, float, float]], List[float]]:
    """Convert a list of OpenPose 2D keypoints with confidence values into a
    list of 3D points in an OpenGL coordinate system plus a confidence array.

    Args:
        input_data (List[Tuple[float, float, float]]): (x, y, confidence) triples
        real_width (int): OpenPose input image/data width
        real_height (int): OpenPose input image/data height

    Returns:
        Tuple[List[Tuple[float, float, float]], List[float]]: points and confidences
    """
    points = np.array([
        [
            x / real_width * 2 - 1,
            -y / real_height * 2 + 1,
            0
        ] for (x, y, z) in input_data])

    conf = np.array([
        z for (_, _, z) in input_data
    ])

    return (points, conf)
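A minimal usage sketch of the mapping helpers above (not part of the commit; the detection values are fabricated placeholders):

import numpy as np

# Normalize a fake BODY_25 detection into OpenGL coordinates, then look a
# joint up by name in the BODY_25 layout (all values are placeholders).
keypoints = [(640.0, 360.0, 0.9)] * 25  # (x, y, confidence) triples
points, conf = openpose_to_opengl_coords(keypoints, 1280, 720)
print(points.shape, conf.shape)  # (25, 3) (25,)
neck = get_named_joint(points, "neck", type="body_25")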

utils/render.py (new file)

@@ -0,0 +1,65 @@
from typing import List, Set, Dict, Tuple, Optional

import numpy as np
import trimesh
import pyrender


def render_model(
    scene,
    model,
    model_out,
    color=[0.3, 0.3, 0.3, 0.8],
    name=None,
    replace=False,
    pose=None
):
    vertices = model_out.vertices.detach().cpu().numpy().squeeze()

    # set vertex colors, maybe use this to highlight accuracies
    vertex_colors = np.ones([vertices.shape[0], 4]) * color

    # triangulate vertex mesh
    tri_mesh = trimesh.Trimesh(vertices, model.faces,
                               vertex_colors=vertex_colors)
    mesh = pyrender.Mesh.from_trimesh(tri_mesh)

    if name is not None and replace:
        for node in scene.get_nodes(name=name):
            scene.remove_node(node)

    return scene.add(mesh, name=name, pose=pose)


def render_points(scene, points, radius=0.005, color=[0.0, 0.0, 1.0, 1.0], name=None):
    sm = trimesh.creation.uv_sphere(radius=radius)
    sm.visual.vertex_colors = color
    tfs = np.tile(np.eye(4), (len(points), 1, 1))
    tfs[:, :3, 3] = points
    pcl = pyrender.Mesh.from_trimesh(sm, poses=tfs)
    # return the render scene node
    return scene.add(pcl, name=name)


def render_camera(scene, radius=0.5, height=0.5, color=[0.0, 0.0, 1.0, 1.0], name=None):
    sm = trimesh.creation.cone(radius, height, sections=None, transform=None)
    sm.visual.vertex_colors = color
    tfs = np.eye(4)
    pcl = pyrender.Mesh.from_trimesh(sm, poses=tfs)
    # return the render scene node
    return scene.add(pcl, name=name)


def render_image_plane(scene, image, name=None):
    height, width, _ = image.shape
    mat = trimesh.visual.texture.TextureVisuals(
        image=image, uv=[[0, 0], [0, 1], [1, 0], [1, 1]])
    tm = trimesh.load('plane.obj', visual=mat)
    tm.visual = mat
    tfs = np.eye(4)
    tfs[0, 0] = width / height
    tfs[0, 3] = 0.75
    material2 = pyrender.Material(name=name, emissiveTexture=image)
    m = pyrender.Mesh.from_trimesh(tm, poses=tfs)
    return scene.add(m, name=name)
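And a minimal sketch tying the render helpers together (assumed usage, not part of the commit; requires a display for the interactive viewer):

import numpy as np
import pyrender

# Place two marker spheres in a scene via render_points and open pyrender's
# interactive viewer; render_model works the same way once an SMPL model
# output is available.
scene = pyrender.Scene()
markers = np.array([[0.0, 0.0, 0.0], [0.1, 0.2, 0.3]])
render_points(scene, markers, radius=0.01,
              color=[1.0, 0.0, 0.0, 1.0], name="keypoints")
pyrender.Viewer(scene, use_raymond_lighting=True)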