Mirror of https://github.com/gosticks/body-pose-animation.git, synced 2025-10-16 11:45:42 +00:00

Commit: making camera work again
This commit is contained in: parent eb1b5876a8, commit 36cd2c6648

.gitignore (vendored): 4 changed lines
@@ -101,4 +101,6 @@ vposer_v1_0
 results/
 output/
 tests/
-samples/video*
+samples/
+raw/
+presentation/
@@ -33,7 +33,7 @@ This should copy and rename the SMPL model to the correct folders. Either way th
 
 To use `bodyPrior` in the configuration please download vposer and place it into the `./vposer_v1_0` directory in the project root. Vposer can be downloaded from this [link](https://psfiles.is.tuebingen.mpg.de/downloads/smplx/vposer_v1_0-zip) after creating an account with SMPL-X.
 
 ### Mesh intersection
-To use `intersectLoss` in the configuration please pull the [github](https://github.com/gosticks/torch-mesh-isect) repo. This repo is patched to run on the newer versions of pytorch.
+To use `intersectLoss` in the configuration please pull the [github](https://github.com/gosticks/torch-mesh-isect) repo. This repo is patched to run on the newer versions of pytorch.
 Note: It only runs on Linux-based operating systems. We had trouble getting it to work on Windows.
 
 ### Conda Environment
@@ -19,8 +19,8 @@ camera:
     patience: 10
     optimizer: Adam
   orientation:
-    lr: 0.03
-    optimizer: LBFGS
+    lr: 0.5
+    optimizer: Adam
     iterations: 5
     joint_names: ["hip-left", "hip-right", "shoulder-left", "shoulder-right"] # joints to be used for optimization
 pose:
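For context, a minimal sketch of how an `optimizer`/`lr` pair like the one above is typically resolved into a torch optimizer at runtime. This is not code from this commit; the helper name and the config-dict access are assumptions for illustration.

```python
import torch

def build_optimizer(parameters, conf):
    # Resolve the optimizer class by its configured name, e.g. "Adam" or
    # "LBFGS", then bind the configured learning rate to the parameters.
    optim_cls = getattr(torch.optim, conf['optimizer'])
    return optim_cls(parameters, lr=conf['lr'])

# e.g. for the orientation stage above (hypothetical usage):
# optimizer = build_optimizer([model.global_orient],
#                             {'optimizer': 'Adam', 'lr': 0.5})
```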
example_fit_camera.py (new file, 84 lines)

@@ -0,0 +1,84 @@
import pickle
import time
from train import create_animation
from dataset import SMPLyDataset
from model import *
from utils.general import *
from renderer import *
from utils.general import rename_files, get_new_filename

START_IDX = 1  # starting index of the frame to optimize for
FINISH_IDX = 60  # choose a big number to optimize for all frames in samples directory
# if False, only run already saved animation without optimization
RUN_OPTIMIZATION = True

result_image = []
idx = START_IDX

device = torch.device('cpu')
dtype = torch.float32

config = load_config()
dataset = SMPLyDataset.from_config(config)
model = SMPLyModel.model_from_conf(config)

# Rename files in samples directory to uniform format
if config['data']['renameFiles']:
    rename_files(config['data']['rootDir'] + "/")

'''
Optimization part without visualization
'''
if RUN_OPTIMIZATION:
    model_outs, filename = create_animation(
        dataset,
        config,
        START_IDX,
        FINISH_IDX,
        verbose=False,
        offscreen=True,
        save_to_file=True,
        interpolate=False
    )

def replay_animation(file, start_frame=0, end_frame=None, with_background=False, fps=30, interpolated=False):
    r = Renderer()
    r.start()

    model_anim = SMPLyModel.model_from_conf(config)

    with open(file, "rb") as fp:
        results = pickle.load(fp)

    if end_frame is None:
        end_frame = len(results)

    for model, camera_transform in results[start_frame::]:
        if interpolated:
            vertices = model
        else:
            vertices = model.vertices

        r.render_model_geometry(
            faces=model_anim.faces,
            vertices=vertices,
            pose=camera_transform
        )

        time.sleep(1 / fps)

'''
Play the animation.
'''
if RUN_OPTIMIZATION:
    anim_file = filename
else:
    results_dir = config['output']['rootDir']
    result_prefix = config['output']['prefix']
    anim_file = results_dir + result_prefix + "0.pkl"

replay_animation(anim_file, interpolated=True)
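Judging from the script body above, running this file directly (e.g. `python example_fit_camera.py`) first optimizes frames START_IDX through FINISH_IDX and then replays the saved animation; setting `RUN_OPTIMIZATION = False` instead replays an existing `.pkl` result from the configured output directory.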
@@ -9,7 +9,7 @@ from renderer import *
 from utils.general import rename_files, get_new_filename
 
 START_IDX = 150  # starting index of the frame to optimize for
-FINISH_IDX = 300  # choose a big number to optimize for all frames in samples directory
+FINISH_IDX = 400  # choose a big number to optimize for all frames in samples directory
 
 result_image = []
 idx = START_IDX
examples/__init__.py (new file, empty)

modules/perspective_cam.py (new file, 84 lines)

@@ -0,0 +1,84 @@
from collections import namedtuple

import torch
import torch.nn as nn

from smplx.lbs import transform_mat


class PerspectiveCamera(nn.Module):

    FOCAL_LENGTH = 5000

    def __init__(self, rotation=None, translation=None,
                 focal_length_x=None, focal_length_y=None,
                 batch_size=1,
                 center=None, dtype=torch.float32, **kwargs):
        super(PerspectiveCamera, self).__init__()
        self.batch_size = batch_size
        self.dtype = dtype
        # Make a buffer so that PyTorch does not complain when creating
        # the camera matrix
        self.register_buffer('zero',
                             torch.zeros([batch_size], dtype=dtype))

        if focal_length_x is None or type(focal_length_x) == float:
            focal_length_x = torch.full(
                [batch_size],
                self.FOCAL_LENGTH if focal_length_x is None else
                focal_length_x,
                dtype=dtype)

        if focal_length_y is None or type(focal_length_y) == float:
            focal_length_y = torch.full(
                [batch_size],
                self.FOCAL_LENGTH if focal_length_y is None else
                focal_length_y,
                dtype=dtype)

        self.register_buffer('focal_length_x', focal_length_x)
        self.register_buffer('focal_length_y', focal_length_y)

        if center is None:
            center = torch.zeros([batch_size, 2], dtype=dtype)
        self.register_buffer('center', center)

        if rotation is None:
            rotation = torch.eye(
                3, dtype=dtype).unsqueeze(dim=0).repeat(batch_size, 1, 1)

        rotation = nn.Parameter(rotation, requires_grad=True)
        self.register_parameter('rotation', rotation)

        if translation is None:
            translation = torch.zeros([batch_size, 3], dtype=dtype)

        translation = nn.Parameter(translation,
                                   requires_grad=True)
        self.register_parameter('translation', translation)

    def forward(self, points):
        device = points.device

        with torch.no_grad():
            camera_mat = torch.zeros([self.batch_size, 2, 2],
                                     dtype=self.dtype, device=points.device)
            camera_mat[:, 0, 0] = self.focal_length_x
            camera_mat[:, 1, 1] = self.focal_length_y

        camera_transform = transform_mat(self.rotation,
                                         self.translation.unsqueeze(dim=-1))
        homog_coord = torch.ones(list(points.shape)[:-1] + [1],
                                 dtype=points.dtype,
                                 device=device)
        # Convert the points to homogeneous coordinates
        points_h = torch.cat([points, homog_coord], dim=-1)

        projected_points = torch.einsum('bki,bji->bjk',
                                        [camera_transform, points_h])

        img_points = torch.div(projected_points[:, :, :2],
                               projected_points[:, :, 2].unsqueeze(dim=-1))
        img_points = torch.einsum('bki,bji->bjk', [camera_mat, img_points]) \
            + self.center.unsqueeze(dim=1)
        return img_points
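For reference (not part of the commit): `forward` implements the standard pinhole projection x_px = f_x * X/Z + c_x, y_px = f_y * Y/Z + c_y after applying the rotation/translation extrinsics. A minimal sketch of projecting a few 3D points with the class above; all point values and the principal point are arbitrary, and it assumes `modules.perspective_cam` is importable and `smplx` is installed:

```python
import torch

from modules.perspective_cam import PerspectiveCamera

cam = PerspectiveCamera(
    focal_length_x=850.0, focal_length_y=850.0,
    center=torch.tensor([[960.0, 540.0]]),  # principal point of a 1920x1080 image
    batch_size=1,
)

# one batch of three 3D points in front of the camera (positive z)
points = torch.tensor([[[0.0, 0.0, 2.0],
                        [0.1, -0.2, 2.5],
                        [-0.3, 0.4, 3.0]]])

with torch.no_grad():
    px = cam(points)  # -> (1, 3, 2) pixel coordinates

# with identity rotation and zero translation, (0, 0, 2) lands on the
# principal point (960, 540)
print(px)
```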
train.py (20 lines changed)

@@ -60,12 +60,12 @@ def optimize_sample(sample_index, dataset, config, device=torch.device('cpu'), d
     device = torch.device('cpu')
 
     # get camera estimation
-    pose_camera, cam_trans, cam_int, cam_params = SimpleCamera.from_estimation_cam(
-        cam=camera,
-        use_intrinsics=config['pose']['useCameraIntrinsics'],
-        dtype=dtype,
-        device=device,
-    )
+    # pose_camera, cam_trans, cam_int, cam_params = SimpleCamera.from_estimation_cam(
+    #     cam=camera,
+    #     use_intrinsics=config['pose']['useCameraIntrinsics'],
+    #     dtype=dtype,
+    #     device=device,
+    # )
 
     params = defaultdict(
         body_pose=initial_pose,
@@ -74,15 +74,17 @@ def optimize_sample(sample_index, dataset, config, device=torch.device('cpu'), d
     with torch.no_grad():
         model(**params)
 
+    r.start()
+
     # apply transform to scene
-    if r is not None:
-        r.set_group_pose("body", cam_trans.cpu().numpy())
+    # if r is not None:
+    #     r.set_group_pose("body", cam_trans.cpu().numpy())
 
     global_orient = train_orient_with_conf(
         config=config,
         model=model,
         keypoints=keypoints,
-        camera_layer=pose_camera,
+        camera_layer=None,  # pose_camera,
         renderer=r,
         device=device,
         use_progress_bar=verbose,
train_camera.py (new file, empty)

@@ -1,4 +1,5 @@
-from utils.mapping import get_indices_by_name
+from modules.perspective_cam import PerspectiveCamera
+from utils.mapping import get_indices_by_name, opengl_to_screen_space
 from modules.distance_loss import WeightedMSELoss
 from modules.utils import get_loss_layers
 from camera_estimation import TorchCameraEstimate
@@ -52,10 +53,18 @@ def train_orient(
     )
 
     # make sure camera module is on the correct device
-    camera = camera.to(device=device, dtype=dtype)
+    # camera = camera.to(device=device, dtype=dtype)
+    pers_cam = PerspectiveCamera(
+        dtype=dtype, device=device,
+        center=torch.tensor([[1920/2, 1080/2]], dtype=dtype),
+        focal_length_x=850.0,
+        focal_length_y=850.0
+    ).to(device=device)
 
     # setup keypoint data
     keypoints = torch.tensor(keypoints).to(device=device, dtype=dtype)
+    # keypoints = opengl_to_screen_space(keypoints, (1920, 1080))
+    # do some janky conversion back to pixel :(
 
     # torso indices
     torso_indices = get_indices_by_name(joint_names)
@@ -67,7 +76,7 @@ def train_orient(
     pose_layer = BodyPose(model, dtype=dtype, device=device,
                           useBodyMeanAngles=False).to(device=device, dtype=dtype)
 
-    parameters = [model.global_orient]
+    parameters = [model.global_orient, pers_cam.rotation, pers_cam.translation]
 
     if use_progress_bar:
         pbar = tqdm(total=iterations)
@@ -85,19 +94,23 @@ def train_orient(
 
     optimizer = optimizer(parameters, learning_rate)
 
+    print(keypoints[0][0])
+    body_joints, cur_pose = pose_layer()
+    body_joints = opengl_to_screen_space(body_joints.clone(), (1080, 1080))
+    print(body_joints[0][0])
+
     # prediction and loss computation closure
     def predict():
         # return joints based on current model state
         body_joints, cur_pose = pose_layer()
 
         # body_joints = opengl_to_screen_space(body_joints.clone(), (1920, 1080))
         # compute homogeneous coordinates and project them to 2D space
-        points = tgm.convert_points_to_homogeneous(body_joints)
-        points = camera(points).squeeze()
+        # points = tgm.convert_points_to_homogeneous(body_joints)
+        points = pers_cam(body_joints).squeeze()
+        print(points[0][0])
         # compute loss between 2D joint projection and OpenPose keypoints
         loss = loss_layer(points[torso_indices],
-                          keypoints[torso_indices])
+                          keypoints[torso_indices][:, :2])
         return loss
 
     # main optimizer closure
@@ -112,10 +125,10 @@ def train_orient(
         return loss
 
     # camera translation
-    R = camera.trans.detach().cpu().numpy().squeeze()
+    # R = camera.trans.detach().cpu().numpy().squeeze()
 
     # main optimization loop
-    for t in range(iterations):
+    for t in range(2000):
        loss = optimizer.step(optim_closure)
 
        # compute loss
@@ -136,12 +149,14 @@ def train_orient(
             pbar.set_description("Error %f" % cur_loss)
             pbar.update(1)
 
-        if renderer is not None and render_steps:
-            renderer.render_model(
-                model=model,
-                model_out=pose_layer.cur_out,
-                transform=R
-            )
+        # if renderer is not None and render_steps:
+        #     renderer.render_model(
+        #         model=model,
+        #         model_out=pose_layer.cur_out,
+        #         transform=R
+        #     )
+
+    print("translation", pers_cam.translation)
 
     if use_progress_bar:
         pbar.close()
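Taken together, the changes above jointly optimize the body's global orientation and the new camera's extrinsics against the 2D OpenPose keypoints. A condensed sketch of that pattern, not the commit's code: names like `pose_layer`, `loss_layer`, and `torso_indices` follow the diff, while the function signature, shapes, and optimizer choice are assumptions.

```python
import torch

# condensed sketch of the fitting loop introduced in this commit;
# tensor shapes and helper outputs are assumptions from the diff
def fit_orientation(model, pers_cam, pose_layer, loss_layer,
                    keypoints, torso_indices, iterations=2000, lr=0.5):
    # optimize body orientation and camera extrinsics jointly
    parameters = [model.global_orient, pers_cam.rotation, pers_cam.translation]
    optimizer = torch.optim.Adam(parameters, lr=lr)

    def closure():
        optimizer.zero_grad()
        body_joints, _ = pose_layer()             # (1, J, 3) model joints
        points = pers_cam(body_joints).squeeze()  # (J, 2) projected pixels
        loss = loss_layer(points[torso_indices],
                          keypoints[torso_indices][:, :2])
        loss.backward()
        return loss

    for _ in range(iterations):
        loss = optimizer.step(closure)
    return loss
```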
@@ -106,7 +106,7 @@ def estimate_scale(joints, keypoints, pairs=[
     smpl_height = np.linalg.norm(smpl_dists, axis=0).mean()
     ops_height = np.linalg.norm(ops_dists, axis=0).mean()
 
-    return cam_fy / 1080 * smpl_height / ops_height
+    return smpl_height / ops_height
 
 
 def estimate_focal_length(run_estimation: bool = False):
@@ -192,7 +192,7 @@ def setup_training(model, dataset, sample_index, renderer=True, offscreen=False)
     est_scale = estimate_scale(joints, keypoints)
 
     # apply scaling to keypoints
-    keypoints = keypoints * est_scale
+    keypoints = keypoints  # * est_scale
 
     # integrating Camera Estimation
@@ -278,13 +278,15 @@ def openpose_to_opengl_coords(
         [type]: [description]
     """
 
-    points = np.array([
-        [
-            x / real_width * 2 - 1,
-            -y / real_height * 2 + 1,
-            0
-        ] for (x, y, z) in input_data])
+    # points = np.array([
+    #     [
+    #         x / real_width * 2 - 1,
+    #         -y / real_height * 2 + 1,
+    #         0
+    #     ] for (x, y, z) in input_data])
+
+    points = np.array(input_data)[:, :3]
+    points[:, 2] = 0
     conf = np.array([
         z for (_, _, z) in input_data
     ])
@@ -292,6 +294,13 @@ def openpose_to_opengl_coords(
     return (points, conf)
 
 
+def opengl_to_screen_space(points, size):
+    points[:, 0] = (points[:, 0] + 1) / 2 * size[0]
+    points[:, 1] = -((points[:, 1] - 1) / 2 * size[1])
+    points[:, 2] = ((points[:, 2] - 1) / 2 * size[1])
+    return points
+
+
 def smpl_to_openpose(print_mapping: True):
     """Utility for remapping smpl mapping indices to openpose mapping indices.
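The new `opengl_to_screen_space` helper maps OpenGL-style normalized device coordinates (x and y in [-1, 1], y up) to pixel coordinates (origin top-left, y down). A quick self-contained check of that mapping, with arbitrary test values, done inline rather than through the helper:

```python
import numpy as np

# NDC (0, 0) should land at the center of a 1920x1080 frame,
# and NDC (-1, 1) at the top-left corner (0, 0)
pts = np.array([[0.0, 0.0, 0.0],    # center
                [-1.0, 1.0, 0.0]])  # top-left corner
size = (1920, 1080)

pts[:, 0] = (pts[:, 0] + 1) / 2 * size[0]      # x: [-1, 1] -> [0, width]
pts[:, 1] = -((pts[:, 1] - 1) / 2 * size[1])   # y: flip, [-1, 1] -> [height, 0]

print(pts[:, :2])  # [[960. 540.] [0. 0.]]
```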
@@ -93,7 +93,7 @@ def save_to_video(
         r.render_model_geometry(
             faces=model_anim.faces,
             vertices=vertices,
-            pose=cam_trans  # cam_transform,
+            pose=cam_transform  # cam_transform,
         )
         frames.append(r.get_snapshot())