mirror of https://github.com/gosticks/body-pose-animation.git
synced 2025-10-16 11:45:42 +00:00

Commit 36cd2c6648 ("making camera work again")
Parent: eb1b5876a8
.gitignore (vendored, 4 lines changed)

@@ -101,4 +101,6 @@ vposer_v1_0
 results/
 output/
 tests/
-samples/video*
+samples/
+raw/
+presentation/
(config file, name not captured in this scrape)

@@ -19,8 +19,8 @@ camera:
   patience: 10
   optimizer: Adam
 orientation:
-  lr: 0.03
-  optimizer: LBFGS
+  lr: 0.5
+  optimizer: Adam
   iterations: 5
   joint_names: ["hip-left", "hip-right", "shoulder-left", "shoulder-right"] # joints to be used for optimization
 pose:
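Side note on the settings above: the orientation stage now runs Adam at lr 0.5 instead of LBFGS at lr 0.03. A minimal sketch of how a config block like this can be turned into a torch optimizer; the getattr-by-name lookup is an assumption for illustration, not necessarily how this repo resolves the optimizer string:

import torch

def build_optimizer(conf, parameters):
    # Resolve "Adam" / "LBFGS" from torch.optim by name (illustrative helper).
    opt_cls = getattr(torch.optim, conf['optimizer'])
    return opt_cls(parameters, lr=conf['lr'])

# e.g. with the new settings above:
# opt = build_optimizer({'optimizer': 'Adam', 'lr': 0.5}, model.parameters())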
example_fit_camera.py (new file, +84 lines)

@@ -0,0 +1,84 @@
+import pickle
+import time
+from train import create_animation
+from dataset import SMPLyDataset
+from model import *
+from utils.general import *
+from renderer import *
+from utils.general import rename_files, get_new_filename
+
+START_IDX = 1  # starting index of the frame to optimize for
+FINISH_IDX = 60  # choose a big number to optimize for all frames in samples directory
+# if False, only run already saved animation without optimization
+RUN_OPTIMIZATION = True
+
+result_image = []
+idx = START_IDX
+
+device = torch.device('cpu')
+dtype = torch.float32
+
+config = load_config()
+dataset = SMPLyDataset.from_config(config)
+model = SMPLyModel.model_from_conf(config)
+
+
+# Rename files in samples directory to a uniform format
+if config['data']['renameFiles']:
+    rename_files(config['data']['rootDir'] + "/")
+
+
+'''
+Optimization part without visualization
+'''
+if RUN_OPTIMIZATION:
+    model_outs, filename = create_animation(
+        dataset,
+        config,
+        START_IDX,
+        FINISH_IDX,
+        verbose=False,
+        offscreen=True,
+        save_to_file=True,
+        interpolate=False
+    )
+
+
+def replay_animation(file, start_frame=0, end_frame=None, with_background=False, fps=30, interpolated=False):
+    r = Renderer()
+    r.start()
+
+    model_anim = SMPLyModel.model_from_conf(config)
+
+    with open(file, "rb") as fp:
+        results = pickle.load(fp)
+
+    if end_frame is None:
+        end_frame = len(results)
+
+    for model, camera_transform in results[start_frame:end_frame]:
+        if interpolated:
+            vertices = model  # interpolated entries store raw vertex arrays
+        else:
+            vertices = model.vertices
+
+        r.render_model_geometry(
+            faces=model_anim.faces,
+            vertices=vertices,
+            pose=camera_transform
+        )
+
+        time.sleep(1 / fps)
+
+
+'''
+Play the animation.
+'''
+if RUN_OPTIMIZATION:
+    anim_file = filename
+else:
+    results_dir = config['output']['rootDir']
+    result_prefix = config['output']['prefix']
+    anim_file = results_dir + result_prefix + "0.pkl"
+
+replay_animation(anim_file, interpolated=True)
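For reference, the pickle layout that replay_animation unpacks is a list of (model output, camera transform) pairs. A hypothetical sketch that writes a file in that shape to exercise the replay path; the vertex and pose arrays are placeholder data, not real optimization output:

import pickle
import numpy as np

frames = []
for _ in range(10):
    vertices = np.random.rand(6890, 3)           # SMPL meshes have 6890 vertices
    camera_transform = np.eye(4)                 # 4x4 homogeneous camera pose
    frames.append((vertices, camera_transform))  # matches the interpolated=True path

with open("test_anim.pkl", "wb") as fp:
    pickle.dump(frames, fp)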
(second example script, file name not captured in this scrape)

@@ -9,7 +9,7 @@ from renderer import *
 from utils.general import rename_files, get_new_filename

 START_IDX = 150  # starting index of the frame to optimize for
-FINISH_IDX = 300  # choose a big number to optimize for all frames in samples directory
+FINISH_IDX = 400  # choose a big number to optimize for all frames in samples directory

 result_image = []
 idx = START_IDX
examples/__init__.py (new empty file)
modules/perspective_cam.py (new file, +84 lines)

@@ -0,0 +1,84 @@
+from collections import namedtuple
+
+import torch
+import torch.nn as nn
+
+from smplx.lbs import transform_mat
+
+
+class PerspectiveCamera(nn.Module):
+
+    FOCAL_LENGTH = 5000
+
+    def __init__(self, rotation=None, translation=None,
+                 focal_length_x=None, focal_length_y=None,
+                 batch_size=1,
+                 center=None, dtype=torch.float32, **kwargs):
+        super(PerspectiveCamera, self).__init__()
+        self.batch_size = batch_size
+        self.dtype = dtype
+        # Make a buffer so that PyTorch does not complain when creating
+        # the camera matrix
+        self.register_buffer('zero',
+                             torch.zeros([batch_size], dtype=dtype))
+
+        if focal_length_x is None or type(focal_length_x) == float:
+            focal_length_x = torch.full(
+                [batch_size],
+                self.FOCAL_LENGTH if focal_length_x is None else
+                focal_length_x,
+                dtype=dtype)
+
+        if focal_length_y is None or type(focal_length_y) == float:
+            focal_length_y = torch.full(
+                [batch_size],
+                self.FOCAL_LENGTH if focal_length_y is None else
+                focal_length_y,
+                dtype=dtype)
+
+        self.register_buffer('focal_length_x', focal_length_x)
+        self.register_buffer('focal_length_y', focal_length_y)
+
+        if center is None:
+            center = torch.zeros([batch_size, 2], dtype=dtype)
+        self.register_buffer('center', center)
+
+        if rotation is None:
+            rotation = torch.eye(
+                3, dtype=dtype).unsqueeze(dim=0).repeat(batch_size, 1, 1)
+
+        rotation = nn.Parameter(rotation, requires_grad=True)
+        self.register_parameter('rotation', rotation)
+
+        if translation is None:
+            translation = torch.zeros([batch_size, 3], dtype=dtype)
+
+        translation = nn.Parameter(translation,
+                                   requires_grad=True)
+        self.register_parameter('translation', translation)
+
+    def forward(self, points):
+        device = points.device
+
+        with torch.no_grad():
+            camera_mat = torch.zeros([self.batch_size, 2, 2],
+                                     dtype=self.dtype, device=points.device)
+            camera_mat[:, 0, 0] = self.focal_length_x
+            camera_mat[:, 1, 1] = self.focal_length_y
+
+        camera_transform = transform_mat(self.rotation,
+                                         self.translation.unsqueeze(dim=-1))
+        homog_coord = torch.ones(list(points.shape)[:-1] + [1],
+                                 dtype=points.dtype,
+                                 device=device)
+        # Convert the points to homogeneous coordinates
+        points_h = torch.cat([points, homog_coord], dim=-1)
+
+        projected_points = torch.einsum('bki,bji->bjk',
+                                        [camera_transform, points_h])
+
+        img_points = torch.div(projected_points[:, :, :2],
+                               projected_points[:, :, 2].unsqueeze(dim=-1))
+        img_points = torch.einsum('bki,bji->bjk', [camera_mat, img_points]) \
+            + self.center.unsqueeze(dim=1)
+        return img_points
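A small usage sketch for the module above, mirroring the center/focal values used in train_orient further down; the joint count and the [batch, joints, 3] input shape are assumptions based on the batched einsums:

import torch
from modules.perspective_cam import PerspectiveCamera

cam = PerspectiveCamera(
    center=torch.tensor([[960.0, 540.0]]),  # principal point for a 1920x1080 image
    focal_length_x=850.0,
    focal_length_y=850.0,
)

# Batch of one: 24 3D joints (random placeholder data, pushed in front of the camera).
joints_3d = torch.rand(1, 24, 3) + torch.tensor([0.0, 0.0, 2.0])
joints_2d = cam(joints_3d)  # -> [1, 24, 2] pixel coordinates
print(joints_2d.shape)

# rotation and translation are nn.Parameters, so they can be optimized jointly
# with the body model, which is exactly what the train_orient changes below do.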
train.py (20 lines changed)

@@ -60,12 +60,12 @@ def optimize_sample(sample_index, dataset, config, device=torch.device('cpu'), d
     device = torch.device('cpu')

     # get camera estimation
-    pose_camera, cam_trans, cam_int, cam_params = SimpleCamera.from_estimation_cam(
-        cam=camera,
-        use_intrinsics=config['pose']['useCameraIntrinsics'],
-        dtype=dtype,
-        device=device,
-    )
+    # pose_camera, cam_trans, cam_int, cam_params = SimpleCamera.from_estimation_cam(
+    #     cam=camera,
+    #     use_intrinsics=config['pose']['useCameraIntrinsics'],
+    #     dtype=dtype,
+    #     device=device,
+    # )

     params = defaultdict(
         body_pose=initial_pose,
@@ -74,15 +74,17 @@ def optimize_sample(sample_index, dataset, config, device=torch.device('cpu'), d
     with torch.no_grad():
         model(**params)

+    r.start()
+
     # apply transform to scene
-    if r is not None:
-        r.set_group_pose("body", cam_trans.cpu().numpy())
+    # if r is not None:
+    #     r.set_group_pose("body", cam_trans.cpu().numpy())

     global_orient = train_orient_with_conf(
         config=config,
         model=model,
         keypoints=keypoints,
-        camera_layer=pose_camera,
+        camera_layer=None,  # pose_camera,
         renderer=r,
         device=device,
         use_progress_bar=verbose,
train_camera.py (new empty file)
(orientation training module, file name not captured in this scrape)

@@ -1,4 +1,5 @@
-from utils.mapping import get_indices_by_name
+from modules.perspective_cam import PerspectiveCamera
+from utils.mapping import get_indices_by_name, opengl_to_screen_space
 from modules.distance_loss import WeightedMSELoss
 from modules.utils import get_loss_layers
 from camera_estimation import TorchCameraEstimate
@@ -52,10 +53,18 @@ def train_orient(
     )

     # make sure camera module is on the correct device
-    camera = camera.to(device=device, dtype=dtype)
+    # camera = camera.to(device=device, dtype=dtype)
+    pers_cam = PerspectiveCamera(
+        dtype=dtype, device=device,
+        center=torch.tensor([[1920/2, 1080/2]], dtype=dtype),
+        focal_length_x=850.0,
+        focal_length_y=850.0
+    ).to(device=device)

     # setup keypoint data
     keypoints = torch.tensor(keypoints).to(device=device, dtype=dtype)
+    # keypoints = opengl_to_screen_space(keypoints, (1920, 1080))
+    # do some janky conversion back to pixel :(

     # torso indices
     torso_indices = get_indices_by_name(joint_names)
@@ -67,7 +76,7 @@ def train_orient(
     pose_layer = BodyPose(model, dtype=dtype, device=device,
                           useBodyMeanAngles=False).to(device=device, dtype=dtype)

-    parameters = [model.global_orient]
+    parameters = [model.global_orient, pers_cam.rotation, pers_cam.translation]

     if use_progress_bar:
         pbar = tqdm(total=iterations)
@@ -85,19 +94,23 @@ def train_orient(

     optimizer = optimizer(parameters, learning_rate)

+    print(keypoints[0][0])
+    body_joints, cur_pose = pose_layer()
+    body_joints = opengl_to_screen_space(body_joints.clone(), (1080, 1080))
+    print(body_joints[0][0])
+
     # prediction and loss computation closure
     def predict():
         # return joints based on current model state
        body_joints, cur_pose = pose_layer()
+        # body_joints = opengl_to_screen_space(body_joints.clone(), (1920, 1080))
         # compute homogeneous coordinates and project them to 2D space
-        points = tgm.convert_points_to_homogeneous(body_joints)
-        points = camera(points).squeeze()
+        # points = tgm.convert_points_to_homogeneous(body_joints)
+        points = pers_cam(body_joints).squeeze()
+        print(points[0][0])
         # compute loss between 2D joint projection and OpenPose keypoints
         loss = loss_layer(points[torso_indices],
-                          keypoints[torso_indices])
+                          keypoints[torso_indices][:, :2])

         return loss

     # main optimizer closure
@@ -112,10 +125,10 @@ def train_orient(
         return loss

     # camera translation
-    R = camera.trans.detach().cpu().numpy().squeeze()
+    # R = camera.trans.detach().cpu().numpy().squeeze()

     # main optimization loop
-    for t in range(iterations):
+    for t in range(2000):
         loss = optimizer.step(optim_closure)

         # compute loss
@@ -136,12 +149,14 @@ def train_orient(
             pbar.set_description("Error %f" % cur_loss)
             pbar.update(1)

-        if renderer is not None and render_steps:
-            renderer.render_model(
-                model=model,
-                model_out=pose_layer.cur_out,
-                transform=R
-            )
+        # if renderer is not None and render_steps:
+        #     renderer.render_model(
+        #         model=model,
+        #         model_out=pose_layer.cur_out,
+        #         transform=R
+        #     )
+
+    print("translation", pers_cam.translation)

     if use_progress_bar:
         pbar.close()
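Both optimizer choices seen in this commit (Adam here, LBFGS previously) work with the optimizer.step(optim_closure) call in the loop above, since every torch optimizer accepts an optional loss closure and LBFGS requires one. A self-contained toy sketch of that pattern, not the repo's actual loss:

import torch

x = torch.nn.Parameter(torch.tensor([3.0]))
opt = torch.optim.Adam([x], lr=0.5)  # torch.optim.LBFGS([x]) works unchanged

def closure():
    # re-evaluate the loss; LBFGS calls this several times per step, Adam once
    opt.zero_grad()
    loss = (x ** 2).sum()
    loss.backward()
    return loss

for _ in range(50):
    loss = opt.step(closure)
print(float(x))  # approaches 0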
utils/mapping.py (file header lost in the scrape; name inferred from the opengl_to_screen_space import above)

@@ -106,7 +106,7 @@ def estimate_scale(joints, keypoints, pairs=[
     smpl_height = np.linalg.norm(smpl_dists, axis=0).mean()
     ops_height = np.linalg.norm(ops_dists, axis=0).mean()

-    return cam_fy / 1080 * smpl_height / ops_height
+    return smpl_height / ops_height


 def estimate_focal_length(run_estimation: bool = False):
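A toy check of the simplified scale estimate above (now a pure mean-limb-length ratio, with the focal term dropped); the arrays are placeholder limb-distance vectors:

import numpy as np

smpl_dists = np.array([[0.5, 0.5], [0.5, 0.5]])
ops_dists = np.array([[0.25, 0.25], [0.25, 0.25]])
smpl_height = np.linalg.norm(smpl_dists, axis=0).mean()  # 0.7071
ops_height = np.linalg.norm(ops_dists, axis=0).mean()    # 0.3536
print(smpl_height / ops_height)  # -> 2.0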
@@ -192,7 +192,7 @@ def setup_training(model, dataset, sample_index, renderer=True, offscreen=False)
     est_scale = estimate_scale(joints, keypoints)

     # apply scaling to keypoints
-    keypoints = keypoints * est_scale
+    keypoints = keypoints  # * est_scale

     # integrating Camera Estimation

@@ -278,13 +278,15 @@ def openpose_to_opengl_coords(
         [type]: [description]
     """

-    points = np.array([
-        [
-            x / real_width * 2 - 1,
-            -y / real_height * 2 + 1,
-            0
-        ] for (x, y, z) in input_data])
+    # points = np.array([
+    #     [
+    #         x / real_width * 2 - 1,
+    #         -y / real_height * 2 + 1,
+    #         0
+    #     ] for (x, y, z) in input_data])

+    points = np.array(input_data)[:, :3]
+    points[:, 2] = 0
     conf = np.array([
         z for (_, _, z) in input_data
     ])
@@ -292,6 +294,13 @@
     return (points, conf)


+def opengl_to_screen_space(points, size):
+    points[:, 0] = (points[:, 0] + 1) / 2 * size[0]
+    points[:, 1] = -((points[:, 1] - 1) / 2 * size[1])
+    points[:, 2] = ((points[:, 2] - 1) / 2 * size[1])
+    return points
+
+
 def smpl_to_openpose(print_mapping: True):
     """Utility for remapping smpl mapping indices to openpose mapping indices.

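A quick numeric check of opengl_to_screen_space, with the function repeated so the snippet is self-contained: NDC (0, 0) lands at the image center and (-1, 1) at the top-left pixel origin for a 1920x1080 target.

import numpy as np

def opengl_to_screen_space(points, size):
    points[:, 0] = (points[:, 0] + 1) / 2 * size[0]
    points[:, 1] = -((points[:, 1] - 1) / 2 * size[1])
    points[:, 2] = ((points[:, 2] - 1) / 2 * size[1])
    return points

pts = np.array([[0.0, 0.0, 0.0], [-1.0, 1.0, 0.0]])
print(opengl_to_screen_space(pts, (1920, 1080)))
# -> [[960. 540. -540.]
#     [  0.   0. -540.]]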
(video export helper, file name not captured in this scrape)

@@ -93,7 +93,7 @@ def save_to_video(
         r.render_model_geometry(
             faces=model_anim.faces,
             vertices=vertices,
-            pose=cam_trans  # cam_transform,
+            pose=cam_transform  # cam_transform,
         )
         frames.append(r.get_snapshot())