making camera work again

Wlad 2021-02-26 12:50:38 +01:00
parent eb1b5876a8
commit 36cd2c6648
13 changed files with 236 additions and 40 deletions

.gitignore

@@ -101,4 +101,6 @@ vposer_v1_0
 results/
 output/
 tests/
-samples/video*
+samples/
+raw/
+presentation/

@@ -19,8 +19,8 @@ camera:
   patience: 10
   optimizer: Adam
 orientation:
-  lr: 0.03
-  optimizer: LBFGS
+  lr: 0.5
+  optimizer: Adam
   iterations: 5
   joint_names: ["hip-left", "hip-right", "shoulder-left", "shoulder-right"] # joints to be used for optimization
 pose:

example_fit_camera.py (new file)

@@ -0,0 +1,84 @@
import pickle
import time

from train import create_animation
from dataset import SMPLyDataset
from model import *
from utils.general import *
from renderer import *
from utils.general import rename_files, get_new_filename

START_IDX = 1  # starting index of the frame to optimize for
FINISH_IDX = 60  # choose a big number to optimize for all frames in samples directory

# if False, only run already saved animation without optimization
RUN_OPTIMIZATION = True

result_image = []
idx = START_IDX
device = torch.device('cpu')
dtype = torch.float32

config = load_config()
dataset = SMPLyDataset.from_config(config)
model = SMPLyModel.model_from_conf(config)

# Rename files in samples directory to a uniform format
if config['data']['renameFiles']:
    rename_files(config['data']['rootDir'] + "/")

'''
Optimization part without visualization
'''
if RUN_OPTIMIZATION:
    model_outs, filename = create_animation(
        dataset,
        config,
        START_IDX,
        FINISH_IDX,
        verbose=False,
        offscreen=True,
        save_to_file=True,
        interpolate=False
    )


def replay_animation(file, start_frame=0, end_frame=None, with_background=False, fps=30, interpolated=False):
    r = Renderer()
    r.start()

    model_anim = SMPLyModel.model_from_conf(config)

    with open(file, "rb") as fp:
        results = pickle.load(fp)

    if end_frame is None:
        end_frame = len(results)

    for model, camera_transform in results[start_frame:end_frame]:
        if interpolated:
            vertices = model
        else:
            vertices = model.vertices

        r.render_model_geometry(
            faces=model_anim.faces,
            vertices=vertices,
            pose=camera_transform
        )
        time.sleep(1 / fps)


'''
Play the animation.
'''
if RUN_OPTIMIZATION:
    anim_file = filename
else:
    results_dir = config['output']['rootDir']
    result_prefix = config['output']['prefix']
    anim_file = results_dir + result_prefix + "0.pkl"

replay_animation(anim_file, interpolated=True)
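Assuming the SMPL model files and OpenPose keypoints referenced by the config are in place, the script should run from the repository root via: python example_fit_camera.py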

@@ -9,7 +9,7 @@ from renderer import *
 from utils.general import rename_files, get_new_filename

 START_IDX = 150  # starting index of the frame to optimize for
-FINISH_IDX = 300  # choose a big number to optimize for all frames in samples directory
+FINISH_IDX = 400  # choose a big number to optimize for all frames in samples directory

 result_image = []
 idx = START_IDX

examples/__init__.py (new file, empty)

modules/perspective_cam.py (new file)

@@ -0,0 +1,84 @@
from collections import namedtuple

import torch
import torch.nn as nn

from smplx.lbs import transform_mat


class PerspectiveCamera(nn.Module):
    FOCAL_LENGTH = 5000

    def __init__(self, rotation=None, translation=None,
                 focal_length_x=None, focal_length_y=None,
                 batch_size=1,
                 center=None, dtype=torch.float32, **kwargs):
        super(PerspectiveCamera, self).__init__()
        self.batch_size = batch_size
        self.dtype = dtype

        # Make a buffer so that PyTorch does not complain when creating
        # the camera matrix
        self.register_buffer('zero',
                             torch.zeros([batch_size], dtype=dtype))

        if focal_length_x is None or type(focal_length_x) == float:
            focal_length_x = torch.full(
                [batch_size],
                self.FOCAL_LENGTH if focal_length_x is None else
                focal_length_x,
                dtype=dtype)

        if focal_length_y is None or type(focal_length_y) == float:
            focal_length_y = torch.full(
                [batch_size],
                self.FOCAL_LENGTH if focal_length_y is None else
                focal_length_y,
                dtype=dtype)

        self.register_buffer('focal_length_x', focal_length_x)
        self.register_buffer('focal_length_y', focal_length_y)

        if center is None:
            center = torch.zeros([batch_size, 2], dtype=dtype)
        self.register_buffer('center', center)

        if rotation is None:
            rotation = torch.eye(
                3, dtype=dtype).unsqueeze(dim=0).repeat(batch_size, 1, 1)
        rotation = nn.Parameter(rotation, requires_grad=True)
        self.register_parameter('rotation', rotation)

        if translation is None:
            translation = torch.zeros([batch_size, 3], dtype=dtype)
        translation = nn.Parameter(translation,
                                   requires_grad=True)
        self.register_parameter('translation', translation)

    def forward(self, points):
        device = points.device

        with torch.no_grad():
            camera_mat = torch.zeros([self.batch_size, 2, 2],
                                     dtype=self.dtype, device=points.device)
            camera_mat[:, 0, 0] = self.focal_length_x
            camera_mat[:, 1, 1] = self.focal_length_y

        camera_transform = transform_mat(self.rotation,
                                         self.translation.unsqueeze(dim=-1))
        homog_coord = torch.ones(list(points.shape)[:-1] + [1],
                                 dtype=points.dtype,
                                 device=device)
        # Convert the points to homogeneous coordinates
        points_h = torch.cat([points, homog_coord], dim=-1)

        projected_points = torch.einsum('bki,bji->bjk',
                                        [camera_transform, points_h])

        img_points = torch.div(projected_points[:, :, :2],
                               projected_points[:, :, 2].unsqueeze(dim=-1))
        img_points = torch.einsum('bki,bji->bjk', [camera_mat, img_points]) \
            + self.center.unsqueeze(dim=1)
        return img_points
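A minimal usage sketch for this module (not part of the commit; the principal point and focal length mirror the values hard-coded in train_orient below, and the shapes follow from forward):

import torch
from modules.perspective_cam import PerspectiveCamera

cam = PerspectiveCamera(
    center=torch.tensor([[1920 / 2, 1080 / 2]]),  # principal point at the image center
    focal_length_x=850.0,
    focal_length_y=850.0,
)
joints = torch.rand([1, 25, 3])  # dummy batch of 25 3D joints
pixels = cam(joints)             # -> [1, 25, 2] 2D pixel coordinates
print(pixels.shape)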

@@ -60,12 +60,12 @@ def optimize_sample(sample_index, dataset, config, device=torch.device('cpu'), d
     device = torch.device('cpu')

     # get camera estimation
-    pose_camera, cam_trans, cam_int, cam_params = SimpleCamera.from_estimation_cam(
-        cam=camera,
-        use_intrinsics=config['pose']['useCameraIntrinsics'],
-        dtype=dtype,
-        device=device,
-    )
+    # pose_camera, cam_trans, cam_int, cam_params = SimpleCamera.from_estimation_cam(
+    #     cam=camera,
+    #     use_intrinsics=config['pose']['useCameraIntrinsics'],
+    #     dtype=dtype,
+    #     device=device,
+    # )

     params = defaultdict(
         body_pose=initial_pose,
@@ -74,15 +74,17 @@ def optimize_sample(sample_index, dataset, config, device=torch.device('cpu'), d
     with torch.no_grad():
         model(**params)

+    r.start()
+
     # apply transform to scene
-    if r is not None:
-        r.set_group_pose("body", cam_trans.cpu().numpy())
+    # if r is not None:
+    #     r.set_group_pose("body", cam_trans.cpu().numpy())

     global_orient = train_orient_with_conf(
         config=config,
         model=model,
         keypoints=keypoints,
-        camera_layer=pose_camera,
+        camera_layer=None,  # pose_camera,
         renderer=r,
         device=device,
         use_progress_bar=verbose,

train_camera.py (new file, empty)

@@ -1,4 +1,5 @@
-from utils.mapping import get_indices_by_name
+from modules.perspective_cam import PerspectiveCamera
+from utils.mapping import get_indices_by_name, opengl_to_screen_space
 from modules.distance_loss import WeightedMSELoss
 from modules.utils import get_loss_layers
 from camera_estimation import TorchCameraEstimate
@@ -52,10 +53,18 @@ def train_orient(
     )

     # make sure camera module is on the correct device
-    camera = camera.to(device=device, dtype=dtype)
+    # camera = camera.to(device=device, dtype=dtype)
+    pers_cam = PerspectiveCamera(
+        dtype=dtype, device=device,
+        center=torch.tensor([[1920 / 2, 1080 / 2]], dtype=dtype),
+        focal_length_x=850.0,
+        focal_length_y=850.0
+    ).to(device=device)

     # setup keypoint data
     keypoints = torch.tensor(keypoints).to(device=device, dtype=dtype)
+    # keypoints = opengl_to_screen_space(keypoints, (1920, 1080))
+    # do some janky conversion back to pixel :(

     # torso indices
     torso_indices = get_indices_by_name(joint_names)
@@ -67,7 +76,7 @@ def train_orient(
     pose_layer = BodyPose(model, dtype=dtype, device=device,
                           useBodyMeanAngles=False).to(device=device, dtype=dtype)

-    parameters = [model.global_orient]
+    parameters = [model.global_orient, pers_cam.rotation, pers_cam.translation]

     if use_progress_bar:
         pbar = tqdm(total=iterations)
@@ -85,19 +94,23 @@ def train_orient(
     optimizer = optimizer(parameters, learning_rate)

+    print(keypoints[0][0])
+    body_joints, cur_pose = pose_layer()
+    body_joints = opengl_to_screen_space(body_joints.clone(), (1080, 1080))
+    print(body_joints[0][0])
+
     # prediction and loss computation closure
     def predict():
         # return joints based on current model state
         body_joints, cur_pose = pose_layer()
+        # body_joints = opengl_to_screen_space(body_joints.clone(), (1920, 1080))

         # compute homogeneous coordinates and project them to 2D space
-        points = tgm.convert_points_to_homogeneous(body_joints)
-        points = camera(points).squeeze()
+        # points = tgm.convert_points_to_homogeneous(body_joints)
+        points = pers_cam(body_joints).squeeze()
+        print(points[0][0])

         # compute loss between 2D joint projection and OpenPose keypoints
         loss = loss_layer(points[torso_indices],
-                          keypoints[torso_indices])
+                          keypoints[torso_indices][:, :2])
         return loss

     # main optimizer closure
@@ -112,10 +125,10 @@ def train_orient(
         return loss

     # camera translation
-    R = camera.trans.detach().cpu().numpy().squeeze()
+    # R = camera.trans.detach().cpu().numpy().squeeze()

     # main optimization loop
-    for t in range(iterations):
+    for t in range(2000):
         loss = optimizer.step(optim_closure)

         # compute loss
@@ -136,12 +149,14 @@ def train_orient(
             pbar.set_description("Error %f" % cur_loss)
             pbar.update(1)

-        if renderer is not None and render_steps:
-            renderer.render_model(
-                model=model,
-                model_out=pose_layer.cur_out,
-                transform=R
-            )
+        # if renderer is not None and render_steps:
+        #     renderer.render_model(
+        #         model=model,
+        #         model_out=pose_layer.cur_out,
+        #         transform=R
+        #     )
+
+    print("translation", pers_cam.translation)

     if use_progress_bar:
         pbar.close()

@@ -106,7 +106,7 @@ def estimate_scale(joints, keypoints, pairs=[
     smpl_height = np.linalg.norm(smpl_dists, axis=0).mean()
     ops_height = np.linalg.norm(ops_dists, axis=0).mean()

-    return cam_fy / 1080 * smpl_height / ops_height
+    return smpl_height / ops_height


 def estimate_focal_length(run_estimation: bool = False):
@@ -192,7 +192,7 @@ def setup_training(model, dataset, sample_index, renderer=True, offscreen=False)
     est_scale = estimate_scale(joints, keypoints)

     # apply scaling to keypoints
-    keypoints = keypoints * est_scale
+    keypoints = keypoints  # * est_scale

     # integrating Camera Estimation

utils/mapping.py
@@ -278,13 +278,15 @@ def openpose_to_opengl_coords(
         [type]: [description]
     """
-    points = np.array([
-        [
-            x / real_width * 2 - 1,
-            -y / real_height * 2 + 1,
-            0
-        ] for (x, y, z) in input_data])
+    # points = np.array([
+    #     [
+    #         x / real_width * 2 - 1,
+    #         -y / real_height * 2 + 1,
+    #         0
+    #     ] for (x, y, z) in input_data])
+    points = np.array(input_data)[:, :3]
+    points[:, 2] = 0

     conf = np.array([
         z for (_, _, z) in input_data
     ])
@@ -292,6 +294,13 @@ def openpose_to_opengl_coords(
     return (points, conf)


+def opengl_to_screen_space(points, size):
+    points[:, 0] = (points[:, 0] + 1) / 2 * size[0]
+    points[:, 1] = -((points[:, 1] - 1) / 2 * size[1])
+    points[:, 2] = ((points[:, 2] - 1) / 2 * size[1])
+    return points
+
+
 def smpl_to_openpose(print_mapping: True):
     """Utility for remapping smpl mapping indices to openpose mapping indices.

@@ -93,7 +93,7 @@ def save_to_video(
         r.render_model_geometry(
             faces=model_anim.faces,
             vertices=vertices,
-            pose=cam_trans  # cam_transform,
+            pose=cam_transform
         )

         frames.append(r.get_snapshot())