making camera work again

Wlad 2021-02-26 12:50:38 +01:00
parent eb1b5876a8
commit 36cd2c6648
13 changed files with 236 additions and 40 deletions

.gitignore (vendored): 4 changes

@@ -101,4 +101,6 @@ vposer_v1_0
results/
output/
tests/
-samples/video*
+samples/
+raw/
+presentation/


@@ -33,7 +33,7 @@ This should copy and rename the SMPL model to the correct folders. Either way th
To use `bodyPrior` in the configuration please download VPoser and place it into the `./vposer_v1_0` directory in the project root. VPoser can be downloaded from [this link](https://psfiles.is.tuebingen.mpg.de/downloads/smplx/vposer_v1_0-zip) after creating an account with SMPL-X.
### Mesh intersection
To use `intersectLoss` in the configuration please pull the [github](https://github.com/gosticks/torch-mesh-isect) repo. This repo is patched to run on newer versions of PyTorch.
Note: It only runs on Linux-based operating systems. We had trouble getting it to work on Windows.
### Conda Environment

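For reference, a minimal pre-flight check that the VPoser weights are where the README above expects them. This is a sketch, not part of the commit, and the repo's actual loading code may differ:

```python
from pathlib import Path

# The README expects the unpacked VPoser weights at ./vposer_v1_0 (project root).
vposer_dir = Path("./vposer_v1_0")
if not vposer_dir.is_dir():
    raise FileNotFoundError(
        "Download VPoser from the SMPL-X site (account required) and "
        "unpack it to ./vposer_v1_0 before enabling bodyPrior in the config.")
```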

@@ -19,8 +19,8 @@ camera:
patience: 10
optimizer: Adam
orientation:
-lr: 0.03
-optimizer: LBFGS
+lr: 0.5
+optimizer: Adam
iterations: 5
joint_names: ["hip-left", "hip-right", "shoulder-left", "shoulder-right"] # joints to be used for optimization
pose:
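The `orientation` stage now uses `Adam` with `lr: 0.5` instead of `LBFGS`. A hedged sketch of how such a config entry can be resolved to a `torch.optim` instance, mirroring the `optimizer(parameters, learning_rate)` call in `train_orient` further down; the `getattr` lookup is an assumption, not necessarily the repo's actual helper:

```python
import torch.optim as optim

def optimizer_from_conf(conf, parameters):
    # conf is e.g. {"optimizer": "Adam", "lr": 0.5}
    opt_cls = getattr(optim, conf["optimizer"])  # optim.Adam or optim.LBFGS
    return opt_cls(parameters, lr=conf["lr"])
```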

example_fit_camera.py (new file): 84 lines

@@ -0,0 +1,84 @@
import pickle
import time
from train import create_animation
from dataset import SMPLyDataset
from model import *
from utils.general import *
from renderer import *
from utils.general import rename_files, get_new_filename
START_IDX = 1 # starting index of the frame to optimize for
FINISH_IDX = 60 # choose a big number to optimize for all frames in samples directory
# if False, only run already saved animation without optimization
RUN_OPTIMIZATION = True
result_image = []
idx = START_IDX
device = torch.device('cpu')
dtype = torch.float32
config = load_config()
dataset = SMPLyDataset.from_config(config)
model = SMPLyModel.model_from_conf(config)
# Rename files in samples directory to uniform format
if config['data']['renameFiles']:
rename_files(config['data']['rootDir'] + "/")
'''
Optimization part without visualization
'''
if RUN_OPTIMIZATION:
model_outs, filename = create_animation(
dataset,
config,
START_IDX,
FINISH_IDX,
verbose=False,
offscreen=True,
save_to_file=True,
interpolate=False
)
def replay_animation(file, start_frame=0, end_frame=None, with_background=False, fps=30, interpolated=False):
r = Renderer()
r.start()
model_anim = SMPLyModel.model_from_conf(config)
with open(file, "rb") as fp:
results = pickle.load(fp)
if end_frame is None:
end_frame = len(results)
for model, camera_transform in results[start_frame:end_frame]:
if interpolated:
vertices = model
else:
vertices = model.vertices
r.render_model_geometry(
faces=model_anim.faces,
vertices=vertices,
pose=camera_transform
)
time.sleep(1 / fps)
'''
Play the animation.
'''
if RUN_OPTIMIZATION:
anim_file = filename
else:
results_dir = config['output']['rootDir']
result_prefix = config['output']['prefix']
anim_file = results_dir + result_prefix + "0.pkl"
replay_animation(anim_file, interpolated=True)
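The pickle consumed by `replay_animation` holds one `(model output or vertex array, camera transform)` pair per frame, so a saved run can be inspected directly. A small sketch continuing from the variables above; the exact types stored depend on the `interpolate` flag, and the 4x4 shape of the camera transform is an assumption:

```python
import pickle

with open(anim_file, "rb") as fp:
    results = pickle.load(fp)
print(len(results), "frames")  # one (model, camera_transform) pair per frame
model_out, cam_transform = results[0]
print(type(model_out), getattr(cam_transform, "shape", None))
```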


@@ -9,7 +9,7 @@ from renderer import *
from utils.general import rename_files, get_new_filename
START_IDX = 150 # starting index of the frame to optimize for
-FINISH_IDX = 300 # choose a big number to optimize for all frames in samples directory
+FINISH_IDX = 400 # choose a big number to optimize for all frames in samples directory
result_image = []
idx = START_IDX

examples/__init__.py (new file, empty)

modules/perspective_cam.py (new file): 84 lines

@@ -0,0 +1,84 @@
from collections import namedtuple
import torch
import torch.nn as nn
from smplx.lbs import transform_mat
class PerspectiveCamera(nn.Module):
FOCAL_LENGTH = 5000
def __init__(self, rotation=None, translation=None,
focal_length_x=None, focal_length_y=None,
batch_size=1,
center=None, dtype=torch.float32, **kwargs):
super(PerspectiveCamera, self).__init__()
self.batch_size = batch_size
self.dtype = dtype
# Make a buffer so that PyTorch does not complain when creating
# the camera matrix
self.register_buffer('zero',
torch.zeros([batch_size], dtype=dtype))
if focal_length_x is None or isinstance(focal_length_x, float):
focal_length_x = torch.full(
[batch_size],
self.FOCAL_LENGTH if focal_length_x is None else
focal_length_x,
dtype=dtype)
if focal_length_y is None or isinstance(focal_length_y, float):
focal_length_y = torch.full(
[batch_size],
self.FOCAL_LENGTH if focal_length_y is None else
focal_length_y,
dtype=dtype)
self.register_buffer('focal_length_x', focal_length_x)
self.register_buffer('focal_length_y', focal_length_y)
if center is None:
center = torch.zeros([batch_size, 2], dtype=dtype)
self.register_buffer('center', center)
if rotation is None:
rotation = torch.eye(
3, dtype=dtype).unsqueeze(dim=0).repeat(batch_size, 1, 1)
rotation = nn.Parameter(rotation, requires_grad=True)
self.register_parameter('rotation', rotation)
if translation is None:
translation = torch.zeros([batch_size, 3], dtype=dtype)
translation = nn.Parameter(translation,
requires_grad=True)
self.register_parameter('translation', translation)
def forward(self, points):
device = points.device
with torch.no_grad():
camera_mat = torch.zeros([self.batch_size, 2, 2],
dtype=self.dtype, device=points.device)
camera_mat[:, 0, 0] = self.focal_length_x
camera_mat[:, 1, 1] = self.focal_length_y
camera_transform = transform_mat(self.rotation,
self.translation.unsqueeze(dim=-1))
homog_coord = torch.ones(list(points.shape)[:-1] + [1],
dtype=points.dtype,
device=device)
# Convert the points to homogeneous coordinates
points_h = torch.cat([points, homog_coord], dim=-1)
projected_points = torch.einsum('bki,bji->bjk',
[camera_transform, points_h])
img_points = torch.div(projected_points[:, :, :2],
projected_points[:, :, 2].unsqueeze(dim=-1))
img_points = torch.einsum('bki,bji->bjk', [camera_mat, img_points]) \
+ self.center.unsqueeze(dim=1)
return img_points
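For orientation, a minimal usage sketch of the module above (not part of the commit); the image center and focal length match the values hard-coded in `train_orient` below:

```python
import torch

# Assumed shapes: input [batch, num_joints, 3] -> output [batch, num_joints, 2].
cam = PerspectiveCamera(center=torch.tensor([[1920 / 2, 1080 / 2]]),
                        focal_length_x=850.0, focal_length_y=850.0)
joints_3d = torch.rand([1, 25, 3])  # e.g. 25 OpenPose-style joints
joints_2d = cam(joints_3d)          # 2D pixel coordinates, shape [1, 25, 2]
```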


@@ -60,12 +60,12 @@ def optimize_sample(sample_index, dataset, config, device=torch.device('cpu'), d
device = torch.device('cpu')
# get camera estimation
-pose_camera, cam_trans, cam_int, cam_params = SimpleCamera.from_estimation_cam(
-cam=camera,
-use_intrinsics=config['pose']['useCameraIntrinsics'],
-dtype=dtype,
-device=device,
-)
+# pose_camera, cam_trans, cam_int, cam_params = SimpleCamera.from_estimation_cam(
+# cam=camera,
+# use_intrinsics=config['pose']['useCameraIntrinsics'],
+# dtype=dtype,
+# device=device,
+# )
params = defaultdict(
body_pose=initial_pose,
@@ -74,15 +74,17 @@
with torch.no_grad():
model(**params)
r.start()
# apply transform to scene
-if r is not None:
-r.set_group_pose("body", cam_trans.cpu().numpy())
+# if r is not None:
+#r.set_group_pose("body", cam_trans.cpu().numpy())
global_orient = train_orient_with_conf(
config=config,
model=model,
keypoints=keypoints,
-camera_layer=pose_camera,
+camera_layer=None, # pose_camera,
renderer=r,
device=device,
use_progress_bar=verbose,

train_camera.py (new file, empty)


@@ -1,4 +1,5 @@
-from utils.mapping import get_indices_by_name
+from modules.perspective_cam import PerspectiveCamera
+from utils.mapping import get_indices_by_name, opengl_to_screen_space
from modules.distance_loss import WeightedMSELoss
from modules.utils import get_loss_layers
from camera_estimation import TorchCameraEstimate
@@ -52,10 +53,18 @@ def train_orient(
)
# make sure camera module is on the correct device
-camera = camera.to(device=device, dtype=dtype)
+#camera = camera.to(device=device, dtype=dtype)
+pers_cam = PerspectiveCamera(
+dtype=dtype, device=device,
+center=torch.tensor([[1920/2, 1080/2]], dtype=dtype),
+focal_length_x=850.0,
+focal_length_y=850.0
+).to(device=device)
# setup keypoint data
keypoints = torch.tensor(keypoints).to(device=device, dtype=dtype)
+# keypoints = opengl_to_screen_space(keypoints, (1920, 1080))
+# do some janky conversion back to pixel :(
# torso indices
torso_indices = get_indices_by_name(joint_names)
@@ -67,7 +76,7 @@ def train_orient(
pose_layer = BodyPose(model, dtype=dtype, device=device,
useBodyMeanAngles=False).to(device=device, dtype=dtype)
-parameters = [model.global_orient]
+parameters = [model.global_orient, pers_cam.rotation, pers_cam.translation]
if use_progress_bar:
pbar = tqdm(total=iterations)
@@ -85,19 +94,23 @@ def train_orient(
optimizer = optimizer(parameters, learning_rate)
+print(keypoints[0][0])
+body_joints, cur_pose = pose_layer()
+body_joints = opengl_to_screen_space(body_joints.clone(), (1080, 1080))
+print(body_joints[0][0])
# prediction and loss computation closure
def predict():
# return joints based on current model state
body_joints, cur_pose = pose_layer()
+# body_joints = opengl_to_screen_space(body_joints.clone(), (1920, 1080))
# compute homogeneous coordinates and project them to 2D space
-points = tgm.convert_points_to_homogeneous(body_joints)
-points = camera(points).squeeze()
+#points = tgm.convert_points_to_homogeneous(body_joints)
+points = pers_cam(body_joints).squeeze()
+print(points[0][0])
# compute loss between 2D joint projection and OpenPose keypoints
loss = loss_layer(points[torso_indices],
-keypoints[torso_indices])
+keypoints[torso_indices][:, :2])
return loss
# main optimizer closure
@@ -112,10 +125,10 @@ def train_orient(
return loss
# camera translation
-R = camera.trans.detach().cpu().numpy().squeeze()
+#R = camera.trans.detach().cpu().numpy().squeeze()
# main optimization loop
-for t in range(iterations):
+for t in range(2000):
loss = optimizer.step(optim_closure)
# compute loss
@@ -136,12 +149,14 @@ def train_orient(
pbar.set_description("Error %f" % cur_loss)
pbar.update(1)
-if renderer is not None and render_steps:
-renderer.render_model(
-model=model,
-model_out=pose_layer.cur_out,
-transform=R
-)
+# if renderer is not None and render_steps:
+# renderer.render_model(
+# model=model,
+# model_out=pose_layer.cur_out,
+# transform=R
+# )
+print("translation", pers_cam.translation)
if use_progress_bar:
pbar.close()


@@ -106,7 +106,7 @@ def estimate_scale(joints, keypoints, pairs=[
smpl_height = np.linalg.norm(smpl_dists, axis=0).mean()
ops_height = np.linalg.norm(ops_dists, axis=0).mean()
-return cam_fy / 1080 * smpl_height / ops_height
+return smpl_height / ops_height
def estimate_focal_length(run_estimation: bool = False):
@@ -192,7 +192,7 @@ def setup_training(model, dataset, sample_index, renderer=True, offscreen=False)
est_scale = estimate_scale(joints, keypoints)
# apply scaling to keypoints
-keypoints = keypoints * est_scale
+keypoints = keypoints # * est_scale
# integrating Camera Estimation

utils/mapping.py

@@ -278,13 +278,15 @@ def openpose_to_opengl_coords(
[type]: [description]
"""
-points = np.array([
-[
-x / real_width * 2 - 1,
--y / real_height * 2 + 1,
-0
-] for (x, y, z) in input_data])
+# points = np.array([
+# [
+# x / real_width * 2 - 1,
+# -y / real_height * 2 + 1,
+# 0
+# ] for (x, y, z) in input_data])
+points = np.array(input_data)[:, :3]
+points[:, 2] = 0
conf = np.array([
z for (_, _, z) in input_data
])
@@ -292,6 +294,13 @@
return (points, conf)
+def opengl_to_screen_space(points, size):
+points[:, 0] = (points[:, 0] + 1) / 2 * size[0]
+points[:, 1] = -((points[:, 1] - 1) / 2 * size[1])
+points[:, 2] = ((points[:, 2] - 1) / 2 * size[1])
+return points
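A quick sanity check of the new helper (not part of the commit): the NDC origin should land at the image center of a 1920x1080 frame, while the z channel is a depth remap rather than a pixel coordinate:

```python
import numpy as np

p = opengl_to_screen_space(np.array([[0.0, 0.0, 0.0]]), (1920, 1080))
print(p[0])  # -> [ 960.  540. -540.]
```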
def smpl_to_openpose(print_mapping: bool = True):
"""Utility for remapping smpl mapping indices to openpose mapping indices.


@@ -93,7 +93,7 @@ def save_to_video(
r.render_model_geometry(
faces=model_anim.faces,
vertices=vertices,
-pose=cam_trans # cam_transform,
+pose=cam_transform # cam_transform,
)
frames.append(r.get_snapshot())