diff --git a/.gitignore b/.gitignore
index 26f3e08..db71101 100644
--- a/.gitignore
+++ b/.gitignore
@@ -101,4 +101,6 @@ vposer_v1_0
 results/
 output/
 tests/
-samples/video*
\ No newline at end of file
+samples/
+raw/
+presentation/
\ No newline at end of file
diff --git a/README.md b/README.md
index 0f7d9b2..4bcf177 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ This should copy and rename the SMPL model to the correct folders. Either way th
 To use `bodyPrior` in the configuration please download vposer and plate it into `./vposer_v1_0` directory in the project root. Vposer can be downloaded from this link after creating an account with SMPL-X [link](https://psfiles.is.tuebingen.mpg.de/downloads/smplx/vposer_v1_0-zip)
 
 ### Mesh intersection
-To use `intersectLoss` in the configuration please pull the [github](https://github.com/gosticks/torch-mesh-isect) repo. This repo is patched to run on the newer versions of pytorch.
+To use `intersectLoss` in the configuration please pull the [github](https://github.com/gosticks/torch-mesh-isect) repo. This repo is patched to run on newer versions of PyTorch. Note: it only runs on Linux-based operating systems; we had trouble getting it to work on Windows.
 
 ### Conda Environment
 
diff --git a/config.yaml b/config.yaml
index 20c9125..eefe39b 100644
--- a/config.yaml
+++ b/config.yaml
@@ -19,8 +19,8 @@ camera:
     patience: 10
     optimizer: Adam
   orientation:
-    lr: 0.03
-    optimizer: LBFGS
+    lr: 0.5
+    optimizer: Adam
     iterations: 5
     joint_names: ["hip-left", "hip-right", "shoulder-left", "shoulder-right"] # joints to be used for optimization
   pose:
diff --git a/example_fit_camera.py b/example_fit_camera.py
new file mode 100644
index 0000000..d1fa579
--- /dev/null
+++ b/example_fit_camera.py
@@ -0,0 +1,89 @@
+import pickle
+import time
+from train import create_animation
+from dataset import SMPLyDataset
+from model import *
+from utils.general import *
+from renderer import *
+from utils.general import rename_files, get_new_filename
+
+START_IDX = 1  # starting index of the frame to optimize for
+FINISH_IDX = 60  # choose a big number to optimize for all frames in samples directory
+# if False, only run already saved animation without optimization
+RUN_OPTIMIZATION = True
+
+result_image = []
+idx = START_IDX
+
+device = torch.device('cpu')
+dtype = torch.float32
+
+config = load_config()
+dataset = SMPLyDataset.from_config(config)
+model = SMPLyModel.model_from_conf(config)
+
+
+# Rename files in samples directory to uniform format
+if config['data']['renameFiles']:
+    rename_files(config['data']['rootDir'] + "/")
+
+
+'''
+Optimization part without visualization
+'''
+if RUN_OPTIMIZATION:
+    model_outs, filename = create_animation(
+        dataset,
+        config,
+        START_IDX,
+        FINISH_IDX,
+        verbose=False,
+        offscreen=True,
+        save_to_file=True,
+        interpolate=False
+    )
+
+
+def replay_animation(file, start_frame=0, end_frame=None, with_background=False, fps=30, interpolated=False):
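+    '''
+    Replay a saved animation from a results pickle file. Each entry is
+    expected to hold a (model output, camera transform) pair; when
+    interpolated=True the entry is assumed to contain raw vertices.
+    '''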
+    r = Renderer()
+    r.start()
+
+    model_anim = SMPLyModel.model_from_conf(config)
+
+    with open(file, "rb") as fp:
+        results = pickle.load(fp)
+
+    if end_frame is None:
+        end_frame = len(results)
+
+    for model, camera_transform in results[start_frame:end_frame]:
+        if interpolated:
+            vertices = model
+        else:
+            vertices = model.vertices
+
+        r.render_model_geometry(
+            faces=model_anim.faces,
+            vertices=vertices,
+            pose=camera_transform
+        )
+
+        time.sleep(1 / fps)
+
+
+'''
+Play the animation.
+'''
+if RUN_OPTIMIZATION:
+    anim_file = filename
+else:
+    results_dir = config['output']['rootDir']
+    result_prefix = config['output']['prefix']
+    anim_file = results_dir + result_prefix + "0.pkl"
+
+replay_animation(anim_file, interpolated=True)
diff --git a/example_render_video.py b/example_render_video.py
index 7086f58..682a1bd 100644
--- a/example_render_video.py
+++ b/example_render_video.py
@@ -9,7 +9,7 @@ from renderer import *
 from utils.general import rename_files, get_new_filename
 
 START_IDX = 150  # starting index of the frame to optimize for
-FINISH_IDX = 300  # choose a big number to optimize for all frames in samples directory
+FINISH_IDX = 400  # choose a big number to optimize for all frames in samples directory
 
 result_image = []
 idx = START_IDX
diff --git a/examples/__init__.py b/examples/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/modules/perspective_cam.py b/modules/perspective_cam.py
new file mode 100644
index 0000000..b975953
--- /dev/null
+++ b/modules/perspective_cam.py
@@ -0,0 +1,94 @@
+from collections import namedtuple
+
+import torch
+import torch.nn as nn
+
+from smplx.lbs import transform_mat
+
+
+class PerspectiveCamera(nn.Module):
+
+    FOCAL_LENGTH = 5000
+
+    def __init__(self, rotation=None, translation=None,
+                 focal_length_x=None, focal_length_y=None,
+                 batch_size=1,
+                 center=None, dtype=torch.float32, **kwargs):
+        super(PerspectiveCamera, self).__init__()
+        self.batch_size = batch_size
+        self.dtype = dtype
+        # Make a buffer so that PyTorch does not complain when creating
+        # the camera matrix
+        self.register_buffer('zero',
+                             torch.zeros([batch_size], dtype=dtype))
+
+        if focal_length_x is None or type(focal_length_x) == float:
+            focal_length_x = torch.full(
+                [batch_size],
+                self.FOCAL_LENGTH if focal_length_x is None else
+                focal_length_x,
+                dtype=dtype)
+
+        if focal_length_y is None or type(focal_length_y) == float:
+            focal_length_y = torch.full(
+                [batch_size],
+                self.FOCAL_LENGTH if focal_length_y is None else
+                focal_length_y,
+                dtype=dtype)
+
+        self.register_buffer('focal_length_x', focal_length_x)
+        self.register_buffer('focal_length_y', focal_length_y)
+
+        if center is None:
+            center = torch.zeros([batch_size, 2], dtype=dtype)
+        self.register_buffer('center', center)
+
+        if rotation is None:
+            rotation = torch.eye(
+                3, dtype=dtype).unsqueeze(dim=0).repeat(batch_size, 1, 1)
+
+        rotation = nn.Parameter(rotation, requires_grad=True)
+        self.register_parameter('rotation', rotation)
+
+        if translation is None:
+            translation = torch.zeros([batch_size, 3], dtype=dtype)
+
+        translation = nn.Parameter(translation,
+                                   requires_grad=True)
+        self.register_parameter('translation', translation)
+
+    def forward(self, points):
+        device = points.device
+
+        with torch.no_grad():
+            camera_mat = torch.zeros([self.batch_size, 2, 2],
+                                     dtype=self.dtype, device=points.device)
+            camera_mat[:, 0, 0] = self.focal_length_x
+            camera_mat[:, 1, 1] = self.focal_length_y
+
+        camera_transform = transform_mat(self.rotation,
+                                         self.translation.unsqueeze(dim=-1))
+        homog_coord = torch.ones(list(points.shape)[:-1] + [1],
+                                 dtype=points.dtype,
+                                 device=device)
+        # Convert the points to homogeneous coordinates
+        points_h = torch.cat([points, homog_coord], dim=-1)
+
+        projected_points = torch.einsum('bki,bji->bjk',
+                                        [camera_transform, points_h])
+
+        img_points = torch.div(projected_points[:, :, :2],
+                               projected_points[:, :, 2].unsqueeze(dim=-1))
+        img_points = torch.einsum('bki,bji->bjk', [camera_mat, img_points]) \
+            + self.center.unsqueeze(dim=1)
+        return img_points
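+
+
+if __name__ == '__main__':
+    # Minimal usage sketch with assumed values (a 1920x1080 image and
+    # ~850px focal lengths, matching train_orient.py): project a batch
+    # of two 3D points into pixel coordinates.
+    cam = PerspectiveCamera(center=torch.tensor([[960.0, 540.0]]),
+                            focal_length_x=850.0, focal_length_y=850.0)
+    pts = torch.tensor([[[0.0, 0.0, 2.0], [0.1, -0.2, 2.5]]])
+    print(cam(pts).shape)  # -> torch.Size([1, 2, 2])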
diff --git a/train.py b/train.py
index 6ecf588..f2f6256 100644
--- a/train.py
+++ b/train.py
@@ -60,12 +60,12 @@ def optimize_sample(sample_index, dataset, config, device=torch.device('cpu'), d
         device = torch.device('cpu')
 
     # get camera estimation
-    pose_camera, cam_trans, cam_int, cam_params = SimpleCamera.from_estimation_cam(
-        cam=camera,
-        use_intrinsics=config['pose']['useCameraIntrinsics'],
-        dtype=dtype,
-        device=device,
-    )
+    # pose_camera, cam_trans, cam_int, cam_params = SimpleCamera.from_estimation_cam(
+    #     cam=camera,
+    #     use_intrinsics=config['pose']['useCameraIntrinsics'],
+    #     dtype=dtype,
+    #     device=device,
+    # )
 
     params = defaultdict(
         body_pose=initial_pose,
@@ -74,15 +74,17 @@ def optimize_sample(sample_index, dataset, config, device=torch.device('cpu'), d
     with torch.no_grad():
         model(**params)
 
+    r.start()
+
     # apply transform to scene
-    if r is not None:
-        r.set_group_pose("body", cam_trans.cpu().numpy())
+    # if r is not None:
+    #     r.set_group_pose("body", cam_trans.cpu().numpy())
 
     global_orient = train_orient_with_conf(
         config=config,
         model=model,
         keypoints=keypoints,
-        camera_layer=pose_camera,
+        camera_layer=None,  # pose_camera,
         renderer=r,
         device=device,
         use_progress_bar=verbose,
diff --git a/train_camera.py b/train_camera.py
new file mode 100644
index 0000000..e69de29
diff --git a/train_orient.py b/train_orient.py
index 55924f5..8d03d54 100644
--- a/train_orient.py
+++ b/train_orient.py
@@ -1,4 +1,5 @@
-from utils.mapping import get_indices_by_name
+from modules.perspective_cam import PerspectiveCamera
+from utils.mapping import get_indices_by_name, opengl_to_screen_space
 from modules.distance_loss import WeightedMSELoss
 from modules.utils import get_loss_layers
 from camera_estimation import TorchCameraEstimate
@@ -52,10 +53,18 @@ def train_orient(
     )
 
     # make sure camera module is on the correct device
-    camera = camera.to(device=device, dtype=dtype)
+    # camera = camera.to(device=device, dtype=dtype)
+    pers_cam = PerspectiveCamera(
+        dtype=dtype, device=device,
+        center=torch.tensor([[1920 / 2, 1080 / 2]], dtype=dtype),
+        focal_length_x=850.0,
+        focal_length_y=850.0
+    ).to(device=device)
 
     # setup keypoint data
     keypoints = torch.tensor(keypoints).to(device=device, dtype=dtype)
+    # keypoints = opengl_to_screen_space(keypoints, (1920, 1080))
+    # do some janky conversion back to pixel :(
 
     # torso indices
     torso_indices = get_indices_by_name(joint_names)
@@ -67,7 +76,7 @@ def train_orient(
     pose_layer = BodyPose(model, dtype=dtype, device=device,
                           useBodyMeanAngles=False).to(device=device, dtype=dtype)
 
-    parameters = [model.global_orient]
+    parameters = [model.global_orient, pers_cam.rotation, pers_cam.translation]
 
     if use_progress_bar:
         pbar = tqdm(total=iterations)
@@ -85,19 +94,23 @@ def train_orient(
 
     optimizer = optimizer(parameters, learning_rate)
 
+    print(keypoints[0][0])
+    body_joints, cur_pose = pose_layer()
+    body_joints = opengl_to_screen_space(body_joints.clone(), (1080, 1080))
+    print(body_joints[0][0])
+
     # prediction and loss computation closere
     def predict():
         # return joints based on current model state
         body_joints, cur_pose = pose_layer()
-
+        # body_joints = opengl_to_screen_space(body_joints.clone(), (1920, 1080))
         # compute homogeneous coordinates and project them to 2D space
-        points = tgm.convert_points_to_homogeneous(body_joints)
-        points = camera(points).squeeze()
-
+        # points = tgm.convert_points_to_homogeneous(body_joints)
+        points = pers_cam(body_joints).squeeze()
+        print(points[0][0])
        # compute loss between 2D joint projection and OpenPose keypoints
         loss = loss_layer(points[torso_indices],
-                          keypoints[torso_indices])
-
+                          keypoints[torso_indices][:, :2])
         return loss
 
     # main optimizer closure
@@ -112,10 +125,10 @@ def train_orient(
         return loss
 
     # camera translation
-    R = camera.trans.detach().cpu().numpy().squeeze()
+    # R = camera.trans.detach().cpu().numpy().squeeze()
 
     # main optimization loop
-    for t in range(iterations):
+    for t in range(2000):
         loss = optimizer.step(optim_closure)
 
         # compute loss
@@ -136,12 +149,14 @@ def train_orient(
             pbar.set_description("Error %f" % cur_loss)
             pbar.update(1)
 
-        if renderer is not None and render_steps:
-            renderer.render_model(
-                model=model,
-                model_out=pose_layer.cur_out,
-                transform=R
-            )
+        # if renderer is not None and render_steps:
+        #     renderer.render_model(
+        #         model=model,
+        #         model_out=pose_layer.cur_out,
+        #         transform=R
+        #     )
+
+    print("translation", pers_cam.translation)
 
     if use_progress_bar:
         pbar.close()
diff --git a/utils/general.py b/utils/general.py
index ff4555d..4f49ac8 100644
--- a/utils/general.py
+++ b/utils/general.py
@@ -106,7 +106,7 @@ def estimate_scale(joints, keypoints, pairs=[
     smpl_height = np.linalg.norm(smpl_dists, axis=0).mean()
     ops_height = np.linalg.norm(ops_dists, axis=0).mean()
 
-    return cam_fy / 1080 * smpl_height / ops_height
+    return smpl_height / ops_height
 
 
 def estimate_focal_length(run_estimation: bool = False):
@@ -192,7 +192,7 @@ def setup_training(model, dataset, sample_index, renderer=True, offscreen=False)
     est_scale = estimate_scale(joints, keypoints)
 
     # apply scaling to keypoints
-    keypoints = keypoints * est_scale
+    keypoints = keypoints  # * est_scale
 
     # integrating Camera Estimation
 
diff --git a/utils/mapping.py b/utils/mapping.py
index c982af1..1c96771 100644
--- a/utils/mapping.py
+++ b/utils/mapping.py
@@ -278,13 +278,15 @@
         [type]: [description]
     """
 
-    points = np.array([
-        [
-            x / real_width * 2 - 1,
-            -y / real_height * 2 + 1,
-            0
-        ] for (x, y, z) in input_data])
+    # points = np.array([
+    #     [
+    #         x / real_width * 2 - 1,
+    #         -y / real_height * 2 + 1,
+    #         0
+    #     ] for (x, y, z) in input_data])
 
+    points = np.array(input_data)[:, :3]
+    points[:, 2] = 0
     conf = np.array([
         z for (_, _, z) in input_data
     ])
@@ -292,6 +294,16 @@
     return (points, conf)
 
 
+def opengl_to_screen_space(points, size):
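+    # Maps OpenGL normalized device coordinates ([-1, 1], y pointing up)
+    # to screen/pixel space ([0, w] x [0, h], y pointing down): e.g. with
+    # size=(1920, 1080) the NDC corner (-1, 1) lands at pixel (0, 0).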
+    points[:, 0] = (points[:, 0] + 1) / 2 * size[0]
+    points[:, 1] = -((points[:, 1] - 1) / 2 * size[1])
+    points[:, 2] = ((points[:, 2] - 1) / 2 * size[1])
+    return points
+
+
 def smpl_to_openpose(print_mapping: True):
     """Utility for remapping smpl mapping indices to openpose mapping indices.
 
diff --git a/utils/video.py b/utils/video.py
index f745c68..8f3e16b 100644
--- a/utils/video.py
+++ b/utils/video.py
@@ -93,7 +93,7 @@ def save_to_video(
         r.render_model_geometry(
             faces=model_anim.faces,
             vertices=vertices,
-            pose=cam_trans  # cam_transform,
+            pose=cam_transform
         )
 
         frames.append(r.get_snapshot())