From 316bc2d7e36af9bd7a843f1812214ff7e46eaf68 Mon Sep 17 00:00:00 2001
From: Wlad <9556979+gosticks@users.noreply.github.com>
Date: Tue, 23 Feb 2021 21:40:55 +0100
Subject: [PATCH] final preparations

---
 config.yaml                                   | 32 ++++----
 config_loss_off.yaml                          | 81 +++++++++++++++++++
 example_fit.py                                | 14 +++-
 example_fit_anim.py                           |  2 +-
 example_fit_temporal.py                       | 51 ++++++++++++
 example_hyper.py => example_hyper_optim.py    | 24 +++---
 example_render_video.py                       | 11 +--
 ...e_temporal.py => example_temporal_optim.py |  0
 train.py                                      | 20 +++--
 train_orient.py                               |  2 +-
 utils/video.py                                | 40 ++++++++-
 11 files changed, 232 insertions(+), 45 deletions(-)
 create mode 100644 config_loss_off.yaml
 create mode 100644 example_fit_temporal.py
 rename example_hyper.py => example_hyper_optim.py (74%)
 rename example_temporal.py => example_temporal_optim.py (100%)

diff --git a/config.yaml b/config.yaml
index dc41bfb..20c9125 100644
--- a/config.yaml
+++ b/config.yaml
@@ -1,5 +1,5 @@
 output:
-  rootDir: "./tests/20-02-21"
+  rootDir: "./presentation"
   prefix: ""
 smpl:
   modelRootDir: ./models
@@ -9,25 +9,25 @@ smpl:
   useVposerInit: false
 data:
   renameFiles: false
-  rootDir: ./samples/video02
+  rootDir: ./samples/video01
   personId: 0
-  sampleImageFormat: "frame-%%%.png"
-  sampleNameFormat: "input2_%%%%%%%%%%%%_keypoints.json"
+  sampleImageFormat: "input_%%%%%%%%%%%%_rendered.png"
+  sampleNameFormat: "input_%%%%%%%%%%%%_keypoints.json"
   sampleCoords: !!python/tuple [1080, 1080]
 camera:
   lr: 0.001
   patience: 10
   optimizer: Adam
 orientation:
-  lr: 0.01
-  optimizer: Adam
-  iterations: 100
+  lr: 0.03
+  optimizer: LBFGS
+  iterations: 5
   joint_names: ["hip-left", "hip-right", "shoulder-left", "shoulder-right"] # joints to be used for optimization
 pose:
   device: cuda
   lr: 0.01
   optimizer: Adam # currently supported Adam, LBFGS
-  iterations: 100
+  iterations: 150
   useCameraIntrinsics: true
   useOpenPoseConf: true # use openpose confidence to weight L2 distance loss
   bodyMeanLoss:
@@ -35,21 +35,21 @@ pose:
     weight: 0.1
   bodyPrior:
     enabled: true
-    weight: 0.01
+    weight: 0.1
   anglePrior:
     enabled: true
-    weight: 0.2
+    weight: 0.05
     # optional per joint configurations
     angleIdx: [56, 53, 12, 9, 37, 40]
-    directions: [-1, 1, -1, -1, 1, -1]
+    directions: [1, -1, -1, -1, 1, -1]
     # weights per joint
-    weights: [0.4, 0.4, 0.4, 0.4, 0.2, 0.2]
+    weights: [0.2, 0.2, 0.8, 0.8, 0.0, 0.0]
   angleLimitLoss:
     enabled: true
     weight: 0.01
   angleSumLoss:
     enabled: true
-    weight: 0.01
+    weight: 0.001
   intersectLoss:
     enabled: true
     weight: 0.5
@@ -57,14 +57,14 @@ pose:
     sigma: 0.5
   changeLoss:
     enabled: true
-    weight: 0.1
+    weight: 0.07
   confWeights:
     enabled: false
   vposerPath: "./vposer_v1_0"
   temporal:
     enabled: true
-    iterations: 75
-    lr: 0.01
+    iterations: 50
+    lr: 0.03
 preview:
   enable: true,
   keypoins:
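Both config.yaml above and the new config_loss_off.yaml below are read through the project's load_config() helper, which the example scripts in this patch call. The helper's body is not part of the patch; as a rough stand-in (the function name load_config_sketch and the hard-coded path are assumptions for illustration), loading such a file with PyYAML directly needs a loader that accepts the !!python/tuple tag used for sampleCoords:

import yaml

def load_config_sketch(path="./config.yaml"):
    # hypothetical stand-in for the project's load_config();
    # yaml.unsafe_load is used because safe_load rejects the
    # python-specific "!!python/tuple" tag on sampleCoords
    with open(path) as f:
        return yaml.unsafe_load(f)

config = load_config_sketch()
print(config["data"]["sampleCoords"])    # (1080, 1080) as a real Python tuple
print(config["pose"]["temporal"]["lr"])  # 0.03 with the values above
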
diff --git a/config_loss_off.yaml b/config_loss_off.yaml
new file mode 100644
index 0000000..b16b4f4
--- /dev/null
+++ b/config_loss_off.yaml
@@ -0,0 +1,81 @@
+output:
+  rootDir: "./tests/final-03"
+  prefix: ""
+smpl:
+  modelRootDir: ./models
+  type: smplx # possible options smplx and smpl
+  gender: MALE # possible options MALE, FEMALE, NEUTRAL
+  ext: npz
+  useVposerInit: false
+data:
+  renameFiles: false
+  rootDir: ./samples/video01
+  personId: 0
+  sampleImageFormat: "input_%%%%%%%%%%%%_rendered.png"
+  sampleNameFormat: "input_%%%%%%%%%%%%_keypoints.json"
+  sampleCoords: !!python/tuple [1080, 1080]
+camera:
+  lr: 0.001
+  patience: 10
+  optimizer: Adam
+orientation:
+  lr: 0.03
+  optimizer: LBFGS
+  iterations: 5
+  joint_names: ["hip-left", "hip-right", "shoulder-left", "shoulder-right"] # joints to be used for optimization
+pose:
+  device: cuda
+  lr: 0.01
+  optimizer: Adam # currently supported Adam, LBFGS
+  iterations: 200
+  useCameraIntrinsics: true
+  useOpenPoseConf: false # use openpose confidence to weight L2 distance loss
+  bodyMeanLoss:
+    enabled: false
+    weight: 0.1
+  bodyPrior:
+    enabled: false
+    weight: 0.01
+  anglePrior:
+    enabled: false
+    weight: 0.2
+    # optional per joint configurations
+    angleIdx: [56, 53, 12, 9, 37, 40]
+    directions: [-1, 1, -1, -1, 1, -1]
+    # weights per joint
+    weights: [0.4, 0.4, 0.4, 0.4, 0.2, 0.2]
+  angleLimitLoss:
+    enabled: false
+    weight: 0.01
+  angleSumLoss:
+    enabled: false
+    weight: 0.001
+  intersectLoss:
+    enabled: false
+    weight: 0.5
+    maxCollisions: 8
+    sigma: 0.5
+  changeLoss:
+    enabled: true
+    weight: 0.2
+  confWeights:
+    enabled: false
+  vposerPath: "./vposer_v1_0"
+  temporal:
+    enabled: true
+    iterations: 30
+    lr: 0.01
+preview:
+  enable: true,
+  keypoins:
+    enable: true,
+    radius: 0.01
+    color: 1.0, 0.0, 1.0, 1.0
+  keypoint_torso:
+    enable: true,
+    radius: 0.01
+    color: 1.0, 0.0, 1.0, 1.0
+  joints:
+    enable: true
+    radius: 0.01
+    color: 0.0, 0.7, 0.0, 1.0
diff --git a/example_fit.py b/example_fit.py
index af84a96..6d8c37f 100644
--- a/example_fit.py
+++ b/example_fit.py
@@ -1,4 +1,5 @@
 import os
+from utils.video import make_video, make_video_with_pip
 from utils.graphs import render_loss_graph
 from train import optimize_sample
 
@@ -9,19 +10,28 @@ from dataset import SMPLyDataset
 # load and select sample
 config = load_config()
 dataset = SMPLyDataset.from_config(config=config)
-sample_index = 55
+sample_index = 150
+save_to_video = True
+
 if os.getenv('SAMPLE_INDEX') is not None:
     sample_index = int(os.getenv('SAMPLE_INDEX'))
 
+
 # train for pose
 pose, camera_transformation, loss_history, step_imgs, loss_components = optimize_sample(
     sample_index,
     dataset,
     config,
-    interactive=True
+    interactive=not save_to_video,
+    offscreen=save_to_video
 )
 
+if save_to_video:
+    img_path = dataset.get_image_path(sample_index)
+    make_video_with_pip(step_imgs, pip_image_path=img_path,
+                        video_name="example_fit")
+
 filename = get_output_path_from_conf(config) + ".png"
 render_loss_graph(
diff --git a/example_fit_anim.py b/example_fit_anim.py
index eea19fa..d1fa579 100644
--- a/example_fit_anim.py
+++ b/example_fit_anim.py
@@ -8,7 +8,7 @@ from renderer import *
 from utils.general import rename_files, get_new_filename
 
 START_IDX = 1  # starting index of the frame to optimize for
-FINISH_IDX = 200  # choose a big number to optimize for all frames in samples directory
+FINISH_IDX = 60  # choose a big number to optimize for all frames in samples directory
 
 # if False, only run already saved animation without optimization
 RUN_OPTIMIZATION = True
diff --git a/example_fit_temporal.py b/example_fit_temporal.py
new file mode 100644
index 0000000..3b2dac0
--- /dev/null
+++ b/example_fit_temporal.py
@@ -0,0 +1,51 @@
+import os
+from utils.video import make_video_with_pip
+from utils.graphs import render_loss_graph
+from train import optimize_sample
+
+# local imports
+from utils.general import get_output_path_from_conf, load_config
+from dataset import SMPLyDataset
+
+# load and select sample
+config = load_config()
+dataset = SMPLyDataset.from_config(config=config)
+sample_index = 55
+save_to_video = True
+
+if os.getenv('SAMPLE_INDEX') is not None:
+    sample_index = int(os.getenv('SAMPLE_INDEX'))
+
+# train for pose
+best_pose, camera_transformation, loss_history, step_imgs, loss_components = optimize_sample(
+    sample_index - 1,
+    dataset,
+    config,
+    interactive=False
+)
+
+config['pose']['lr'] = config['pose']['temporal']['lr']
+config['pose']['iterations'] = config['pose']['temporal']['iterations']
+
+# reuse model to train new sample
+pose_temp, camera_transformation, loss_history, step_imgs, loss_components = optimize_sample(
+    sample_index,
+    dataset,
+    config,
+    interactive=False,
+    offscreen=True,
+    initial_pose=best_pose.body_pose.detach().clone().cpu(),
+    initial_orient=best_pose.global_orient.detach().clone().cpu()
+)
+
+if save_to_video:
+    img_path = dataset.get_image_path(sample_index)
+    make_video_with_pip(step_imgs, pip_image_path=img_path,
+                        video_name="example_fit_temporal")
+
+filename = get_output_path_from_conf(config) + ".png"
+render_loss_graph(
+    loss_history=loss_history,
+    loss_components=loss_components,
+    save=True,
+    filename=filename)
diff --git a/example_hyper.py b/example_hyper_optim.py
similarity index 74%
rename from example_hyper.py
rename to example_hyper_optim.py
index daae91a..61012ba 100644
--- a/example_hyper.py
+++ b/example_hyper_optim.py
@@ -12,8 +12,8 @@ from utils.general import *
 from renderer import *
 from utils.general import rename_files, get_new_filename
 
-START_IDX = 00  # starting index of the frame to optimize for
-FINISH_IDX = 20  # choose a big number to optimize for all frames in samples
+START_IDX = 140  # starting index of the frame to optimize for
+FINISH_IDX = 300  # choose a big number to optimize for all frames in samples
 
 device = torch.device('cpu')
 dtype = torch.float32
@@ -31,7 +31,7 @@ def run_test(config):
         FINISH_IDX,
         verbose=False,
         offscreen=False,
-        save_to_file=False,
+        save_to_file=True,
         interpolate=False
     )
     video_name = getfilename_from_conf(
@@ -49,24 +49,28 @@ def run_test(config):
     save_to_video(
         model_outs,
         video_name, config,
+        start_frame_offset=START_IDX,
         dataset=dataset,
         interpolation_target=60
     )
 
 
 def run_pose_tests(config):
-    priors_types = ['bodyPrior', 'anglePrior', 'angleSumLoss', 'temporal']
+    priors_types = ['bodyPrior', 'anglePrior',
+                    'angleSumLoss', 'temporal', 'intersectLoss', 'changeLoss']
     l = [False, True]
     permutations = [list(i)
                     for i in itertools.product(l, repeat=len(priors_types))]
-    lr_steps = [0.01, 0.02, 0.03, 0.04, 0.05,
-                0.07, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5]
+    lr_steps = [0.01]
+    total_runs = len(permutations) * len(lr_steps)
+    run_num = 1
     for lr in lr_steps:
         print("running test with lr:", lr)
         config['pose']['lr'] = lr
         for p in permutations:
-            print("running test:", config['pose']['optimizer'])
+            print("running test ", "(" + str(run_num) + "/" + str(total_runs) + "):",
+                  config['pose']['optimizer'])
             # iterate over all permutations and update config
             for i, v in enumerate(p):
                 config['pose'][priors_types[i]]['enabled'] = v
@@ -79,7 +83,7 @@ print("training: Adam")
 # run tests for adam
 run_pose_tests(config)
 
-print("training: LBFGS")
+# print("training: LBFGS")
 # try the same with lbfgs
-config = load_config("./config.lbfgs.temporal.yaml")
-run_pose_tests(config)
+# config = load_config("./config.lbfgs.temporal.yaml")
+# run_pose_tests(config)
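run_pose_tests() sweeps every on/off combination of the listed loss terms with itertools.product, so the six entries in priors_types and the single learning rate now give 2**6 = 64 runs (the total_runs counter). A self-contained sketch of just that enumeration, without the training calls (the print format and the enabled_terms dict are illustrative additions):

import itertools

priors_types = ['bodyPrior', 'anglePrior',
                'angleSumLoss', 'temporal', 'intersectLoss', 'changeLoss']
lr_steps = [0.01]

# one boolean flag per loss term -> 2**6 = 64 combinations
permutations = [list(p) for p in itertools.product([False, True], repeat=len(priors_types))]
total_runs = len(permutations) * len(lr_steps)
print("total runs:", total_runs)  # 64

for lr in lr_steps:
    for run_num, flags in enumerate(permutations, start=1):
        # e.g. flags == [False, True, False, False, True, False]
        enabled_terms = {name: flag for name, flag in zip(priors_types, flags)}
        print(f"run {run_num}/{total_runs}: lr={lr}", enabled_terms)
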
diff --git a/example_render_video.py b/example_render_video.py
index 6961d4f..7086f58 100644
--- a/example_render_video.py
+++ b/example_render_video.py
@@ -8,8 +8,8 @@ from utils.general import *
 from renderer import *
 from utils.general import rename_files, get_new_filename
 
-START_IDX = 1  # starting index of the frame to optimize for
-FINISH_IDX = None  # choose a big number to optimize for all frames in samples directory
+START_IDX = 150  # starting index of the frame to optimize for
+FINISH_IDX = 300  # choose a big number to optimize for all frames in samples directory
 
 result_image = []
 idx = START_IDX
@@ -24,13 +24,14 @@ model_outs, filename = create_animation(
     START_IDX,
     FINISH_IDX,
     verbose=False,
-    offscreen=True,
+    offscreen=False,
     save_to_file=False,
     interpolate=False
 )
 
-video_name = getfilename_from_conf(
+video_name = get_output_path_from_conf(
     config) + "-" + str(START_IDX) + "-" + str(FINISH_IDX)
 
 save_to_video(model_outs, video_name, config,
-              dataset=dataset, interpolation_target=60)
+              start_frame_offset=START_IDX,
+              dataset=dataset)
diff --git a/example_temporal.py b/example_temporal_optim.py
similarity index 100%
rename from example_temporal.py
rename to example_temporal_optim.py
diff --git a/train.py b/train.py
index e13e48f..6ecf588 100644
--- a/train.py
+++ b/train.py
@@ -18,12 +18,12 @@ from utils.video import interpolate_poses
 from camera_estimation import TorchCameraEstimate
 
 
-def optimize_sample(sample_index, dataset, config, device=torch.device('cpu'), dtype=torch.float32, interactive=True, offscreen=False, verbose=True, initial_pose=None):
+def optimize_sample(sample_index, dataset, config, device=torch.device('cpu'), dtype=torch.float32, interactive=True, offscreen=False, verbose=True, initial_pose=None, initial_orient=None):
     # prepare data and SMPL model
     model = SMPLyModel.model_from_conf(config)
     init_keypoints, init_joints, keypoints, conf, est_scale, r, img_path = setup_training(
         model=model,
-        renderer=interactive,
+        renderer=(interactive or offscreen),
         dataset=dataset,
         sample_index=sample_index,
         offscreen=offscreen
@@ -69,9 +69,10 @@ def optimize_sample(sample_index, dataset, config, device=torch.device('cpu'), d
     params = defaultdict(
         body_pose=initial_pose,
+        global_orient=initial_orient
     )
-
-    model(**params)
+    with torch.no_grad():
+        model(**params)
 
     # apply transform to scene
     if r is not None:
@@ -88,6 +89,11 @@ def optimize_sample(sample_index, dataset, config, device=torch.device('cpu'), d
         render_steps=(offscreen or interactive)
     )
 
+    # FIXME: there seems to be some form of projection issue, hence the orientation misestimates the angle
+    # with torch.no_grad():
+    #     .to(device=device, dtype=dtype)
+    #     model.global_orient[0][1] = -model.global_orient[0][1]
+
     # train for pose
     best_out, loss_history, step_imgs, loss_components = train_pose_with_conf(
         config=config,
@@ -118,6 +124,7 @@ def create_animation(dataset, config, start_idx=0, end_idx=None, offscreen=False
         end_idx = len(dataset) - 1
 
     initial_pose = None
+    initial_orient = None
 
     for idx in trange(end_idx - start_idx, desc='Optimizing'):
         idx = start_idx + idx
@@ -133,8 +140,8 @@ def create_animation(dataset, config, start_idx=0, end_idx=None, offscreen=False
             verbose=verbose,
             offscreen=offscreen,
             interactive=verbose,
-            initial_pose=initial_pose
-        )
+            initial_pose=initial_pose,
+            initial_orient=initial_orient)
 
         if verbose:
             print("Optimization of", idx, "frames finished")
@@ -152,6 +159,7 @@ def create_animation(dataset, config, start_idx=0, end_idx=None, offscreen=False
         if use_temporal_data:
             initial_pose = best_out.body_pose.detach().clone().cpu()  # .to(device=device)
+            initial_orient = best_out.global_orient.detach().clone().cpu()
 
     if interpolate:
         model_outs = interpolate_poses(model_outs)
diff --git a/train_orient.py b/train_orient.py
index f70d414..55924f5 100644
--- a/train_orient.py
+++ b/train_orient.py
@@ -176,4 +176,4 @@ def train_orient_with_conf(
         use_progress_bar=use_progress_bar,
     )
 
-    return best_output.global_orient
+    return best_output.global_orient.detach().clone().cpu()
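Taken together, the train.py and example_fit_temporal.py changes implement a warm start: the converged body pose and global orientation of the previous frame are detached, moved to the CPU, and passed to optimize_sample() as the initialization for the next frame. Condensed into one loop (a sketch, not code from the patch; the frame range is illustrative, while optimize_sample, load_config and SMPLyDataset are the same names used in the example scripts above):

from train import optimize_sample
from utils.general import load_config
from dataset import SMPLyDataset

config = load_config()
dataset = SMPLyDataset.from_config(config=config)
start_idx, end_idx = 150, 160  # hypothetical frame range

initial_pose = None
initial_orient = None
for idx in range(start_idx, end_idx):
    best_out, cam_trans, loss_history, step_imgs, loss_components = optimize_sample(
        idx,
        dataset,
        config,
        interactive=False,
        offscreen=True,
        initial_pose=initial_pose,
        initial_orient=initial_orient)

    # detach so the next frame starts from these values without carrying the old graph
    initial_pose = best_out.body_pose.detach().clone().cpu()
    initial_orient = best_out.global_orient.detach().clone().cpu()
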
diff --git a/utils/video.py b/utils/video.py
index 2517989..f745c68 100644
--- a/utils/video.py
+++ b/utils/video.py
@@ -48,7 +48,8 @@ def save_to_video(
     config: object,
     fps=30,
     include_thumbnail=True,
-    thumbnail_size=0.25,
+    thumbnail_size=0.2,
+    start_frame_offset=0,
     dataset: SMPLyDataset = None,
     interpolation_target=None
 ):
@@ -76,9 +77,14 @@ def save_to_video(
         inter_ratio = int(interpolation_target / fps)
         num_intermediate = inter_ratio - 1
         sample_output = interpolate_poses(sample_output, num_intermediate)
-
+    else:
+        sample_output = [
+            (
+                out.vertices.detach().cpu().numpy()[0],
+                cam
+            ) for out, cam in sample_output]
     frames = []
-    print("[export] rendering animation frames...")
+    print("[export] rendering animation frames...", sample_output[0][0].shape)
 
     # just use the first transform
     cam_transform = sample_output[0][1]
@@ -87,7 +93,7 @@ def save_to_video(
         r.render_model_geometry(
             faces=model_anim.faces,
             vertices=vertices,
-            pose=cam_transform,
+            pose=cam_trans  # cam_transform,
         )
         frames.append(r.get_snapshot())
 
@@ -98,6 +104,8 @@ def save_to_video(
     def post_process_frame(img, idx: int):
         if not include_thumbnail:
             return img
+        # account for the animation not starting at frame zero
+        idx = start_frame_offset + idx
         frame_idx = idx
         if interpolation_target is not None:
             # account for possible interpolation
@@ -122,6 +130,30 @@ def save_to_video(
                   post_process_frame=post_process_frame)
 
 
+def make_video_with_pip(frames, pip_image_path, video_name: str, fps=30, ext: str = "mp4", image_size=0.2):
+    """renders a video with a picture-in-picture (pip) frame in the corner
+    """
+
+    def post_process_frame(img, idx: int):
+        overlay = cv2.imread(pip_image_path)
+
+        if overlay is None:
+            print("[error] image could not be loaded:", pip_image_path)
+            return img
+
+        overlay = cv2.resize(
+            overlay,
+            dsize=(
+                int(overlay.shape[1] * image_size),
+                int(overlay.shape[0] * image_size)
+            ))
+        img[0:overlay.shape[0], 0:overlay.shape[1]] = overlay
+        return img
+
+    make_video(frames, video_name, fps,
+               post_process_frame=post_process_frame)
+
+
 def interpolate_poses(poses, num_intermediate=5):
     """
     Interpolate vertices and cameras between pairs of frames by adding intermediate results
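The patch is cut off inside interpolate_poses(), so its body is not shown here. From the docstring and from how save_to_video() consumes the result, each entry is a (vertices, camera_transform) pair and num_intermediate extra frames are inserted between consecutive pairs. A minimal linear-interpolation sketch of that idea (an illustration under those assumptions, not the project's actual implementation; the name interpolate_poses_sketch is hypothetical):

import numpy as np

def interpolate_poses_sketch(poses, num_intermediate=5):
    # poses: list of (vertices: np.ndarray, cam_transform) pairs
    out = []
    for (v0, cam0), (v1, _cam1) in zip(poses[:-1], poses[1:]):
        out.append((v0, cam0))
        for k in range(1, num_intermediate + 1):
            t = k / (num_intermediate + 1)
            # linearly blend vertex positions; reuse the first camera transform
            out.append(((1 - t) * v0 + t * v1, cam0))
    out.append(poses[-1])
    return out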