diff --git a/.gitignore b/.gitignore index bb51d5f..71b8591 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,8 @@ __pycache__/ *.out output data/*/data +*.tar +*.pth # Distribution / packaging diff --git a/README.md b/README.md index 2e7aae3..ad09b9f 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,13 @@ This repository is the offical [Pytorch](https://pytorch.org/) implementation of **[HandOccNet: Occlusion-Robust 3D Hand Mesh Estimation Network (CVPR 2022)](https://arxiv.org/abs/2203.14564)**. Below is the overall pipeline of HandOccNet. ![overall pipeline](./asset/model.png) +## Web demo +:rocket: **Run model through Replicate's web demo here**: + +[Demo and Docker image on Replicate](https://replicate.com/namepllet/3d-hand-estimation) + + + ## Quick demo * Install **[PyTorch](https://pytorch.org)** and Python >= 3.7.4 and run `sh requirements.sh`. * Download `snapshot_demo.pth.tar` from [here](https://drive.google.com/drive/folders/1OlyV-qbzOmtQYdzV6dbQX4OtAU5ajBOa?usp=sharing) and place at `demo` folder. diff --git a/cog.yaml b/cog.yaml new file mode 100644 index 0000000..5cc621f --- /dev/null +++ b/cog.yaml @@ -0,0 +1,31 @@ +# Configuration for Cog ⚙️ +# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md + +build: + # set to true if your model requires a GPU + gpu: true + + system_packages: + - "libosmesa6-dev" + - "freeglut3-dev" + + # python version in the form '3.8' or '3.8.12' + python_version: "3.8" + + python_packages: + - "numpy==1.17.4" + - "torch==1.9.1" + - "torchvision==0.10.1" + - "tqdm==4.64.0" + - "pycocotools==2.0.4" + - "pyrender==0.1.45" + - "chumpy==0.70" + - "einops==0.4.1" + - "opencv-python-headless==4.6.0.66" + # commands run after the environment is setup + run: + - "echo Done setting up environment!" 
+
+
+# predict.py defines how predictions are run on your model
+predict: "demo/predict.py:Predictor"
diff --git a/demo/predict.py b/demo/predict.py
new file mode 100644
index 0000000..a6ff751
--- /dev/null
+++ b/demo/predict.py
@@ -0,0 +1,162 @@
+""" Generates 3D model of hand given image with possible occlusions"""
+# Prediction interface for Cog ⚙️
+# https://github.com/replicate/cog/blob/main/docs/python.md
+import argparse
+import os
+import os.path as osp
+import sys
+import tempfile
+import zipfile
+
+import cv2
+import numpy as np
+import torch
+import torch.backends.cudnn as cudnn
+import torchvision.transforms as transforms
+from cog import BaseModel, BasePredictor, Input, Path
+from torch.nn.parallel.data_parallel import DataParallel
+
+# The project packages are resolved relative to the current working directory.
+sys.path.insert(0, osp.join("main"))
+sys.path.insert(0, osp.join("common"))
+
+from config import cfg
+from model import get_model
+from utils.mano import MANO
+from utils.preprocessing import generate_patch_image, load_img, process_bbox
+from utils.vis import save_obj
+
+
+def _parse_bbox(bbox_coords):
+    """Parse a "xmin,ymin,width,height" string into a list of four floats.
+
+    Raises:
+        ValueError: if the string does not contain exactly four numbers.
+    """
+    parts = bbox_coords.split(",")
+    if len(parts) != 4:
+        raise ValueError(
+            "bbox_coords must be 'xmin,ymin,width,height', got {!r}".format(
+                bbox_coords
+            )
+        )
+    try:
+        return [float(part) for part in parts]
+    except ValueError:
+        raise ValueError(
+            "bbox_coords must contain only numbers, got {!r}".format(bbox_coords)
+        ) from None
+
+
+class Output(BaseModel):
+    """Files returned by a prediction."""
+
+    bbox_img: Path  # input image with the hand bounding box drawn on it
+    obj_model: Path  # zip archive containing the predicted mesh as .obj
+
+
+class Predictor(BasePredictor):
+    """Cog predictor that estimates a 3D hand mesh from an image and a bbox."""
+
+    def setup(self):
+        """Load the model into memory to make running multiple predictions efficient"""
+
+        self.mano = MANO()
+
+        # argument parsing
+        gpu = "0"
+        cfg.set_args(gpu)
+        cudnn.benchmark = True
+
+        # snapshot load
+        model_path = "./demo/snapshot_demo.pth.tar"
+        # Raise explicitly: an assert would be stripped when running with -O.
+        if not osp.exists(model_path):
+            raise FileNotFoundError("Cannot find model at " + model_path)
+        print("Load checkpoint from {}".format(model_path))
+
+        # get model
+        self.model = get_model("test")
+        self.model = DataParallel(self.model).cuda()
+        ckpt = torch.load(model_path)
+        self.model.load_state_dict(ckpt["network"], strict=False)
+        self.model.eval()
+
+        # prepare input image
+        self.transform = transforms.ToTensor()
+
+    def predict(
+        self,
+        image: Path = Input(description="Input image"),
+        bbox_coords: str = Input(
+            description="Input comma-separated bounding box coordinates of hand (xmin,ymin,width,height)"
+        ),
+    ) -> Output:
+        """Run hand mesh estimation on one image.
+
+        Args:
+            image: path of the input image.
+            bbox_coords: hand bounding box as "xmin,ymin,width,height".
+
+        Returns:
+            Output with the bbox visualization image and a zip holding the
+            predicted mesh in .obj format.
+
+        Raises:
+            ValueError: if bbox_coords is malformed or yields no usable box.
+        """
+        img_path = str(image)
+
+        original_img = load_img(img_path)
+        original_img_height, original_img_width = original_img.shape[:2]
+
+        # prepare bbox
+        print("Preprocessing bounding boxes.......")
+        bbox = _parse_bbox(bbox_coords)  # xmin, ymin, width, height
+        bbox = process_bbox(bbox, original_img_width, original_img_height)
+        # NOTE(review): process_bbox presumably returns None for a box it
+        # rejects -- confirm against utils.preprocessing.
+        if bbox is None:
+            raise ValueError(
+                "Bounding box {!r} is not valid for this image".format(bbox_coords)
+            )
+        img, img2bb_trans, bb2img_trans = generate_patch_image(
+            original_img, bbox, 1.0, 0.0, False, cfg.input_img_shape
+        )
+        img = self.transform(img.astype(np.float32)) / 255
+        img = img.cuda()[None, :, :, :]
+
+        # forward
+        print("Running model inference.......")
+        inputs = {"img": img}
+        targets = {}
+        meta_info = {}
+        with torch.no_grad():
+            out = self.model(inputs, targets, meta_info, "test")
+        verts_out = out["mesh_coord_cam"][0].cpu().numpy()
+
+        # bbox for input hand image
+        bbox_vis = np.array(bbox, int)
+        bbox_vis[2:] += bbox_vis[:2]
+        cvimg = cv2.rectangle(
+            original_img.copy(), bbox_vis[:2], bbox_vis[2:], (255, 0, 0), 3
+        )
+
+        print("Generating outputs.......")
+        # save hand image with bbox
+        bbox_path = Path(tempfile.mkdtemp()) / "hand_bbox.png"
+        cv2.imwrite(str(bbox_path), cvimg[:, :, ::-1])
+
+        # save mesh; the .obj goes into a per-call temp dir rather than the
+        # working directory so predictions never overwrite each other's files
+        mesh_dir = Path(tempfile.mkdtemp())
+        zip_path = mesh_dir / "hand_model_3d.zip"
+        obj_path = mesh_dir / "hand_model_3d.obj"
+        save_obj(verts_out * np.array([1, -1, -1]), self.mano.face, str(obj_path))
+
+        print('Zipping .obj file......')
+        with zipfile.ZipFile(str(zip_path), "w") as zip_obj:
+            # arcname keeps the archive entry name flat, as before
+            zip_obj.write(str(obj_path), arcname=obj_path.name)
+
+        return Output(bbox_img=bbox_path, obj_model=zip_path)