{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Prepping YOLOv5Face model for use in Ente\n",
"\n",
"[Paper](https://arxiv.org/abs/2105.12931) | [Github](https://github.com/deepcam-cn/yolov5-face)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setting up PyTorch weights and source code"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Please manually place the PyTorch `.pt` weights in the `pytorch_weights` directory."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"model_weights_path = \"pytorch_weights/yolov5s_face.pt\"\n",
"models_path = \"onnx_models/\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!mkdir yoloface_repo\n",
"%cd yoloface_repo\n",
"!git clone https://github.com/deepcam-cn/yolov5-face.git\n",
"%cd ..\n",
"!cp -r yoloface_repo/yolov5-face/models/ models/\n",
"!cp -r yoloface_repo/yolov5-face/utils/ utils/\n",
"!rm -rf yoloface_repo"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Libraries\n",
"import torch\n",
"import torch.nn as nn\n",
"from PIL import Image\n",
"import json\n",
"import numpy as np\n",
"import onnx\n",
"import onnxruntime as ort\n",
"print(ort.__version__)\n",
"\n",
"# Source code\n",
"from models.common import Conv, ShuffleV2Block\n",
"from models.experimental import attempt_load\n",
"from utils.activations import Hardswish, SiLU\n",
"from utils.general import set_logging, check_img_size"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Export to ONNX"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"onnx_opset = 18\n",
"img_size = [640, 640]\n",
"batch_size = 1\n",
"dynamic_shapes = False\n",
"\n",
"# Load PyTorch model\n",
"model = attempt_load(\n",
"    model_weights_path, map_location=torch.device(\"cpu\")\n",
") # load FP32 model\n",
"delattr(model.model[-1], \"anchor_grid\")\n",
"model.model[-1].anchor_grid = [\n",
"    torch.zeros(1)\n",
"] * 3 # nl=3 number of detection layers\n",
"model.model[-1].export_cat = True\n",
"model.eval()\n",
"labels = model.names\n",
"\n",
"# Checks\n",
"gs = int(max(model.stride)) # grid size (max stride)\n",
"img_size = [\n",
"    check_img_size(x, gs) for x in img_size\n",
"] # verify img_size are gs-multiples\n",
"\n",
"# Test input\n",
"img = torch.zeros(batch_size, 3, *img_size)\n",
"\n",
"# Update model\n",
"for k, m in model.named_modules():\n",
"    m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility\n",
"    if isinstance(m, Conv): # assign export-friendly activations\n",
"        if isinstance(m.act, nn.Hardswish):\n",
"            m.act = Hardswish()\n",
"        elif isinstance(m.act, nn.SiLU):\n",
"            m.act = SiLU()\n",
"    if isinstance(m, ShuffleV2Block): # shufflenet block nn.SiLU\n",
"        for i in range(len(m.branch1)):\n",
"            if isinstance(m.branch1[i], nn.SiLU):\n",
"                m.branch1[i] = SiLU()\n",
"        for i in range(len(m.branch2)):\n",
"            if isinstance(m.branch2[i], nn.SiLU):\n",
"                m.branch2[i] = SiLU()\n",
"y = model(img) # dry run\n",
"\n",
"# ONNX export\n",
"print(\"\\nStarting ONNX export with onnx %s...\" % onnx.__version__)\n",
"onnx_model_export_path = models_path + model_weights_path.replace(\".pt\", \".onnx\").split('/')[-1]\n",
"model.fuse()\n",
"input_names = [\"input\"]\n",
"output_names = [\"output\"]\n",
"torch.onnx.export(\n",
"    model,\n",
"    img,\n",
"    onnx_model_export_path,\n",
"    verbose=False,\n",
"    opset_version=onnx_opset,\n",
"    input_names=input_names,\n",
"    output_names=output_names,\n",
"    dynamic_axes=(\n",
"        {\"input\": {0: \"batch\"}, \"output\": {0: \"batch\"}} if dynamic_shapes else None\n",
"    ),\n",
")\n",
"\n",
"# Checks\n",
"onnx_model = onnx.load(onnx_model_export_path) # load onnx model\n",
"onnx.checker.check_model(onnx_model) # check onnx model\n",
"\n",
"# onnx infer\n",
"providers = [\"CPUExecutionProvider\"]\n",
"session = ort.InferenceSession(onnx_model_export_path, providers=providers)\n",
"im = img.cpu().numpy().astype(np.float32) # torch to numpy\n",
"y_onnx = session.run(\n",
"    [session.get_outputs()[0].name], {session.get_inputs()[0].name: im}\n",
")[0]\n",
"print(\"pred's shape is \", y_onnx.shape)\n",
"print(\"max(|torch_pred - onnx_pred|) =\", abs(y.cpu().numpy() - y_onnx).max())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"!rm -rf models/\n",
"!rm -rf utils/"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Altering ONNX model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Add preprocessing inside model"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from onnxruntime_extensions.tools.pre_post_processing import PrePostProcessor, create_named_value, Resize, ImageBytesToFloat, Unsqueeze, Debug, LetterBox, ChannelsLastToChannelsFirst"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"inputs = [create_named_value(\"input_to_preprocess\", onnx.TensorProto.UINT8, [\"H\", \"W\", \"C\"])]\n",
"\n",
"pipeline = PrePostProcessor(inputs, onnx_opset)\n",
"\n",
"pipeline.add_pre_processing(\n",
"    [\n",
"        Resize(640, layout=\"HWC\", policy=\"not_larger\"), # Resize so the largest dimension is 640, maintaining aspect ratio (the smaller dimension will be <= 640)\n",
"        # Debug(),\n",
"        LetterBox((640, 640), layout=\"HWC\", fill_value=114), # Add padding so the image is always exactly 640x640\n",
"        ChannelsLastToChannelsFirst(), # Convert to CHW\n",
"        # Debug(),\n",
"        ImageBytesToFloat(), # Convert to float in range 0..1 by dividing uint8 values by 255\n",
"        # Debug(),\n",
"        Unsqueeze([0]), # add batch, CHW --> 1CHW\n",
"        # Debug(),\n",
"    ]\n",
")\n",
"\n",
"# pipeline.add_post_processing()\n",
"onnx_model_prepro = pipeline.run(onnx_model)\n",
"onnx.checker.check_model(onnx_model_prepro)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To debug and visually inspect the preprocessing, uncomment the `Debug()` statements in the block above and run it again, then uncomment and run the code in the block below:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# onnx.save(onnx_model_prepro, \"yolov5s_face_prepro.onnx\")\n",
"\n",
"# image_singapore = Image.open(\"../data/singapore.jpg\").convert('RGB')\n",
"# image_singapore_onnx = np.array(image_singapore)\n",
"# print(image_singapore_onnx.shape)\n",
"# print(type(image_singapore_onnx))\n",
"# print(image_singapore_onnx.dtype)\n",
"\n",
"# ort_session = ort.InferenceSession(\"yolov5s_face_prepro.onnx\")\n",
"# test = ort_session.run(None, {\"input_to_preprocess\": image_singapore_onnx})\n",
"\n",
"# preprocessed = test[4]\n",
"# print(preprocessed.shape)\n",
"# print(type(preprocessed))\n",
"\n",
"# # import matplotlib.pyplot as plt\n",
"# from IPython.display import display\n",
"# # matplotlib.use('TkAgg')\n",
"\n",
"# displayable_array = preprocessed.reshape(3, 640, 640).transpose((1, 2, 0))\n",
"# # Display the image\n",
"# # matplotlib.pyplot.imshow(displayable_array)\n",
"# # matplotlib.pyplot.axis('off')\n",
"# # matplotlib.pyplot.show()\n",
"# display(Image.fromarray((displayable_array * 255).astype(np.uint8)))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Add slice operator for use with RGBA input"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Create a new input with flexible channel dimension\n",
"new_input = onnx.helper.make_tensor_value_info(\n",
"    name=\"input\",\n",
"    elem_type=onnx.TensorProto.UINT8,\n",
"    shape=[\"H\", \"W\", \"C\"],\n",
")\n",
"\n",
"# Create constant tensors for starts, ends, and axes and use them to create a Slice node\n",
"starts_tensor = onnx.helper.make_tensor(\n",
"    name=\"starts\",\n",
"    data_type=onnx.TensorProto.INT64,\n",
"    dims=[1],\n",
"    vals=np.array([0], dtype=np.int64)\n",
")\n",
"ends_tensor = onnx.helper.make_tensor(\n",
"    name=\"ends\",\n",
"    data_type=onnx.TensorProto.INT64,\n",
"    dims=[1],\n",
"    vals=np.array([3], dtype=np.int64)\n",
")\n",
"axes_tensor = onnx.helper.make_tensor(\n",
"    name=\"axes\",\n",
"    data_type=onnx.TensorProto.INT64,\n",
"    dims=[1],\n",
"    vals=np.array([2], dtype=np.int64)\n",
")\n",
"slice_node = onnx.helper.make_node(\n",
"    \"Slice\",\n",
"    inputs=[\"input\", \"starts\", \"ends\", \"axes\"],\n",
"    outputs=[\"sliced_input\"],\n",
"    name=\"slice_rgba_input_node\",\n",
")\n",
"# Combine initializers\n",
"initializers = [starts_tensor, ends_tensor, axes_tensor] + list(onnx_model_prepro.graph.initializer)\n",
"\n",
"# Get the name of the original input\n",
"original_input_name = onnx_model_prepro.graph.input[0].name\n",
"\n",
"# Make new graph by adding the new input and Slice node to the old graph\n",
"graph = onnx.helper.make_graph(\n",
"    [slice_node] + list(onnx_model_prepro.graph.node), # Prepend Slice node to existing nodes\n",
"    onnx_model_prepro.graph.name,\n",
"    [new_input] + list(onnx_model_prepro.graph.input)[1:], # Replace first input, keep others\n",
"    list(onnx_model_prepro.graph.output),\n",
"    initializer=initializers,\n",
"    value_info=onnx_model_prepro.graph.value_info,\n",
")\n",
"\n",
"# Create the new model\n",
"onnx_model_rgba = onnx.helper.make_model(\n",
"    graph,\n",
"    opset_imports=[onnx.helper.make_opsetid(\"\", onnx_opset)]\n",
")\n",
"\n",
"# Update the input names in the rest of the model\n",
"for node in onnx_model_rgba.graph.node:\n",
"    for i, input_name in enumerate(node.input):\n",
"        if input_name == original_input_name:\n",
"            node.input[i] = \"sliced_input\"\n",
"\n",
"# Save the new model\n",
"onnx.checker.check_model(onnx_model_rgba)\n",
"onnx_model_rgba_path = onnx_model_export_path[:-5] + \"_rgba.onnx\"\n",
"onnx.save(onnx_model_rgba, onnx_model_rgba_path)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# image = Image.open(\"../data/man.jpeg\").convert('RGBA')\n",
"# image_onnx = np.array(image)\n",
"# print(image_onnx.shape)\n",
"# print(type(image_onnx))\n",
"# print(image_onnx.dtype)\n",
"\n",
"# ort_session = ort.InferenceSession(\"yolov5s_face_rgba.onnx\")\n",
"# test = ort_session.run(None, {\"input\": image_onnx})\n",
"# print(test[0].shape)\n",
"# scores_output = test[0][:,:,4]\n",
"# print(f\"Highest score: {scores_output.max()}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Add post-processing inside the model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's first rename the output of the model so we can name the post-processed output `output`.\n",
"Then we have to split `[1, 25200, 16]` into `[1, 25200, 4]`, `[1, 25200, 1]`, `[1, 25200, 11]` (i.e. `[1, detections, bbox]`, `[1, detections, score]`, `[1, detections, landmarks]`) named `boxes`, `scores`, `masks`."
]
},
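{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick illustration of those split semantics, here is a NumPy sketch of what the Split node built below does (dummy data, not real model output):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# NumPy sketch of the Split node built below (dummy data, illustration only)\n",
"dummy_output = np.zeros((1, 25200, 16), dtype=np.float32)\n",
"boxes, scores, masks = np.split(dummy_output, [4, 5], axis=2) # split sizes [4, 1, 11]\n",
"print(boxes.shape, scores.shape, masks.shape) # (1, 25200, 4) (1, 25200, 1) (1, 25200, 11)"
]
},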
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Add a Split operator at the end so that the output can be used with the SelectBestBoundingBoxesByNMS operator\n",
"num_det = 25200\n",
"graph = onnx_model_rgba.graph\n",
"\n",
"# Let's first rename the output of the model so we can name the post-processed output `output`\n",
"for node in onnx_model_rgba.graph.node:\n",
"    for i, output_name in enumerate(node.output):\n",
"        if output_name == \"output\":\n",
"            node.output[i] = \"og_output\"\n",
"og_output = onnx.helper.make_tensor_value_info(\n",
"    name=\"og_output\",\n",
"    elem_type=onnx.TensorProto.FLOAT,\n",
"    shape=[1, num_det, 16],\n",
")\n",
"\n",
"# Create the split node\n",
"boxes_output = onnx.helper.make_tensor_value_info(\n",
"    name=\"boxes_unsqueezed\",\n",
"    elem_type=onnx.TensorProto.FLOAT,\n",
"    shape=[1, num_det, 4],\n",
")\n",
"scores_output = onnx.helper.make_tensor_value_info(\n",
"    name=\"scores_unsqueezed\",\n",
"    elem_type=onnx.TensorProto.FLOAT,\n",
"    shape=[1, num_det, 1],\n",
")\n",
"masks_output = onnx.helper.make_tensor_value_info(\n",
"    name=\"masks_unsqueezed\",\n",
"    elem_type=onnx.TensorProto.FLOAT,\n",
"    shape=[1, num_det, 11],\n",
")\n",
"splits_tensor = onnx.helper.make_tensor(\n",
"    name=\"splits\",\n",
"    data_type=onnx.TensorProto.INT64,\n",
"    dims=[3],\n",
"    vals=np.array([4, 1, 11], dtype=np.int64)\n",
")\n",
"split_node = onnx.helper.make_node(\n",
"    \"Split\",\n",
"    inputs=[\"og_output\", \"splits\"],\n",
"    outputs=[\"boxes_unsqueezed\", \"scores_unsqueezed\", \"masks_unsqueezed\"],\n",
"    name=\"split_og_output\",\n",
"    axis=2,\n",
")\n",
"\n",
"# Combine initializers\n",
"initializers = list(graph.initializer) + [splits_tensor]\n",
"\n",
"# Make new graph by adding the new outputs and Split node to the old graph\n",
"graph = onnx.helper.make_graph(\n",
"    list(graph.node) + [split_node], # Append split node to existing nodes\n",
"    graph.name,\n",
"    list(graph.input),\n",
"    [boxes_output, scores_output, masks_output],\n",
"    initializer=initializers,\n",
"    value_info=graph.value_info,\n",
")\n",
"\n",
"# Create the new model\n",
"onnx_model_split = onnx.helper.make_model(\n",
"    graph,\n",
"    opset_imports=[onnx.helper.make_opsetid(\"\", onnx_opset)]\n",
")\n",
"\n",
"# Save the new model\n",
"onnx.checker.check_model(onnx_model_split)\n",
"onnx_model_split_path = onnx_model_export_path[:-5] + \"_split.onnx\"\n",
"onnx.save(onnx_model_split, onnx_model_split_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we can run NMS on these split outputs using the `NonMaxSuppression` operator."
]
},
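{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a standalone illustration of the `NonMaxSuppression` interface (a minimal sketch with made-up boxes, not part of the export): the operator expects boxes of shape `[batch, num_boxes, 4]` and scores of shape `[batch, num_classes, num_boxes]`, which is why the next cell transposes the scores. It returns rows of `(batch_index, class_index, box_index)` for the kept boxes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal NonMaxSuppression demo (made-up data, illustration only)\n",
"demo_boxes = np.array([[[0.5, 0.5, 1.0, 1.0], [0.52, 0.5, 1.0, 1.0], [5.0, 5.0, 1.0, 1.0]]], dtype=np.float32) # (1, 3, 4), center-point format\n",
"demo_scores = np.array([[[0.9, 0.8, 0.7]]], dtype=np.float32) # (1, 1, 3): batch, classes, boxes\n",
"demo_graph = onnx.helper.make_graph(\n",
"    [onnx.helper.make_node(\n",
"        \"NonMaxSuppression\",\n",
"        inputs=[\"demo_boxes\", \"demo_scores\", \"demo_max\", \"demo_iou\", \"demo_score\"],\n",
"        outputs=[\"demo_idx\"],\n",
"        center_point_box=1,\n",
"    )],\n",
"    \"nms_demo\",\n",
"    [onnx.helper.make_tensor_value_info(\"demo_boxes\", onnx.TensorProto.FLOAT, [1, 3, 4]),\n",
"     onnx.helper.make_tensor_value_info(\"demo_scores\", onnx.TensorProto.FLOAT, [1, 1, 3])],\n",
"    [onnx.helper.make_tensor_value_info(\"demo_idx\", onnx.TensorProto.INT64, [None, 3])],\n",
"    initializer=[\n",
"        onnx.helper.make_tensor(\"demo_max\", onnx.TensorProto.INT64, [1], np.array([10], dtype=np.int64)),\n",
"        onnx.helper.make_tensor(\"demo_iou\", onnx.TensorProto.FLOAT, [1], np.array([0.4], dtype=np.float32)),\n",
"        onnx.helper.make_tensor(\"demo_score\", onnx.TensorProto.FLOAT, [1], np.array([0.5], dtype=np.float32)),\n",
"    ],\n",
")\n",
"demo_model = onnx.helper.make_model(demo_graph, opset_imports=[onnx.helper.make_opsetid(\"\", onnx_opset)])\n",
"demo_sess = ort.InferenceSession(demo_model.SerializeToString(), providers=[\"CPUExecutionProvider\"])\n",
"print(demo_sess.run(None, {\"demo_boxes\": demo_boxes, \"demo_scores\": demo_scores})[0]) # the overlapping second box is suppressed"
]
},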
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"num_det = 25200\n",
"graph = onnx_model_split.graph\n",
"nodes = list(graph.node)\n",
"outputs = list(graph.output)\n",
"initializers = list(graph.initializer)\n",
"original_output = graph.output[0]\n",
"\n",
"# Create the Transpose node for the scores (NonMaxSuppression expects scores of shape [batch, classes, boxes])\n",
"transpose_node_score = onnx.helper.make_node(\n",
"    \"Transpose\",\n",
"    inputs=[\"scores_unsqueezed\"],\n",
"    outputs=[\"scores_transposed\"],\n",
"    name=\"transpose_scores\",\n",
"    perm=[0, 2, 1],\n",
")\n",
"nodes.append(transpose_node_score)\n",
"\n",
"# Create the NMS node\n",
"nms_indices = onnx.helper.make_tensor_value_info(\"nms_indices\", onnx.TensorProto.INT64, shape=[\"detections\", 3])\n",
"max_output = onnx.helper.make_tensor(\"max_output\", onnx.TensorProto.INT64, [1], np.array([100], dtype=np.int64))\n",
"iou_threshold = onnx.helper.make_tensor(\"iou_threshold\", onnx.TensorProto.FLOAT, [1], np.array([0.4], dtype=np.float32))\n",
"score_threshold = onnx.helper.make_tensor(\"score_threshold\", onnx.TensorProto.FLOAT, [1], np.array([0.6], dtype=np.float32))\n",
"initializers = initializers + [max_output, iou_threshold, score_threshold]\n",
"nms_node = onnx.helper.make_node(\n",
"    \"NonMaxSuppression\",\n",
"    inputs=[\"boxes_unsqueezed\", \"scores_transposed\", \"max_output\", \"iou_threshold\", \"score_threshold\"],\n",
"    outputs=[\"nms_indices\"],\n",
"    name=\"perform_nms\",\n",
"    center_point_box=1,\n",
")\n",
"nodes.append(nms_node)\n",
"outputs.append(nms_indices)\n",
"\n",
"# Make new graph by adding the new output and NMS node to the old graph\n",
"graph = onnx.helper.make_graph(\n",
"    nodes,\n",
"    graph.name,\n",
"    list(graph.input),\n",
"    outputs,\n",
"    initializer=initializers,\n",
"    value_info=graph.value_info,\n",
")\n",
"\n",
"# Create the new model\n",
"onnx_model_nms = onnx.helper.make_model(\n",
"    graph,\n",
"    opset_imports=[onnx.helper.make_opsetid(\"\", onnx_opset)]\n",
")\n",
"\n",
"# Save the new model\n",
"onnx.checker.check_model(onnx_model_nms)\n",
"onnx_model_nms_path = onnx_model_export_path[:-5] + \"_nms.onnx\"\n",
"onnx.save(onnx_model_nms, onnx_model_nms_path)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# image = Image.open(\"../data/man.jpeg\").convert('RGBA')\n",
"# image_onnx = np.array(image)\n",
"\n",
"# ort_session = ort.InferenceSession(\"yolov5s_face_nms.onnx\")\n",
"# test = ort_session.run(None, {\"input\": image_onnx})\n",
"# print(test[3].shape)\n",
"# print(test[3])\n",
"# print(test[1][0, 24129, 0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we need to add some Squeeze, Slice and Gather nodes to handle the indices returned by NMS properly. The goal is for the final output to be a simple array of shape `(detections, 16)` containing only the relevant detections."
]
},
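{
"cell_type": "markdown",
"metadata": {},
"source": [
"A NumPy sketch of the index handling we are about to build in ONNX (made-up indices, illustration only):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# NumPy equivalent of the Slice + Squeeze + Gather chain built below (made-up indices)\n",
"demo_nms_indices = np.array([[0, 0, 7], [0, 0, 42]], dtype=np.int64) # (detections, 3): batch, class, box index\n",
"demo_box_indices = demo_nms_indices[:, 2] # Slice third column + Squeeze -> (detections,)\n",
"demo_og_output = np.zeros((1, 25200, 16), dtype=np.float32)\n",
"demo_result = demo_og_output[0][demo_box_indices] # Squeeze batch dim + Gather -> (detections, 16)\n",
"print(demo_result.shape) # (2, 16)"
]
},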
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"num_det = 25200\n",
"graph = onnx_model_nms.graph\n",
"nodes = list(graph.node)\n",
"outputs = list(graph.output)\n",
"initializers = list(graph.initializer)\n",
"original_output = graph.output[0]\n",
"\n",
"# Create Slice node to slice the NMS indices from (detections, 3) to (detections, 1) by taking the third column\n",
"sliced_indices = onnx.helper.make_tensor_value_info(\"sliced_indices\", onnx.TensorProto.INT64, shape=[\"detections\", 1])\n",
"outputs.append(sliced_indices)\n",
"starts_slice_tensor = onnx.helper.make_tensor(\n",
"    name=\"starts_slice_tensor\",\n",
"    data_type=onnx.TensorProto.INT64,\n",
"    dims=[1],\n",
"    vals=np.array([2], dtype=np.int64)\n",
")\n",
"ends_slice_tensor = onnx.helper.make_tensor(\n",
"    name=\"ends_slice_tensor\",\n",
"    data_type=onnx.TensorProto.INT64,\n",
"    dims=[1],\n",
"    vals=np.array([3], dtype=np.int64)\n",
")\n",
"axes_slice_tensor = onnx.helper.make_tensor(\n",
"    name=\"axes_slice_tensor\",\n",
"    data_type=onnx.TensorProto.INT64,\n",
"    dims=[1],\n",
"    vals=np.array([1], dtype=np.int64)\n",
")\n",
"initializers = initializers + [starts_slice_tensor, ends_slice_tensor, axes_slice_tensor]\n",
"slice_node = onnx.helper.make_node(\n",
"    \"Slice\",\n",
"    inputs=[\"nms_indices\", \"starts_slice_tensor\", \"ends_slice_tensor\", \"axes_slice_tensor\"],\n",
"    outputs=[\"sliced_indices\"],\n",
"    name=\"slice_nms_indices\",\n",
")\n",
"nodes.append(slice_node)\n",
"\n",
"# Create Squeeze node to squeeze the sliced indices\n",
"squeezed_indices = onnx.helper.make_tensor_value_info(\"squeezed_indices\", onnx.TensorProto.INT64, shape=[\"detections\"])\n",
"outputs.append(squeezed_indices)\n",
"squeeze_slice_tensor = onnx.helper.make_tensor(\"squeeze_slice_axis\", onnx.TensorProto.INT64, [1], np.array([1], dtype=np.int64))\n",
"initializers.append(squeeze_slice_tensor)\n",
"squeeze_slice_node = onnx.helper.make_node(\n",
"    \"Squeeze\",\n",
"    inputs=[\"sliced_indices\", \"squeeze_slice_axis\"],\n",
"    outputs=[\"squeezed_indices\"],\n",
"    name=\"squeeze_sliced_indices\",\n",
")\n",
"nodes.append(squeeze_slice_node)\n",
"\n",
"# Create Squeeze node to squeeze the original output\n",
"squeezed_output = onnx.helper.make_tensor_value_info(\"squeezed_output\", onnx.TensorProto.FLOAT, shape=[num_det, 16])\n",
"outputs.append(squeezed_output)\n",
"squeeze_tensor = onnx.helper.make_tensor(\"squeeze_axis\", onnx.TensorProto.INT64, [1], np.array([0], dtype=np.int64))\n",
"initializers.append(squeeze_tensor)\n",
"squeeze_node = onnx.helper.make_node(\n",
"    \"Squeeze\",\n",
"    inputs=[\"og_output\", \"squeeze_axis\"],\n",
"    outputs=[\"squeezed_output\"],\n",
"    name=\"squeeze_output\",\n",
")\n",
"nodes.append(squeeze_node)\n",
"\n",
"# Create Gather node to gather the relevant NMS indices from the original output\n",
"postpro_output = onnx.helper.make_tensor_value_info(\"output\", onnx.TensorProto.FLOAT, shape=[\"detections\", 16])\n",
"outputs.append(postpro_output)\n",
"gather_node = onnx.helper.make_node(\n",
"    \"Gather\",\n",
"    inputs=[\"squeezed_output\", \"squeezed_indices\"],\n",
"    outputs=[\"output\"],\n",
"    name=\"gather_output\",\n",
")\n",
"nodes.append(gather_node)\n",
"\n",
"# Make the new graph\n",
"graph = onnx.helper.make_graph(\n",
"    nodes,\n",
"    graph.name,\n",
"    list(graph.input),\n",
"    [postpro_output],\n",
"    initializer=initializers,\n",
"    value_info=graph.value_info,\n",
")\n",
"\n",
"# Create the new model\n",
"onnx_model_prepostpro = onnx.helper.make_model(\n",
"    graph,\n",
"    opset_imports=[onnx.helper.make_opsetid(\"\", onnx_opset)]\n",
")\n",
"\n",
"# Save the new model\n",
"onnx.checker.check_model(onnx_model_prepostpro)\n",
"onnx_model_prepostpro_path = onnx_model_export_path[:-5] + \"_prepostpro.onnx\"\n",
"onnx.save(onnx_model_prepostpro, onnx_model_prepostpro_path)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# image = Image.open(\"../data/people.jpeg\").convert('RGBA')\n",
"# image_onnx = np.array(image)\n",
"\n",
"# ort_session = ort.InferenceSession(\"yolov5s_face_prepostpro.onnx\")\n",
"# test = ort_session.run(None, {\"input\": image_onnx})\n",
"# test[0].shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Optimize model"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Define the path for the simplified model\n",
"onnx_model_sim_path = onnx_model_export_path[:-5] + f\"_opset{onnx_opset}_rgba_sim.onnx\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Simplify the model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!onnxsim {onnx_model_prepostpro_path} {onnx_model_sim_path}"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# !onnxsim yolov5s_face_prepostpro.onnx yolov5s_face_opset18_rgba_sim.onnx"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optimize the graph"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"opt_sess_options = ort.SessionOptions()\n",
"\n",
"opt_sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC\n",
"\n",
"onnx_model_opt_path = onnx_model_export_path[:-5] + f\"_opset{onnx_opset}_rgba_opt.onnx\"\n",
"opt_sess_options.optimized_model_filepath = onnx_model_opt_path\n",
"\n",
"opt_session = ort.InferenceSession(onnx_model_sim_path, opt_sess_options)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Prevent splits initializer issue\n",
"\n",
"For some reason the model can give issues on iOS when there's an initializer named \"splits\",\n",
"so to prevent that we check for and rename any such initializer."
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"current_model = onnx.load(onnx_model_opt_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def find_duplicates(name_list):\n",
"    seen = set()\n",
"    duplicates = set()\n",
"\n",
"    for name in name_list:\n",
"        if name in seen:\n",
"            duplicates.add(name)\n",
"        else:\n",
"            seen.add(name)\n",
"\n",
"    return list(duplicates)\n",
"\n",
"# Get the list of initializers and their names\n",
"initializers = current_model.graph.initializer\n",
"init_names = [init.name for init in initializers]\n",
"\n",
"# Store the initializers in a dictionary keyed by name\n",
"initializer_dict = {init.name: init for init in initializers}\n",
"\n",
"print(f\"splits initializer: \\n {initializer_dict['splits']}\")\n",
"\n",
"duplicate_names = find_duplicates(init_names)\n",
"\n",
"print(\"Duplicate names:\", duplicate_names)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"def rename_initializer(model, old_name, new_name):\n",
"    for initializer in model.graph.initializer:\n",
"        if initializer.name == old_name:\n",
"            initializer.name = new_name\n",
"            break\n",
"\n",
"    # Update any references to this initializer in the graph inputs\n",
"    for input in model.graph.input:\n",
"        if input.name == old_name:\n",
"            input.name = new_name\n",
"\n",
"    # Update references in nodes\n",
"    for node in model.graph.node:\n",
"        for i, input_name in enumerate(node.input):\n",
"            if input_name == old_name:\n",
"                node.input[i] = new_name"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"rename_initializer(current_model, \"splits\", \"splits_initializer_unique\")\n",
"\n",
"# Save the modified model\n",
"onnx_model_opt_with_splits_path = onnx_model_opt_path\n",
"onnx_model_opt_path = onnx_model_opt_path[:-5] + \"_nosplits.onnx\"\n",
"onnx.save(current_model, onnx_model_opt_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Add metadata to model\n",
"\n",
"https://onnx.ai/onnx/intro/python.html#opset-and-metadata"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"new_yolo_face_model = onnx.load(onnx_model_opt_path)\n",
"new_yolo_face_model.producer_name = \"EnteYOLOv5Face\"\n",
"new_yolo_face_model.doc_string = \"YOLOv5 Face detector with built-in pre- and post-processing. Accepts both RGB and RGBA raw bytes input (uint8) in HWC format. Outputs the relevant detections in the format (detections, 16), where the first 4 values are the bounding box coordinates, the fifth is the confidence score, and the rest are the landmarks.\"\n",
"new_yolo_face_model.graph.doc_string = \"\"\n",
"new_yolo_face_model.graph.name = \"SliceRGB+Resize+LetterBox+ToFloat+Unsqueeze+YOLOv5Face+NMS+Slice+Gather\"\n",
"onnx.save(new_yolo_face_model, onnx_model_opt_path)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"!rm {onnx_model_export_path}\n",
"!rm {onnx_model_rgba_path}\n",
"!rm {onnx_model_split_path}\n",
"!rm {onnx_model_nms_path}\n",
"!rm {onnx_model_prepostpro_path}\n",
"!rm {onnx_model_sim_path}\n",
"!rm {onnx_model_opt_with_splits_path}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Tune some settings"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# from tqdm import tqdm\n",
"# import time"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# image = Image.open(\"../data/people.jpeg\").convert('RGBA')\n",
"# image_onnx = np.array(image)\n",
"# time_test_size = 500\n",
"\n",
"# sess_options1 = ort.SessionOptions()\n",
"# sess_options1.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED\n",
"# # sess_options.enable_profiling = True\n",
"# # sess_options.log_severity_level = 0 # Verbose\n",
"# sess_options1.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL\n",
"# ort_session1 = ort.InferenceSession(onnx_model_opt_path, sess_options1)\n",
"\n",
"# begin_time_1 = time.time()\n",
"# for i in tqdm(range(time_test_size)):\n",
"#     _ = ort_session1.run(None, {\"input\": image_onnx})\n",
"# end_time_1 = time.time()\n",
"# time_1 = end_time_1 - begin_time_1\n",
"\n",
"# sess_options2 = ort.SessionOptions()\n",
"# sess_options2.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED\n",
"# # sess_options.enable_profiling = True\n",
"# # sess_options.log_severity_level = 0 # Verbose\n",
"# sess_options2.inter_op_num_threads = 4\n",
"# # sess_options2.intra_op_num_threads = 4\n",
"# sess_options2.execution_mode = ort.ExecutionMode.ORT_PARALLEL\n",
"# ort_session2 = ort.InferenceSession(onnx_model_opt_path, sess_options2, providers=[\"CPUExecutionProvider\"])\n",
"\n",
"# begin_time_2 = time.time()\n",
"# for i in tqdm(range(time_test_size)):\n",
"#     _ = ort_session2.run(None, {\"input\": image_onnx})\n",
"# end_time_2 = time.time()\n",
"# time_2 = end_time_2 - begin_time_2\n",
"\n",
"# print(f\"Time for first execution: {time_1}\")\n",
"# print(f\"Time for second execution: {time_2}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Lessons from the timing runs above:\n",
"1. Use sequential execution\n",
"2. Use extended optimizations\n",
"3. The number of inter-op threads doesn't have a significant impact\n",
"4. The number of intra-op threads doesn't have a significant impact"
]
},
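{
"cell_type": "markdown",
"metadata": {},
"source": [
"Applying those lessons, a minimal sketch of the session configuration we would use in practice (not benchmarked here; all other options are assumed to stay at their defaults):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Session configuration following the lessons above (sketch only)\n",
"rec_sess_options = ort.SessionOptions()\n",
"rec_sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED # lesson 2\n",
"rec_sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL # lesson 1\n",
"rec_session = ort.InferenceSession(onnx_model_opt_path, rec_sess_options, providers=[\"CPUExecutionProvider\"])"
]
},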
{
"cell_type": "markdown",
"metadata": {},
"source": [
"One final test:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"image = Image.open(\"../data/man.jpeg\").convert('RGBA')\n",
"imageWidth, imageHeight = image.size\n",
"inputWidth, inputHeight = 640, 640\n",
"print(imageWidth, imageHeight)\n",
"image_onnx = np.array(image)\n",
"\n",
"sess_options1 = ort.SessionOptions()\n",
"sess_options1.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED\n",
"# sess_options.enable_profiling = True\n",
"# sess_options.log_severity_level = 0 # Verbose\n",
"# sess_options1.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL\n",
"ort_session = ort.InferenceSession(onnx_model_opt_path)\n",
"raw_detection = ort_session.run(None, {\"input\": image_onnx})[0][0]\n",
"print(raw_detection.shape)\n",
"raw_detection"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"from PIL import Image, ImageDraw\n",
"from IPython.display import display\n",
"\n",
"def display_face_detection(image_path, face_box, landmarks):\n",
"    # Open the image\n",
"    img = Image.open(image_path)\n",
"\n",
"    # Create a draw object\n",
"    draw = ImageDraw.Draw(img)\n",
"\n",
"    # Draw the bounding box\n",
"    draw.rectangle(face_box, outline=\"red\", width=2)\n",
"\n",
"    # Draw the landmark points\n",
"    for point in landmarks:\n",
"        x, y = point\n",
"        radius = 3\n",
"        draw.ellipse([x-radius, y-radius, x+radius, y+radius], fill=\"blue\")\n",
"\n",
"    # Display the image\n",
"    display(img)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"def correct_detection_and_display(image_path, raw_detection, imageWidth, imageHeight, inputWidth, inputHeight):\n",
"\n",
"    # Create the raw relative bounding box and landmarks\n",
"    box = [0, 0, 0, 0]\n",
"    box[0] = (raw_detection[0] - raw_detection[2] / 2) / inputWidth\n",
"    box[1] = (raw_detection[1] - raw_detection[3] / 2) / inputHeight\n",
"    box[2] = (raw_detection[0] + raw_detection[2] / 2) / inputWidth\n",
"    box[3] = (raw_detection[1] + raw_detection[3] / 2) / inputHeight\n",
"    landmarks = [(0, 0), (0, 0), (0, 0), (0, 0), (0, 0)]\n",
"    i = 0\n",
"    for x, y in zip(raw_detection[5:15:2], raw_detection[6:15:2]):\n",
"        landmarks[i] = (x / inputWidth, y / inputHeight)\n",
"        i += 1\n",
"\n",
"    # Correct the bounding box and landmarks for letterboxing during preprocessing\n",
"    scale = min(inputWidth / imageWidth, inputHeight / imageHeight)\n",
"    scaledWidth = round(imageWidth * scale)\n",
"    scaledHeight = round(imageHeight * scale)\n",
"    print(f\"scaledWidth: {scaledWidth}, scaledHeight: {scaledHeight}\")\n",
"\n",
"    halveDiffX = (inputWidth - scaledWidth) / 2\n",
"    halveDiffY = (inputHeight - scaledHeight) / 2\n",
"    print(f\"halveDiffX: {halveDiffX}, halveDiffY: {halveDiffY}\")\n",
"    scaleX = inputWidth / scaledWidth\n",
"    scaleY = inputHeight / scaledHeight\n",
"    translateX = -halveDiffX / inputWidth\n",
"    translateY = -halveDiffY / inputHeight\n",
"    print(f\"scaleX: {scaleX}, scaleY: {scaleY}\")\n",
"    print(f\"translateX: {translateX}, translateY: {translateY}\")\n",
"\n",
"    box[0] = (box[0] + translateX) * scaleX\n",
"    box[1] = (box[1] + translateY) * scaleY\n",
"    box[2] = (box[2] + translateX) * scaleX\n",
"    box[3] = (box[3] + translateY) * scaleY\n",
"\n",
"    for i in range(5):\n",
"        landmarks[i] = ((landmarks[i][0] + translateX) * scaleX, (landmarks[i][1] + translateY) * scaleY)\n",
"\n",
"    # Convert the bounding box and landmarks to absolute values\n",
"    box = [box[0] * imageWidth, box[1] * imageHeight, box[2] * imageWidth, box[3] * imageHeight]\n",
"    landmarks = [(x * imageWidth, y * imageHeight) for x, y in landmarks]\n",
"\n",
"    print(\"Bounding box:\", box)\n",
"    print(\"Landmarks:\", landmarks)\n",
"\n",
"    display_face_detection(image_path, box, landmarks)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"image_path = \"../data/man.jpeg\"\n",
"# face_box = (50, 10, 100, 100) # (left, top, right, bottom)\n",
"# landmarks = [\n",
"#     (30, 30), # Left eye\n",
"#     (80, 30), # Right eye\n",
"#     (55, 50), # Nose\n",
"#     (35, 80), # Left mouth corner\n",
"#     (75, 80) # Right mouth corner\n",
"# ]\n",
"\n",
"correct_detection_and_display(image_path, raw_detection, imageWidth, imageHeight, inputWidth, inputHeight)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ente_clip",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}