7 | 7 | "source": [
8 | 8 | "# Hello Object Detection\n",
9 | 9 | "\n",
10 | | - "A very basic introduction to using object detection models with OpenVINO.\n",
| 10 | + "A very basic introduction to using object detection models with OpenVINO™.\n",
11 | 11 | "\n",
12 | | - "We use the [horizontal-text-detection-0001](https://docs.openvino.ai/latest/omz_models_model_horizontal_text_detection_0001.html) model from the [Open Model Zoo](https://github.com/openvinotoolkit/open_model_zoo/). It detects horizontal text in images and returns a blob of data in the shape of `[100, 5]`. Each detected text box is stored in the format `[x_min, y_min, x_max, y_max, conf]`, where\n",
| 12 | + "The [horizontal-text-detection-0001](https://docs.openvino.ai/latest/omz_models_model_horizontal_text_detection_0001.html) model from [Open Model Zoo](https://github.com/openvinotoolkit/open_model_zoo/) is used. It detects horizontal text in images and returns a blob of data in the shape of `[100, 5]`. Each detected text box is stored in the `[x_min, y_min, x_max, y_max, conf]` format, where the\n",
13 | 13 | "`(x_min, y_min)` are the coordinates of the top left bounding box corner, `(x_max, y_max)` are the coordinates of the bottom right bounding box corner, and `conf` is the confidence for the predicted class."
14 | 14 | ]
15 | 15 | },
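To make that output layout concrete, here is a minimal sketch of decoding the `[100, 5]` blob. The `boxes` array and its values are invented for illustration; the zero-row filtering mirrors the inference cell shown further down in this diff.

```python
import numpy as np

# Hypothetical raw model output: 100 rows of [x_min, y_min, x_max, y_max, conf],
# with unused slots zero-filled.
boxes = np.zeros((100, 5), dtype=np.float32)
boxes[0] = [52.0, 41.0, 180.0, 75.0, 0.91]  # one made-up detection

# Keep only the non-zero rows, then unpack each detection.
for x_min, y_min, x_max, y_max, conf in boxes[~np.all(boxes == 0, axis=1)]:
    print(f"({x_min:.0f}, {y_min:.0f}) to ({x_max:.0f}, {y_max:.0f}), confidence {conf:.2f}")
```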
73 | 73 | "metadata": {},
74 | 74 | "outputs": [],
75 | 75 | "source": [
76 | | - "# Text detection models expects image in BGR format\n",
| 76 | + "# Text detection models expect an image in BGR format.\n",
77 | 77 | "image = cv2.imread(\"data/intel_rnb.jpg\")\n",
78 | 78 | "\n",
79 | | - "# N,C,H,W = batch size, number of channels, height, width\n",
| 79 | + "# N,C,H,W = batch size, number of channels, height, width.\n",
80 | 80 | "N, C, H, W = input_layer_ir.shape\n",
81 | 81 | "\n",
82 | | - "# Resize image to meet network expected input sizes\n",
| 82 | + "# Resize the image to meet network expected input sizes.\n",
83 | 83 | "resized_image = cv2.resize(image, (W, H))\n",
84 | 84 | "\n",
85 | | - "# Reshape to network input shape\n",
| 85 | + "# Reshape to the network input shape.\n",
86 | 86 | "input_image = np.expand_dims(resized_image.transpose(2, 0, 1), 0)\n",
87 | 87 | "\n",
88 | 88 | "plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB));"
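As a standalone illustration of that preprocessing, the sketch below assumes a 704×704 input size (the real `H` and `W` come from `input_layer_ir.shape`) and shows the HWC-to-NCHW reshape on a dummy image:

```python
import numpy as np

# Dummy BGR frame standing in for the resized image; 704x704 is an assumed
# input size, since the notebook reads N, C, H, W from input_layer_ir.shape.
resized_image = np.zeros((704, 704, 3), dtype=np.uint8)

# HWC -> CHW, then add a batch axis: (704, 704, 3) -> (1, 3, 704, 704).
input_image = np.expand_dims(resized_image.transpose(2, 0, 1), 0)
print(input_image.shape)  # (1, 3, 704, 704)
```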
103 | 103 | "metadata": {},
104 | 104 | "outputs": [],
105 | 105 | "source": [
106 | | - "# Create inference request\n",
| 106 | + "# Create an inference request.\n",
107 | 107 | "boxes = compiled_model([input_image])[output_layer_ir]\n",
108 | 108 | "\n",
109 | | - "# Remove zero only boxes\n",
| 109 | + "# Remove zero-only boxes.\n",
110 | 110 | "boxes = boxes[~np.all(boxes == 0, axis=1)]"
111 | 111 | ]
112 | 112 | },
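For context, the `compiled_model`, `input_layer_ir`, and `output_layer_ir` names come from an earlier cell not shown in this diff. A plausible setup, assuming the `openvino.runtime` API and a locally downloaded IR file (the path and device name here are assumptions), would look like this:

```python
from openvino.runtime import Core

core = Core()
# Assumed local path to the IR files downloaded from Open Model Zoo.
model = core.read_model(model="model/horizontal-text-detection-0001.xml")
compiled_model = core.compile_model(model=model, device_name="CPU")

input_layer_ir = compiled_model.input(0)
# "boxes" is the detection output name documented for this model.
output_layer_ir = compiled_model.output("boxes")
```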
125 | 125 | "metadata": {},
126 | 126 | "outputs": [],
127 | 127 | "source": [
128 | | - "# For each detection, the description has the format: [x_min, y_min, x_max, y_max, conf]\n",
129 | | - "# Image passed here is in BGR format with changed width and height. To display it in colors expected by matplotlib we use cvtColor function\n",
| 128 | + "# For each detection, the description is in the [x_min, y_min, x_max, y_max, conf] format.\n",
| 129 | + "# The image passed here is in BGR format with changed width and height. To display it in the colors expected by matplotlib, use the cvtColor function.\n",
130 | 130 | "def convert_result_to_image(bgr_image, resized_image, boxes, threshold=0.3, conf_labels=True):\n",
131 | | - "    # Define colors for boxes and descriptions\n",
| 131 | + "    # Define colors for boxes and descriptions.\n",
132 | 132 | "    colors = {\"red\": (255, 0, 0), \"green\": (0, 255, 0)}\n",
133 | 133 | "\n",
134 | | - "    # Fetch image shapes to calculate ratio\n",
| 134 | + "    # Fetch the image shapes to calculate a ratio.\n",
135 | 135 | "    (real_y, real_x), (resized_y, resized_x) = bgr_image.shape[:2], resized_image.shape[:2]\n",
136 | 136 | "    ratio_x, ratio_y = real_x / resized_x, real_y / resized_y\n",
137 | 137 | "\n",
138 | | - "    # Convert base image from bgr to rgb format\n",
| 138 | + "    # Convert the base image from BGR to RGB format.\n",
139 | 139 | "    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)\n",
140 | 140 | "\n",
141 | | - "    # Iterate through non-zero boxes\n",
| 141 | + "    # Iterate through non-zero boxes.\n",
142 | 142 | "    for box in boxes:\n",
143 | | - "        # Pick confidence factor from last place in array\n",
| 143 | + "        # Pick a confidence factor from the last place in the array.\n",
144 | 144 | "        conf = box[-1]\n",
145 | 145 | "        if conf > threshold:\n",
146 | | - "            # Convert float to int and multiply corner position of each box by x and y ratio\n",
147 | | - "            # In case that bounding box is found at the top of the image, \n",
148 | | - "            # we position upper box bar little lower to make it visible on image \n",
| 146 | + "            # Convert float to int and multiply corner position of each box by x and y ratio.\n",
| 147 | + "            # If the bounding box is found at the top of the image, \n",
| 148 | + "            # position the upper box bar a little lower to make it visible on the image. \n",
149 | 149 | "            (x_min, y_min, x_max, y_max) = [\n",
150 | 150 | "                int(max(corner_position * ratio_y, 10)) if idx % 2 \n",
151 | 151 | "                else int(corner_position * ratio_x)\n",
152 | 152 | "                for idx, corner_position in enumerate(box[:-1])\n",
153 | 153 | "            ]\n",
154 | 154 | "\n",
155 | | - "            # Draw box based on position, parameters in rectangle function are: image, start_point, end_point, color, thickness\n",
| 155 | + "            # Draw a box based on the position. Parameters in the rectangle function are: image, start_point, end_point, color, thickness.\n",
156 | 156 | "            rgb_image = cv2.rectangle(rgb_image, (x_min, y_min), (x_max, y_max), colors[\"green\"], 3)\n",
157 | 157 | "\n",
158 | | - "            # Add text to image based on position and confidence\n",
159 | | - "            # Parameters in text function are: image, text, bottom-left_corner_textfield, font, font_scale, color, thickness, line_type\n",
| 158 | + "            # Add text to the image based on position and confidence.\n",
| 159 | + "            # Parameters in the text function are: image, text, bottom-left_corner_textfield, font, font_scale, color, thickness, line_type.\n",
160 | 160 | "            if conf_labels:\n",
161 | 161 | "                rgb_image = cv2.putText(\n",
162 | 162 | "                    rgb_image,\n",
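The list comprehension at lines 149-153 is the densest part of this cell. A small worked sketch (all values invented for illustration) shows how one detection is scaled back to the original frame, including the y-clamp that keeps boxes touching the top edge visible:

```python
# Illustrative values: a 1280x720 original frame resized to an assumed 704x704 model input.
ratio_x, ratio_y = 1280 / 704, 720 / 704

# One hypothetical detection, [x_min, y_min, x_max, y_max] (confidence already dropped).
box = [176.0, 4.0, 352.0, 70.0]

# Odd indices (1, 3) are y coordinates: scale by ratio_y and clamp to >= 10
# so a box at the very top keeps a visible upper bar.
# Even indices (0, 2) are x coordinates: scale by ratio_x.
(x_min, y_min, x_max, y_max) = [
    int(max(corner_position * ratio_y, 10)) if idx % 2
    else int(corner_position * ratio_x)
    for idx, corner_position in enumerate(box)
]
print(x_min, y_min, x_max, y_max)  # 320 10 640 71
```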