Skip to content

Commit 3623705

Browse files
authored
Add yolo v8 (#71)
1 parent 03a6cee commit 3623705

19 files changed

+550
-67
lines changed

.github/workflows/test_accuracy.yml

+2
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ jobs:
2929
run: |
3030
source venv/bin/activate
3131
pytest --data=./data tests/python/accuracy/test_accuracy.py
32+
DATA=data pytest --data=./data tests/python/accuracy/test_YOLOv8.py
3233
- name: Install CPP ependencies
3334
run: |
3435
sudo bash model_api/cpp/install_dependencies.sh
@@ -40,3 +41,4 @@ jobs:
4041
- name: Run CPP Test
4142
run: |
4243
build/test_accuracy -d data -p tests/python/accuracy/public_scope.json
44+
DATA=data build/test_YOLOv8

docs/model-configuration.md

+3
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ The list features only model wrappers which introduce new configuration values in
4949
###### `YoloV4`
5050
1. `anchors`: List - list of custom anchor values
5151
1. `masks`: List - list of masks applied to anchors for each output layer
52+
###### `YOLOv5`, `YOLOv8`
53+
1. `agnostic_nms`: bool - if True, non-maximum suppression (NMS) is class-agnostic: all classes are considered as one, so overlapping boxes suppress each other regardless of their predicted class
54+
1. `iou_threshold`: float - threshold for non-maximum suppression (NMS) intersection over union (IOU) filtering
5255
###### `YOLOX`
5356
1. `iou_threshold`: float - threshold for non-maximum suppression (NMS) intersection over union (IOU) filtering
5457
#### `HpeAssociativeEmbedding`

model_api/cpp/models/include/models/detection_model_yolo.h

+21
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,24 @@ class ModelYolo : public DetectionModelExt {
8383
std::vector<int64_t> presetMasks;
8484
ov::Layout yoloRegionLayout = "NCHW";
8585
};
86+
87+
class YOLOv5 : public DetectionModelExt {
88+
// Reimplementation of ultralytics.YOLO
89+
void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
90+
void updateModelInfo() override;
91+
void init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority);
92+
bool agnostic_nms = false;
93+
public:
94+
YOLOv5(std::shared_ptr<ov::Model>& model, const ov::AnyMap& configuration);
95+
YOLOv5(std::shared_ptr<InferenceAdapter>& adapter);
96+
std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
97+
static std::string ModelType;
98+
};
99+
100+
class YOLOv8 : public YOLOv5 {
101+
public:
102+
// YOLOv5 and YOLOv8 are identical in terms of inference
103+
YOLOv8(std::shared_ptr<ov::Model>& model, const ov::AnyMap& configuration) : YOLOv5{model, configuration} {}
104+
YOLOv8(std::shared_ptr<InferenceAdapter>& adapter) : YOLOv5{adapter} {}
105+
static std::string ModelType;
106+
};

model_api/cpp/models/src/detection_model.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,10 @@ std::unique_ptr<DetectionModel> DetectionModel::create_model(const std::string&
9191
detectionModel = std::unique_ptr<DetectionModel>(new ModelYoloX(model, configuration));
9292
} else if (model_type == ModelCenterNet::ModelType) {
9393
detectionModel = std::unique_ptr<DetectionModel>(new ModelCenterNet(model, configuration));
94+
} else if (model_type == YOLOv5::ModelType) {
95+
detectionModel = std::unique_ptr<DetectionModel>(new YOLOv5(model, configuration));
96+
} else if (model_type == YOLOv8::ModelType) {
97+
detectionModel = std::unique_ptr<DetectionModel>(new YOLOv8(model, configuration));
9498
} else {
9599
throw std::runtime_error("Incorrect or unsupported model_type is provided in the model_info section: " + model_type);
96100
}

model_api/cpp/models/src/detection_model_faceboxes.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ std::unique_ptr<ResultBase> ModelFaceBoxes::postprocess(InferenceResult& infResu
243243
std::vector<Anchor> boxes = filterBoxes(boxesTensor, anchors, scores.first, variance);
244244

245245
// Apply Non-maximum Suppression
246-
const std::vector<int> keep = nms(boxes, scores.second, iou_threshold);
246+
const std::vector<size_t>& keep = nms(boxes, scores.second, iou_threshold);
247247

248248
// Create detection result objects
249249
DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);

model_api/cpp/models/src/detection_model_ssd.cpp

+10-8
Original file line numberDiff line numberDiff line change
@@ -161,12 +161,13 @@ std::unique_ptr<ResultBase> ModelSSD::postprocessSingleOutput(InferenceResult& i
161161
0.f,
162162
floatInputImgHeight);
163163
desc.width = clamp(
164-
round((detections[i * numAndStep.objectSize + 5] * netInputWidth - padLeft) * invertedScaleX - desc.x),
164+
round((detections[i * numAndStep.objectSize + 5] * netInputWidth - padLeft) * invertedScaleX),
165165
0.f,
166-
floatInputImgWidth);
166+
floatInputImgWidth) - desc.x;
167167
desc.height = clamp(
168-
round((detections[i * numAndStep.objectSize + 6] * netInputHeight - padTop) * invertedScaleY - desc.y),
169-
0.f, floatInputImgHeight);
168+
round((detections[i * numAndStep.objectSize + 6] * netInputHeight - padTop) * invertedScaleY),
169+
0.f,
170+
floatInputImgHeight) - desc.y;
170171
result->objects.push_back(desc);
171172
}
172173
}
@@ -222,12 +223,13 @@ std::unique_ptr<ResultBase> ModelSSD::postprocessMultipleOutputs(InferenceResult
222223
0.f,
223224
floatInputImgHeight);
224225
desc.width = clamp(
225-
round((boxes[i * numAndStep.objectSize + 2] * widthScale - padLeft) * invertedScaleX - desc.x),
226+
round((boxes[i * numAndStep.objectSize + 2] * widthScale - padLeft) * invertedScaleX),
226227
0.f,
227-
floatInputImgWidth);
228+
floatInputImgWidth) - desc.x;
228229
desc.height = clamp(
229-
round((boxes[i * numAndStep.objectSize + 3] * heightScale - padTop) * invertedScaleY - desc.y),
230-
0.f, floatInputImgHeight);
230+
round((boxes[i * numAndStep.objectSize + 3] * heightScale - padTop) * invertedScaleY),
231+
0.f,
232+
floatInputImgHeight) - desc.y;
231233
result->objects.push_back(desc);
232234
}
233235
}

model_api/cpp/models/src/detection_model_yolo.cpp

+169
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <openvino/openvino.hpp>
2828

2929
#include <utils/common.hpp>
30+
#include <utils/nms.hpp>
3031
#include <utils/slog.hpp>
3132

3233
#include "models/internal_model_data.h"
@@ -504,3 +505,171 @@ ModelYolo::Region::Region(size_t classes,
504505
num = anchors.size() / 2;
505506
}
506507
}
508+
509+
// Model type string of the YOLOv5 wrapper: DetectionModel::create_model compares the requested
// model_type against it, and YOLOv5::updateModelInfo() writes it into the "model_info" rt_info section.
std::string YOLOv5::ModelType = "YOLOv5";
510+
511+
// Validates the single input and single output of the model and, unless preprocessing is
// already embedded, embeds it into the graph and remembers the network input size.
// Throws std::runtime_error when the input rank, output precision, output rank or the
// number of labels does not match what the wrapper expects.
void YOLOv5::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Input ---------------------------
    const ov::Output<ov::Node>& modelInput = model->input();
    const ov::Shape& inputShape = modelInput.get_partial_shape().get_max_shape();
    if (4 != inputShape.size()) {
        throw std::runtime_error("YOLO: the rank of the input must be 4");
    }
    inputNames.push_back(modelInput.get_any_name());
    const ov::Layout& inputLayout = getInputLayout(modelInput);

    if (!embedded_processing) {
        // Spatial extents of the network input, taken from the layout-aware dimension indices.
        const auto inputWidth = inputShape[ov::layout::width_idx(inputLayout)];
        const auto inputHeight = inputShape[ov::layout::height_idx(inputLayout)];

        model = ImageModel::embedProcessing(model,
                                            inputNames[0],
                                            inputLayout,
                                            resizeMode,
                                            interpolationMode,
                                            ov::Shape{inputWidth, inputHeight},
                                            pad_value,
                                            reverse_input_channels,
                                            {},
                                            scale_values);

        netInputWidth = inputWidth;
        netInputHeight = inputHeight;

        embedded_processing = true;
    }

    // --------------------------- Output ---------------------------
    const ov::Output<const ov::Node>& modelOutput = model->output();
    if (modelOutput.get_element_type() != ov::element::Type_t::f32) {
        throw std::runtime_error("YOLO: the output must be of precision f32");
    }
    const ov::Shape& outputShape = modelOutput.get_partial_shape().get_max_shape();
    if (outputShape.size() != 3) {
        throw std::runtime_error("YOLO: the output must be of rank 3");
    }
    // Dimension 1 holds 4 box values followed by one score per class.
    const bool labelsGiven = !labels.empty();
    if (labelsGiven && outputShape[1] != labels.size() + 4) {
        throw std::runtime_error("YOLO: number of labels must be smaller than out_shape[1] by 4");
    }
}
552+
553+
void YOLOv5::updateModelInfo() {
554+
DetectionModelExt::updateModelInfo();
555+
model->set_rt_info(YOLOv5::ModelType, "model_info", "model_type");
556+
model->set_rt_info(agnostic_nms, "model_info", "agnostic_nms");
557+
model->set_rt_info(iou_threshold, "model_info", "iou_threshold");
558+
}
559+
560+
// Initialises the wrapper settings from two configuration maps via get_from_any_maps,
// passing the YOLO-specific value to use when a key is not configured.
// When neither map specifies "resize_type", linear interpolation with letterbox resizing is selected.
void YOLOv5::init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority) {
    pad_value = get_from_any_maps("pad_value", top_priority, mid_priority, 114);

    // Only override the resize settings when the user did not ask for a specific resize type.
    const bool resizeTypeConfigured =
        top_priority.count("resize_type") != 0 || mid_priority.count("resize_type") != 0;
    if (!resizeTypeConfigured) {
        interpolationMode = cv::INTER_LINEAR;
        resizeMode = RESIZE_KEEP_ASPECT_LETTERBOX;
    }

    reverse_input_channels = get_from_any_maps("reverse_input_channels", top_priority, mid_priority, true);
    scale_values = get_from_any_maps("scale_values", top_priority, mid_priority, std::vector<float>{255.0f});
    confidence_threshold = get_from_any_maps("confidence_threshold", top_priority, mid_priority, 0.25f);
    // The current member value (false unless changed earlier) is passed as the fallback.
    agnostic_nms = get_from_any_maps("agnostic_nms", top_priority, mid_priority, agnostic_nms);
    iou_threshold = get_from_any_maps("iou_threshold", top_priority, mid_priority, 0.7f);
}
572+
573+
// Builds the wrapper from an ov::Model: the user configuration is passed as the top-priority map
// and the model's own "model_info" rt_info section as the mid-priority map.
YOLOv5::YOLOv5(std::shared_ptr<ov::Model>& model, const ov::AnyMap& configuration)
    : DetectionModelExt(model, configuration) {
    const auto& modelInfo = model->get_rt_info<ov::AnyMap>("model_info");
    init_from_config(configuration, modelInfo);
}
577+
578+
// Builds the wrapper on top of an inference adapter: the adapter's model configuration is the
// top-priority map and the mid-priority map is left empty.
YOLOv5::YOLOv5(std::shared_ptr<InferenceAdapter>& adapter)
    : DetectionModelExt(adapter) {
    const ov::AnyMap emptyMidPriority{};
    init_from_config(adapter->getModelConfig(), emptyMidPriority);
}
582+
583+
// Converts the raw network output into a DetectionResult.
//
// The single output tensor is indexed as [1, rows, num_proposals]: rows 0..3 are read as the box
// centre x, centre y, width and height, and every following row as the score of one class.
// For each proposal the best-scoring class is selected; proposals above confidence_threshold are
// passed through NMS (class-agnostic or per class, depending on agnostic_nms) and the surviving
// boxes are mapped back to the coordinates of the original image.
//
// Throws std::runtime_error if there is not exactly one output, if the output is not of rank 3,
// if its first dimension is not 1, or if it has fewer than 4 rows.
std::unique_ptr<ResultBase> YOLOv5::postprocess(InferenceResult& infResult) {
    if (1 != infResult.outputsData.size()) {
        throw std::runtime_error("YOLO: expect 1 output");
    }
    const ov::Tensor& detectionsTensor = infResult.getFirstOutputTensor();
    const ov::Shape& out_shape = detectionsTensor.get_shape();
    if (3 != out_shape.size()) {
        throw std::runtime_error("YOLO: the output must be of rank 3");
    }
    if (1 != out_shape[0]) {
        throw std::runtime_error("YOLO: the first dim of the output must be 1");
    }
    constexpr size_t LABELS_START = 4;
    const size_t num_rows = out_shape[1];
    // Rows 0..3 are read unconditionally as box coordinates below, so they must exist.
    if (num_rows < LABELS_START) {
        throw std::runtime_error("YOLO: the second dim of the output must be at least 4");
    }
    const size_t num_proposals = out_shape[2];

    std::vector<Anchor> boxes;
    std::vector<float> confidences;
    std::vector<size_t> labelIDs;
    const float* const detections = detectionsTensor.data<float>();
    for (size_t i = 0; i < num_proposals; ++i) {
        float confidence = 0.0f;
        // Class index relative to LABELS_START. Tracking it directly (instead of the absolute row
        // and subtracting later) keeps the label valid even when no score exceeds 0, which
        // otherwise wrapped around to a huge size_t for a negative confidence_threshold.
        size_t best_class = 0;
        for (size_t j = LABELS_START; j < num_rows; ++j) {
            const float score = detections[j * num_proposals + i];
            if (score > confidence) {
                confidence = score;
                best_class = j - LABELS_START;
            }
        }
        if (confidence > confidence_threshold) {
            const float center_x = detections[0 * num_proposals + i];
            const float center_y = detections[1 * num_proposals + i];
            const float half_width = detections[2 * num_proposals + i] / 2.0f;
            const float half_height = detections[3 * num_proposals + i] / 2.0f;
            // Centre/size -> left, top, right, bottom.
            boxes.push_back(Anchor{
                center_x - half_width,
                center_y - half_height,
                center_x + half_width,
                center_y + half_height,
            });
            confidences.push_back(confidence);
            labelIDs.push_back(best_class);
        }
    }

    constexpr bool includeBoundaries = false;
    constexpr size_t keep_top_k = 30000;
    std::vector<size_t> keep;
    if (agnostic_nms) {
        keep = nms(boxes, confidences, iou_threshold, includeBoundaries, keep_top_k);
    } else {
        std::vector<AnchorLabeled> boxes_with_class;
        boxes_with_class.reserve(boxes.size());
        for (size_t i = 0; i < boxes.size(); ++i) {
            boxes_with_class.emplace_back(boxes[i], static_cast<int>(labelIDs[i]));
        }
        keep = multiclass_nms(boxes_with_class, confidences, iou_threshold, includeBoundaries, keep_top_k);
    }

    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    auto base = std::unique_ptr<ResultBase>(result);
    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
    float floatInputImgWidth = float(internalData.inputImgWidth),
          floatInputImgHeight = float(internalData.inputImgHeight);
    // Factors that map network-input coordinates back to original-image coordinates.
    float invertedScaleX = floatInputImgWidth / netInputWidth,
          invertedScaleY = floatInputImgHeight / netInputHeight;
    int padLeft = 0, padTop = 0;
    if (RESIZE_KEEP_ASPECT == resizeMode || RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) {
        // Aspect-preserving resize uses one common factor for both axes.
        invertedScaleX = invertedScaleY = std::max(invertedScaleX, invertedScaleY);
        if (RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) {
            // Letterbox centres the resized image, so half of the unused space is padding.
            padLeft = (netInputWidth - int(std::round(floatInputImgWidth / invertedScaleX))) / 2;
            padTop = (netInputHeight - int(std::round(floatInputImgHeight / invertedScaleY))) / 2;
        }
    }
    for (size_t idx : keep) {
        DetectedObject desc;
        desc.x = clamp(
            round((boxes[idx].left - padLeft) * invertedScaleX),
            0.f,
            floatInputImgWidth);
        desc.y = clamp(
            round((boxes[idx].top - padTop) * invertedScaleY),
            0.f,
            floatInputImgHeight);
        // Clamp the right/bottom edge first and only then subtract the origin, so the box stays
        // inside the image.
        desc.width = clamp(
            round((boxes[idx].right - padLeft) * invertedScaleX),
            0.f,
            floatInputImgWidth) - desc.x;
        desc.height = clamp(
            round((boxes[idx].bottom - padTop) * invertedScaleY),
            0.f,
            floatInputImgHeight) - desc.y;
        desc.confidence = confidences[idx];
        desc.labelID = labelIDs[idx];
        desc.label = getLabelName(desc.labelID);
        result->objects.push_back(desc);
    }
    return base;
}
674+
675+
// Model type string of the YOLOv8 wrapper: DetectionModel::create_model compares the requested
// model_type against it to construct a YOLOv8 object.
std::string YOLOv8::ModelType = "YOLOv8";

model_api/cpp/models/src/detection_model_yolox.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,8 @@ std::unique_ptr<ResultBase> ModelYoloX::postprocess(InferenceResult& infResult)
190190
}
191191

192192
// NMS for valid boxes
193-
std::vector<int> keep = nms(validBoxes, scores, iou_threshold, true);
194-
for (auto& index: keep) {
193+
const std::vector<size_t>& keep = nms(validBoxes, scores, iou_threshold, true);
194+
for (size_t index: keep) {
195195
// Create new detected box
196196
DetectedObject obj;
197197
obj.x = clamp(validBoxes[index].left, 0.f, static_cast<float>(scale.inputImgWidth));

model_api/cpp/utils/include/utils/nms.hpp

+14-15
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,13 @@ struct AnchorLabeled : public Anchor {
5050
AnchorLabeled() = default;
5151
AnchorLabeled(float _left, float _top, float _right, float _bottom, int _labelID) :
5252
Anchor(_left, _top, _right, _bottom), labelID(_labelID) {}
53+
AnchorLabeled(const Anchor& coords, int labelID) : Anchor{coords}, labelID{labelID} {}
5354
};
5455

5556
template <typename Anchor>
56-
std::vector<int> nms(const std::vector<Anchor>& boxes, const std::vector<float>& scores,
57-
const float thresh, bool includeBoundaries=false, size_t maxNum=0) {
58-
if (maxNum == 0) {
59-
maxNum = boxes.size();
57+
std::vector<size_t> nms(const std::vector<Anchor>& boxes, const std::vector<float>& scores, const float thresh, bool includeBoundaries=false, size_t keep_top_k=0) {
58+
if (keep_top_k == 0) {
59+
keep_top_k = boxes.size();
6060
}
6161
std::vector<float> areas(boxes.size());
6262
for (size_t i = 0; i < boxes.size(); ++i) {
@@ -67,25 +67,24 @@ std::vector<int> nms(const std::vector<Anchor>& boxes, const std::vector<float>&
6767
std::sort(order.begin(), order.end(), [&scores](int o1, int o2) { return scores[o1] > scores[o2]; });
6868

6969
size_t ordersNum = 0;
70-
for (; ordersNum < order.size() && scores[order[ordersNum]] >= 0 && ordersNum < maxNum; ordersNum++);
70+
for (; ordersNum < order.size() && scores[order[ordersNum]] >= 0 && ordersNum < keep_top_k; ordersNum++);
7171

72-
std::vector<int> keep;
72+
std::vector<size_t> keep;
7373
bool shouldContinue = true;
7474
for (size_t i = 0; shouldContinue && i < ordersNum; ++i) {
75-
auto idx1 = order[i];
75+
int idx1 = order[i];
7676
if (idx1 >= 0) {
7777
keep.push_back(idx1);
7878
shouldContinue = false;
7979
for (size_t j = i + 1; j < ordersNum; ++j) {
80-
auto idx2 = order[j];
80+
int idx2 = order[j];
8181
if (idx2 >= 0) {
8282
shouldContinue = true;
83-
auto overlappingWidth = std::fminf(boxes[idx1].right, boxes[idx2].right) - std::fmaxf(boxes[idx1].left, boxes[idx2].left);
84-
auto overlappingHeight = std::fminf(boxes[idx1].bottom, boxes[idx2].bottom) - std::fmaxf(boxes[idx1].top, boxes[idx2].top);
85-
auto intersection = overlappingWidth > 0 && overlappingHeight > 0 ? overlappingWidth * overlappingHeight : 0;
86-
auto overlap = intersection / (areas[idx1] + areas[idx2] - intersection);
87-
88-
if (overlap >= thresh) {
83+
float overlappingWidth = std::fminf(boxes[idx1].right, boxes[idx2].right) - std::fmaxf(boxes[idx1].left, boxes[idx2].left);
84+
float overlappingHeight = std::fminf(boxes[idx1].bottom, boxes[idx2].bottom) - std::fmaxf(boxes[idx1].top, boxes[idx2].top);
85+
float intersection = overlappingWidth > 0 && overlappingHeight > 0 ? overlappingWidth * overlappingHeight : 0;
86+
float union_area = areas[idx1] + areas[idx2] - intersection;
87+
if (0.0f == union_area || intersection / union_area > thresh) {
8988
order[j] = -1;
9089
}
9190
}
@@ -95,5 +94,5 @@ std::vector<int> nms(const std::vector<Anchor>& boxes, const std::vector<float>&
9594
return keep;
9695
}
9796

98-
std::vector<int> multiclass_nms(const std::vector<AnchorLabeled>& boxes, const std::vector<float>& scores,
97+
std::vector<size_t> multiclass_nms(const std::vector<AnchorLabeled>& boxes, const std::vector<float>& scores,
9998
const float iou_threshold=0.45f, bool includeBoundaries=false, size_t maxNum=200);

model_api/cpp/utils/src/nms.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
#include "utils/nms.hpp"
2020

2121

22-
std::vector<int> multiclass_nms(const std::vector<AnchorLabeled>& boxes, const std::vector<float>& scores,
22+
std::vector<size_t> multiclass_nms(const std::vector<AnchorLabeled>& boxes, const std::vector<float>& scores,
2323
const float iou_threshold, bool includeBoundaries, size_t maxNum) {
2424
std::vector<Anchor> boxes_copy;
2525
boxes_copy.reserve(boxes.size());

model_api/python/openvino/model_api/models/__init__.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
add_rotated_rects,
5757
get_contours,
5858
)
59-
from .yolo import YOLO, YOLOF, YOLOX, YoloV3ONNX, YoloV4
59+
from .yolo import YOLO, YOLOF, YOLOX, YoloV3ONNX, YoloV4, YOLOv5, YOLOv8
6060

6161
classification_models = [
6262
"resnet-18-pytorch",
@@ -118,6 +118,8 @@
118118
"YOLO",
119119
"YoloV3ONNX",
120120
"YoloV4",
121+
"YOLOv5",
122+
"YOLOv8",
121123
"YOLOF",
122124
"YOLOX",
123125
"ClassificationResult",

0 commit comments

Comments
 (0)