summaryrefslogtreecommitdiff
path: root/python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp')
-rw-r--r--python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp188
1 files changed, 188 insertions, 0 deletions
diff --git a/python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp b/python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp
new file mode 100644
index 0000000..132eb9e
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp
@@ -0,0 +1,188 @@
+/*
+// Copyright (C) 2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/detection_model_yolov3_onnx.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include <utils/common.hpp>
+#include <utils/slog.hpp>
+
+#include "models/input_data.h"
+#include "models/internal_model_data.h"
+#include "models/results.h"
+#include "utils/image_utils.h"
+
+ModelYoloV3ONNX::ModelYoloV3ONNX(const std::string& modelFileName,
+ float confidenceThreshold,
+ const std::vector<std::string>& labels,
+ const std::string& layout)
+ : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) {
+ interpolationMode = cv::INTER_CUBIC;
+ resizeMode = RESIZE_KEEP_ASPECT_LETTERBOX;
+ }
+
+
+void ModelYoloV3ONNX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare inputs ------------------------------------------------------
+ const ov::OutputVector& inputs = model->inputs();
+ if (inputs.size() != 2) {
+ throw std::logic_error("YoloV3ONNX model wrapper expects models that have 2 inputs");
+ }
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ inputsNames.reserve(inputs.size());
+ for (auto& input : inputs) {
+ const ov::Shape& currentShape = input.get_shape();
+ std::string currentName = input.get_any_name();
+ const ov::Layout& currentLayout = getInputLayout(input);
+
+ if (currentShape.size() == 4) {
+ if (currentShape[ov::layout::channels_idx(currentLayout)] != 3) {
+ throw std::logic_error("Expected 4D image input with 3 channels");
+ }
+ inputsNames[0] = currentName;
+ netInputWidth = currentShape[ov::layout::width_idx(currentLayout)];
+ netInputHeight = currentShape[ov::layout::height_idx(currentLayout)];
+ ppp.input(currentName).tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
+ } else if (currentShape.size() == 2) {
+ if (currentShape[ov::layout::channels_idx(currentLayout)] != 2) {
+ throw std::logic_error("Expected 2D image info input with 2 channels");
+ }
+ inputsNames[1] = currentName;
+ ppp.input(currentName).tensor().set_element_type(ov::element::i32);
+ }
+ ppp.input(currentName).model().set_layout(currentLayout);
+ }
+
+ // --------------------------- Prepare outputs -----------------------------------------------------
+ const ov::OutputVector& outputs = model->outputs();
+ if (outputs.size() != 3) {
+ throw std::logic_error("YoloV3ONNX model wrapper expects models that have 3 outputs");
+ }
+
+ for (auto& output : outputs) {
+ const ov::Shape& currentShape = output.get_partial_shape().get_max_shape();
+ std::string currentName = output.get_any_name();
+ if (currentShape.back() == 3) {
+ indicesOutputName = currentName;
+ ppp.output(currentName).tensor().set_element_type(ov::element::i32);
+ } else if (currentShape[2] == 4) {
+ boxesOutputName = currentName;
+ ppp.output(currentName).tensor().set_element_type(ov::element::f32);
+ } else if (currentShape[1] == numberOfClasses) {
+ scoresOutputName = currentName;
+ ppp.output(currentName).tensor().set_element_type(ov::element::f32);
+ } else {
+ throw std::logic_error("Expected shapes [:,:,4], [:,"
+ + std::to_string(numberOfClasses) + ",:] and [:,3] for outputs");
+ }
+ outputsNames.push_back(currentName);
+ }
+ model = ppp.build();
+}
+
+std::shared_ptr<InternalModelData> ModelYoloV3ONNX::preprocess(const InputData& inputData,
+ ov::InferRequest& request) {
+ const auto& origImg = inputData.asRef<ImageInputData>().inputImage;
+
+ cv::Mat info(cv::Size(1, 2), CV_32SC1);
+ info.at<int>(0, 0) = origImg.rows;
+ info.at<int>(0, 1) = origImg.cols;
+ auto allocator = std::make_shared<SharedTensorAllocator>(info);
+ ov::Tensor infoInput = ov::Tensor(ov::element::i32, ov::Shape({1, 2}), ov::Allocator(allocator));
+
+ request.set_tensor(inputsNames[1], infoInput);
+
+ return ImageModel::preprocess(inputData, request);
+}
+
+namespace {
+float getScore(const ov::Tensor& scoresTensor, size_t classInd, size_t boxInd) {
+ const float* scoresPtr = scoresTensor.data<float>();
+ const auto shape = scoresTensor.get_shape();
+ int N = shape[2];
+
+ return scoresPtr[classInd * N + boxInd];
+}
+}
+
+std::unique_ptr<ResultBase> ModelYoloV3ONNX::postprocess(InferenceResult& infResult) {
+ // Get info about input image
+ const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
+ const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;
+
+ // Get outputs tensors
+ const ov::Tensor& boxes = infResult.outputsData[boxesOutputName];
+ const float* boxesPtr = boxes.data<float>();
+
+ const ov::Tensor& scores = infResult.outputsData[scoresOutputName];
+ const ov::Tensor& indices = infResult.outputsData[indicesOutputName];
+
+ const int* indicesData = indices.data<int>();
+ const auto indicesShape = indices.get_shape();
+ const auto boxShape = boxes.get_shape();
+
+ // Generate detection results
+ DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
+ size_t numberOfBoxes = indicesShape.size() == 3 ? indicesShape[1] : indicesShape[0];
+ int indicesStride = indicesShape.size() == 3 ? indicesShape[2] : indicesShape[1];
+
+ for (size_t i = 0; i < numberOfBoxes; ++i) {
+ int batchInd = indicesData[i * indicesStride];
+ int classInd = indicesData[i * indicesStride + 1];
+ int boxInd = indicesData[i * indicesStride + 2];
+
+ if (batchInd == -1) {
+ break;
+ }
+
+ float score = getScore(scores, classInd, boxInd);
+
+ if (score > confidenceThreshold) {
+ DetectedObject obj;
+ size_t startPos = boxShape[2] * boxInd;
+
+ auto x = boxesPtr[startPos + 1];
+ auto y = boxesPtr[startPos];
+ auto width = boxesPtr[startPos + 3] - x;
+ auto height = boxesPtr[startPos + 2] - y;
+
+ // Create new detected box
+ obj.x = clamp(x, 0.f, static_cast<float>(imgWidth));
+ obj.y = clamp(y, 0.f, static_cast<float>(imgHeight));
+ obj.height = clamp(height, 0.f, static_cast<float>(imgHeight));
+ obj.width = clamp(width, 0.f, static_cast<float>(imgWidth));
+ obj.confidence = score;
+ obj.labelID = classInd;
+ obj.label = getLabelName(classInd);
+
+ result->objects.push_back(obj);
+
+ }
+ }
+
+ return std::unique_ptr<ResultBase>(result);
+}