Diffstat (limited to 'python/openvino/runtime/common/models/src/detection_model_ssd.cpp')
-rw-r--r--  python/openvino/runtime/common/models/src/detection_model_ssd.cpp  281
1 file changed, 281 insertions, 0 deletions
diff --git a/python/openvino/runtime/common/models/src/detection_model_ssd.cpp b/python/openvino/runtime/common/models/src/detection_model_ssd.cpp
new file mode 100644
index 0000000..ef741ee
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/detection_model_ssd.cpp
@@ -0,0 +1,281 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/detection_model_ssd.h"
+
+#include <algorithm>
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include <utils/common.hpp>
+#include <utils/ocv_common.hpp>
+
+#include "models/internal_model_data.h"
+#include "models/results.h"
+
+struct InputData;
+
+ModelSSD::ModelSSD(const std::string& modelFileName,
+ float confidenceThreshold,
+ bool useAutoResize,
+ const std::vector<std::string>& labels,
+ const std::string& layout)
+ : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout) {}
+
+std::shared_ptr<InternalModelData> ModelSSD::preprocess(const InputData& inputData, ov::InferRequest& request) {
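+    // Some SSD-like models take an auxiliary "image info" input; fill it with [height, width, scale] of the network input.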
+ if (inputsNames.size() > 1) {
+ const auto& imageInfoTensor = request.get_tensor(inputsNames[1]);
+ const auto info = imageInfoTensor.data<float>();
+ info[0] = static_cast<float>(netInputHeight);
+ info[1] = static_cast<float>(netInputWidth);
+ info[2] = 1;
+ request.set_tensor(inputsNames[1], imageInfoTensor);
+ }
+
+ return DetectionModel::preprocess(inputData, request);
+}
+
+std::unique_ptr<ResultBase> ModelSSD::postprocess(InferenceResult& infResult) {
+ return outputsNames.size() > 1 ? postprocessMultipleOutputs(infResult) : postprocessSingleOutput(infResult);
+}
+
+std::unique_ptr<ResultBase> ModelSSD::postprocessSingleOutput(InferenceResult& infResult) {
+ const ov::Tensor& detectionsTensor = infResult.getFirstOutputTensor();
+ size_t detectionsNum = detectionsTensor.get_shape()[detectionsNumId];
+ const float* detections = detectionsTensor.data<float>();
+
+ DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
+ auto retVal = std::unique_ptr<ResultBase>(result);
+
+ const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
+
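+    // Each detection is a 7-float record: [image_id, label, confidence, x_min, y_min, x_max, y_max],
+    // with coordinates normalized to [0, 1]. A negative image_id marks the end of valid detections.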
+ for (size_t i = 0; i < detectionsNum; i++) {
+ float image_id = detections[i * objectSize + 0];
+ if (image_id < 0) {
+ break;
+ }
+
+ float confidence = detections[i * objectSize + 2];
+
+        /** Filter out objects with confidence below the confidence threshold **/
+ if (confidence > confidenceThreshold) {
+ DetectedObject desc;
+
+ desc.confidence = confidence;
+ desc.labelID = static_cast<int>(detections[i * objectSize + 1]);
+ desc.label = getLabelName(desc.labelID);
+
+ desc.x = clamp(detections[i * objectSize + 3] * internalData.inputImgWidth,
+ 0.f,
+ static_cast<float>(internalData.inputImgWidth));
+ desc.y = clamp(detections[i * objectSize + 4] * internalData.inputImgHeight,
+ 0.f,
+ static_cast<float>(internalData.inputImgHeight));
+ desc.width = clamp(detections[i * objectSize + 5] * internalData.inputImgWidth,
+ 0.f,
+ static_cast<float>(internalData.inputImgWidth)) -
+ desc.x;
+ desc.height = clamp(detections[i * objectSize + 6] * internalData.inputImgHeight,
+ 0.f,
+ static_cast<float>(internalData.inputImgHeight)) -
+ desc.y;
+
+ result->objects.push_back(desc);
+ }
+ }
+
+ return retVal;
+}
+
+std::unique_ptr<ResultBase> ModelSSD::postprocessMultipleOutputs(InferenceResult& infResult) {
+ const float* boxes = infResult.outputsData[outputsNames[0]].data<float>();
+ size_t detectionsNum = infResult.outputsData[outputsNames[0]].get_shape()[detectionsNumId];
+ const float* labels = infResult.outputsData[outputsNames[1]].data<float>();
+ const float* scores = outputsNames.size() > 2 ? infResult.outputsData[outputsNames[2]].data<float>() : nullptr;
+
+ DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
+ auto retVal = std::unique_ptr<ResultBase>(result);
+
+ const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
+
+    // In models where scores are stored in a separate output, coordinates are normalized to [0, 1].
+    // In other multi-output models, coordinates are normalized to [0, netInputWidth] and [0, netInputHeight].
+ float widthScale = static_cast<float>(internalData.inputImgWidth) / (scores ? 1 : netInputWidth);
+ float heightScale = static_cast<float>(internalData.inputImgHeight) / (scores ? 1 : netInputHeight);
+
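+    // Each 'boxes' record holds [x_min, y_min, x_max, y_max]; when there is no separate 'scores' output,
+    // a fifth element carries the confidence.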
+ for (size_t i = 0; i < detectionsNum; i++) {
+ float confidence = scores ? scores[i] : boxes[i * objectSize + 4];
+
+        /** Filter out objects with confidence below the confidence threshold **/
+ if (confidence > confidenceThreshold) {
+ DetectedObject desc;
+
+ desc.confidence = confidence;
+ desc.labelID = static_cast<int>(labels[i]);
+ desc.label = getLabelName(desc.labelID);
+
+ desc.x = clamp(boxes[i * objectSize] * widthScale, 0.f, static_cast<float>(internalData.inputImgWidth));
+ desc.y =
+ clamp(boxes[i * objectSize + 1] * heightScale, 0.f, static_cast<float>(internalData.inputImgHeight));
+ desc.width =
+ clamp(boxes[i * objectSize + 2] * widthScale, 0.f, static_cast<float>(internalData.inputImgWidth)) -
+ desc.x;
+ desc.height =
+ clamp(boxes[i * objectSize + 3] * heightScale, 0.f, static_cast<float>(internalData.inputImgHeight)) -
+ desc.y;
+
+ result->objects.push_back(desc);
+ }
+ }
+
+ return retVal;
+}
+
+void ModelSSD::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare input ------------------------------------------------------
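+    // PrePostProcessor embeds the pre/post-processing steps configured below into the model graph.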
+ ov::preprocess::PrePostProcessor ppp(model);
+ for (const auto& input : model->inputs()) {
+ auto inputTensorName = input.get_any_name();
+ const ov::Shape& shape = input.get_shape();
+ ov::Layout inputLayout = getInputLayout(input);
+
+ if (shape.size() == 4) { // 1st input contains images
+ if (inputsNames.empty()) {
+ inputsNames.push_back(inputTensorName);
+ } else {
+ inputsNames[0] = inputTensorName;
+ }
+
+ inputTransform.setPrecision(ppp, inputTensorName);
+ ppp.input(inputTensorName).tensor().set_layout({"NHWC"});
+
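+            // With auto-resize, the input tensor keeps the original frame size and resizing is performed inside the model.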
+ if (useAutoResize) {
+ ppp.input(inputTensorName).tensor().set_spatial_dynamic_shape();
+
+ ppp.input(inputTensorName)
+ .preprocess()
+ .convert_element_type(ov::element::f32)
+ .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
+ }
+
+ ppp.input(inputTensorName).model().set_layout(inputLayout);
+
+ netInputWidth = shape[ov::layout::width_idx(inputLayout)];
+ netInputHeight = shape[ov::layout::height_idx(inputLayout)];
+ } else if (shape.size() == 2) { // 2nd input contains image info
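+            // This is the auxiliary image-info tensor that ModelSSD::preprocess() fills with [height, width, scale].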
+ inputsNames.resize(2);
+ inputsNames[1] = inputTensorName;
+ ppp.input(inputTensorName).tensor().set_element_type(ov::element::f32);
+ } else {
+            throw std::logic_error("Unsupported " + std::to_string(shape.size()) + "D input layer '" +
+                                   inputTensorName + "'. Only 2D and 4D input layers are supported");
+ }
+ }
+ model = ppp.build();
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ if (model->outputs().size() == 1) {
+ prepareSingleOutput(model);
+ } else {
+ prepareMultipleOutputs(model);
+ }
+}
+
+void ModelSSD::prepareSingleOutput(std::shared_ptr<ov::Model>& model) {
+ const auto& output = model->output();
+ outputsNames.push_back(output.get_any_name());
+
+ const ov::Shape& shape = output.get_shape();
+    const ov::Layout layout("NCHW");
+ if (shape.size() != 4) {
+ throw std::logic_error("SSD single output must have 4 dimensions, but had " + std::to_string(shape.size()));
+ }
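+    // For the typical [1, 1, N, 7] output laid out as NCHW, the 'height' dimension counts detections
+    // and the 'width' dimension is the 7-element record size.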
+ detectionsNumId = ov::layout::height_idx(layout);
+ objectSize = shape[ov::layout::width_idx(layout)];
+ if (objectSize != 7) {
+ throw std::logic_error("SSD single output must have 7 as a last dimension, but had " +
+ std::to_string(objectSize));
+ }
+ ov::preprocess::PrePostProcessor ppp(model);
+ ppp.output().tensor().set_element_type(ov::element::f32).set_layout(layout);
+ model = ppp.build();
+}
+
+void ModelSSD::prepareMultipleOutputs(std::shared_ptr<ov::Model>& model) {
+ const ov::OutputVector& outputs = model->outputs();
+ for (auto& output : outputs) {
+ const auto& tensorNames = output.get_names();
+ for (const auto& name : tensorNames) {
+ if (name.find("boxes") != std::string::npos) {
+ outputsNames.push_back(name);
+ break;
+ } else if (name.find("labels") != std::string::npos) {
+ outputsNames.push_back(name);
+ break;
+ } else if (name.find("scores") != std::string::npos) {
+ outputsNames.push_back(name);
+ break;
+ }
+ }
+ }
+ if (outputsNames.size() != 2 && outputsNames.size() != 3) {
+ throw std::logic_error("SSD model wrapper must have 2 or 3 outputs, but had " +
+ std::to_string(outputsNames.size()));
+ }
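+    // Alphabetical order conveniently yields: boxes, labels, scores.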
+ std::sort(outputsNames.begin(), outputsNames.end());
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ const auto& boxesShape = model->output(outputsNames[0]).get_partial_shape().get_max_shape();
+
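+    // 'boxes' is either [N, 5] (coordinates plus confidence) or [B, N, 4] with confidences in the separate 'scores' output.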
+ ov::Layout boxesLayout;
+ if (boxesShape.size() == 2) {
+ boxesLayout = "NC";
+ detectionsNumId = ov::layout::batch_idx(boxesLayout);
+ objectSize = boxesShape[ov::layout::channels_idx(boxesLayout)];
+
+ if (objectSize != 5) {
+ throw std::logic_error("Incorrect 'boxes' output shape, [n][5] shape is required");
+ }
+ } else if (boxesShape.size() == 3) {
+ boxesLayout = "CHW";
+ detectionsNumId = ov::layout::height_idx(boxesLayout);
+ objectSize = boxesShape[ov::layout::width_idx(boxesLayout)];
+
+ if (objectSize != 4) {
+ throw std::logic_error("Incorrect 'boxes' output shape, [b][n][4] shape is required");
+ }
+ } else {
+ throw std::logic_error("Incorrect number of 'boxes' output dimensions, expected 2 or 3, but had " +
+ std::to_string(boxesShape.size()));
+ }
+
+ ppp.output(outputsNames[0]).tensor().set_layout(boxesLayout);
+
+ for (const auto& outName : outputsNames) {
+ ppp.output(outName).tensor().set_element_type(ov::element::f32);
+ }
+ model = ppp.build();
+}