Diffstat (limited to 'python/openvino/runtime/common/models/src/detection_model_ssd.cpp')
-rw-r--r--  python/openvino/runtime/common/models/src/detection_model_ssd.cpp  281
1 file changed, 281 insertions, 0 deletions
diff --git a/python/openvino/runtime/common/models/src/detection_model_ssd.cpp b/python/openvino/runtime/common/models/src/detection_model_ssd.cpp
new file mode 100644
index 0000000..ef741ee
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/detection_model_ssd.cpp
@@ -0,0 +1,281 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/detection_model_ssd.h"
+
+#include <algorithm>
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include <utils/common.hpp>
+#include <utils/ocv_common.hpp>
+
+#include "models/internal_model_data.h"
+#include "models/results.h"
+
+struct InputData;
+
+ModelSSD::ModelSSD(const std::string& modelFileName,
+ float confidenceThreshold,
+ bool useAutoResize,
+ const std::vector<std::string>& labels,
+ const std::string& layout)
+ : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout) {}
+
+std::shared_ptr<InternalModelData> ModelSSD::preprocess(const InputData& inputData, ov::InferRequest& request) {
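+    // Some SSD-like models take an auxiliary "image info" input; fill it with [height, width, scale] of the network input.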
+ if (inputsNames.size() > 1) {
+ const auto& imageInfoTensor = request.get_tensor(inputsNames[1]);
+ const auto info = imageInfoTensor.data<float>();
+ info[0] = static_cast<float>(netInputHeight);
+ info[1] = static_cast<float>(netInputWidth);
+ info[2] = 1;
+ request.set_tensor(inputsNames[1], imageInfoTensor);
+ }
+
+ return DetectionModel::preprocess(inputData, request);
+}
+
+std::unique_ptr<ResultBase> ModelSSD::postprocess(InferenceResult& infResult) {
+ return outputsNames.size() > 1 ? postprocessMultipleOutputs(infResult) : postprocessSingleOutput(infResult);
+}
+
+std::unique_ptr<ResultBase> ModelSSD::postprocessSingleOutput(InferenceResult& infResult) {
+ const ov::Tensor& detectionsTensor = infResult.getFirstOutputTensor();
+ size_t detectionsNum = detectionsTensor.get_shape()[detectionsNumId];
+ const float* detections = detectionsTensor.data<float>();
+
+ DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
+ auto retVal = std::unique_ptr<ResultBase>(result);
+
+ const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
+
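+    // Each detection is a 7-float record: [image_id, label, confidence, x_min, y_min, x_max, y_max],
+    // with coordinates normalized to [0, 1]. A negative image_id marks the end of valid detections.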
+ for (size_t i = 0; i < detectionsNum; i++) {
+ float image_id = detections[i * objectSize + 0];
+ if (image_id < 0) {
+ break;
+ }
+
+ float confidence = detections[i * objectSize + 2];
+
+        /** Filter out objects with confidence below the confidence threshold **/
+ if (confidence > confidenceThreshold) {
+ DetectedObject desc;
+
+ desc.confidence = confidence;
+ desc.labelID = static_cast<int>(detections[i * objectSize + 1]);
+ desc.label = getLabelName(desc.labelID);
+
+ desc.x = clamp(detections[i * objectSize + 3] * internalData.inputImgWidth,
+ 0.f,
+ static_cast<float>(internalData.inputImgWidth));
+ desc.y = clamp(detections[i * objectSize + 4] * internalData.inputImgHeight,
+ 0.f,
+ static_cast<float>(internalData.inputImgHeight));
+ desc.width = clamp(detections[i * objectSize + 5] * internalData.inputImgWidth,
+ 0.f,
+ static_cast<float>(internalData.inputImgWidth)) -
+ desc.x;
+ desc.height = clamp(detections[i * objectSize + 6] * internalData.inputImgHeight,
+ 0.f,
+ static_cast<float>(internalData.inputImgHeight)) -
+ desc.y;
+
+ result->objects.push_back(desc);
+ }
+ }
+
+ return retVal;
+}
+
+std::unique_ptr<ResultBase> ModelSSD::postprocessMultipleOutputs(InferenceResult& infResult) {
+ const float* boxes = infResult.outputsData[outputsNames[0]].data<float>();
+ size_t detectionsNum = infResult.outputsData[outputsNames[0]].get_shape()[detectionsNumId];
+ const float* labels = infResult.outputsData[outputsNames[1]].data<float>();
+ const float* scores = outputsNames.size() > 2 ? infResult.outputsData[outputsNames[2]].data<float>() : nullptr;
+
+ DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
+ auto retVal = std::unique_ptr<ResultBase>(result);
+
+ const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
+
+    // In models where scores are stored in a separate output, coordinates are normalized to [0, 1].
+    // In other multi-output models, coordinates are normalized to [0, netInputWidth] and [0, netInputHeight].
+ float widthScale = static_cast<float>(internalData.inputImgWidth) / (scores ? 1 : netInputWidth);
+ float heightScale = static_cast<float>(internalData.inputImgHeight) / (scores ? 1 : netInputHeight);
+
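+    // Each 'boxes' record holds [x_min, y_min, x_max, y_max]; when there is no separate 'scores' output,
+    // a fifth element carries the confidence.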
+ for (size_t i = 0; i < detectionsNum; i++) {
+ float confidence = scores ? scores[i] : boxes[i * objectSize + 4];
+
+        /** Filter out objects with confidence below the confidence threshold **/
+ if (confidence > confidenceThreshold) {
+ DetectedObject desc;
+
+ desc.confidence = confidence;
+ desc.labelID = static_cast<int>(labels[i]);
+ desc.label = getLabelName(desc.labelID);
+
+ desc.x = clamp(boxes[i * objectSize] * widthScale, 0.f, static_cast<float>(internalData.inputImgWidth));
+ desc.y =
+ clamp(boxes[i * objectSize + 1] * heightScale, 0.f, static_cast<float>(internalData.inputImgHeight));
+ desc.width =
+ clamp(boxes[i * objectSize + 2] * widthScale, 0.f, static_cast<float>(internalData.inputImgWidth)) -
+ desc.x;
+ desc.height =
+ clamp(boxes[i * objectSize + 3] * heightScale, 0.f, static_cast<float>(internalData.inputImgHeight)) -
+ desc.y;
+
+ result->objects.push_back(desc);
+ }
+ }
+
+ return retVal;
+}
+
+void ModelSSD::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare input ------------------------------------------------------
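+    // PrePostProcessor embeds the pre/post-processing steps configured below into the model graph.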
+ ov::preprocess::PrePostProcessor ppp(model);
+ for (const auto& input : model->inputs()) {
+ auto inputTensorName = input.get_any_name();
+ const ov::Shape& shape = input.get_shape();
+ ov::Layout inputLayout = getInputLayout(input);
+
+ if (shape.size() == 4) { // 1st input contains images
+ if (inputsNames.empty()) {
+ inputsNames.push_back(inputTensorName);
+ } else {
+ inputsNames[0] = inputTensorName;
+ }
+
+ inputTransform.setPrecision(ppp, inputTensorName);
+ ppp.input(inputTensorName).tensor().set_layout({"NHWC"});
+
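+            // With auto-resize, the input tensor keeps the original frame size and resizing is performed inside the model.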
+ if (useAutoResize) {
+ ppp.input(inputTensorName).tensor().set_spatial_dynamic_shape();
+
+ ppp.input(inputTensorName)
+ .preprocess()
+ .convert_element_type(ov::element::f32)
+ .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
+ }
+
+ ppp.input(inputTensorName).model().set_layout(inputLayout);
+
+ netInputWidth = shape[ov::layout::width_idx(inputLayout)];
+ netInputHeight = shape[ov::layout::height_idx(inputLayout)];
+ } else if (shape.size() == 2) { // 2nd input contains image info
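+            // This is the auxiliary image-info tensor that ModelSSD::preprocess() fills with [height, width, scale].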
+ inputsNames.resize(2);
+ inputsNames[1] = inputTensorName;
+ ppp.input(inputTensorName).tensor().set_element_type(ov::element::f32);
+ } else {
+            throw std::logic_error("Unsupported " + std::to_string(shape.size()) + "D input layer '" +
+                                   inputTensorName + "'. Only 2D and 4D input layers are supported");
+ }
+ }
+ model = ppp.build();
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ if (model->outputs().size() == 1) {
+ prepareSingleOutput(model);
+ } else {
+ prepareMultipleOutputs(model);
+ }
+}
+
+void ModelSSD::prepareSingleOutput(std::shared_ptr<ov::Model>& model) {
+ const auto& output = model->output();
+ outputsNames.push_back(output.get_any_name());
+
+ const ov::Shape& shape = output.get_shape();
+    const ov::Layout layout("NCHW");
+ if (shape.size() != 4) {
+ throw std::logic_error("SSD single output must have 4 dimensions, but had " + std::to_string(shape.size()));
+ }
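+    // For the typical [1, 1, N, 7] output laid out as NCHW, the 'height' dimension counts detections
+    // and the 'width' dimension is the 7-element record size.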
+ detectionsNumId = ov::layout::height_idx(layout);
+ objectSize = shape[ov::layout::width_idx(layout)];
+ if (objectSize != 7) {
+ throw std::logic_error("SSD single output must have 7 as a last dimension, but had " +
+ std::to_string(objectSize));
+ }
+ ov::preprocess::PrePostProcessor ppp(model);
+ ppp.output().tensor().set_element_type(ov::element::f32).set_layout(layout);
+ model = ppp.build();
+}
+
+void ModelSSD::prepareMultipleOutputs(std::shared_ptr<ov::Model>& model) {
+ const ov::OutputVector& outputs = model->outputs();
+ for (auto& output : outputs) {
+ const auto& tensorNames = output.get_names();
+ for (const auto& name : tensorNames) {
+ if (name.find("boxes") != std::string::npos) {
+ outputsNames.push_back(name);
+ break;
+ } else if (name.find("labels") != std::string::npos) {
+ outputsNames.push_back(name);
+ break;
+ } else if (name.find("scores") != std::string::npos) {
+ outputsNames.push_back(name);
+ break;
+ }
+ }
+ }
+ if (outputsNames.size() != 2 && outputsNames.size() != 3) {
+ throw std::logic_error("SSD model wrapper must have 2 or 3 outputs, but had " +
+ std::to_string(outputsNames.size()));
+ }
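+    // Alphabetical order conveniently yields: boxes, labels, scores.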
+ std::sort(outputsNames.begin(), outputsNames.end());
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ const auto& boxesShape = model->output(outputsNames[0]).get_partial_shape().get_max_shape();
+
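+    // 'boxes' is either [N, 5] (coordinates plus confidence) or [B, N, 4] with confidences in the separate 'scores' output.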
+ ov::Layout boxesLayout;
+ if (boxesShape.size() == 2) {
+ boxesLayout = "NC";
+ detectionsNumId = ov::layout::batch_idx(boxesLayout);
+ objectSize = boxesShape[ov::layout::channels_idx(boxesLayout)];
+
+ if (objectSize != 5) {
+ throw std::logic_error("Incorrect 'boxes' output shape, [n][5] shape is required");
+ }
+ } else if (boxesShape.size() == 3) {
+ boxesLayout = "CHW";
+ detectionsNumId = ov::layout::height_idx(boxesLayout);
+ objectSize = boxesShape[ov::layout::width_idx(boxesLayout)];
+
+ if (objectSize != 4) {
+ throw std::logic_error("Incorrect 'boxes' output shape, [b][n][4] shape is required");
+ }
+ } else {
+ throw std::logic_error("Incorrect number of 'boxes' output dimensions, expected 2 or 3, but had " +
+ std::to_string(boxesShape.size()));
+ }
+
+ ppp.output(outputsNames[0]).tensor().set_layout(boxesLayout);
+
+ for (const auto& outName : outputsNames) {
+ ppp.output(outName).tensor().set_element_type(ov::element::f32);
+ }
+ model = ppp.build();
+}