diff options
| author | Eric Dao <eric@erickhangdao.com> | 2025-03-10 17:54:31 -0400 |
|---|---|---|
| committer | Eric Dao <eric@erickhangdao.com> | 2025-03-10 17:54:31 -0400 |
| commit | ab224e2e6ba65f5a369ec392f99cd8845ad06c98 (patch) | |
| tree | a1e757e9341863ed52b8ad4c5a1c45933aab9da4 /python/openvino/runtime/common/models/src/detection_model_ssd.cpp | |
| parent | 40da1752f2c8639186b72f6838aa415e854d0b1d (diff) | |
| download | thesis-master.tar.gz thesis-master.tar.bz2 thesis-master.zip | |
Diffstat (limited to 'python/openvino/runtime/common/models/src/detection_model_ssd.cpp')
| -rw-r--r-- | python/openvino/runtime/common/models/src/detection_model_ssd.cpp | 281 |
1 files changed, 281 insertions, 0 deletions
diff --git a/python/openvino/runtime/common/models/src/detection_model_ssd.cpp b/python/openvino/runtime/common/models/src/detection_model_ssd.cpp new file mode 100644 index 0000000..ef741ee --- /dev/null +++ b/python/openvino/runtime/common/models/src/detection_model_ssd.cpp @@ -0,0 +1,281 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "models/detection_model_ssd.h" + +#include <algorithm> +#include <map> +#include <stdexcept> +#include <string> +#include <unordered_set> +#include <vector> + +#include <openvino/openvino.hpp> + +#include <utils/common.hpp> +#include <utils/ocv_common.hpp> + +#include "models/internal_model_data.h" +#include "models/results.h" + +struct InputData; + +ModelSSD::ModelSSD(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + const std::vector<std::string>& labels, + const std::string& layout) + : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout) {} + +std::shared_ptr<InternalModelData> ModelSSD::preprocess(const InputData& inputData, ov::InferRequest& request) { + if (inputsNames.size() > 1) { + const auto& imageInfoTensor = request.get_tensor(inputsNames[1]); + const auto info = imageInfoTensor.data<float>(); + info[0] = static_cast<float>(netInputHeight); + info[1] = static_cast<float>(netInputWidth); + info[2] = 1; + request.set_tensor(inputsNames[1], imageInfoTensor); + } + + return DetectionModel::preprocess(inputData, request); +} + +std::unique_ptr<ResultBase> ModelSSD::postprocess(InferenceResult& infResult) { + return outputsNames.size() > 1 ? postprocessMultipleOutputs(infResult) : postprocessSingleOutput(infResult); +} + +std::unique_ptr<ResultBase> ModelSSD::postprocessSingleOutput(InferenceResult& infResult) { + const ov::Tensor& detectionsTensor = infResult.getFirstOutputTensor(); + size_t detectionsNum = detectionsTensor.get_shape()[detectionsNumId]; + const float* detections = detectionsTensor.data<float>(); + + DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); + auto retVal = std::unique_ptr<ResultBase>(result); + + const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>(); + + for (size_t i = 0; i < detectionsNum; i++) { + float image_id = detections[i * objectSize + 0]; + if (image_id < 0) { + break; + } + + float confidence = detections[i * objectSize + 2]; + + /** Filtering out objects with confidence < confidence_threshold probability **/ + if (confidence > confidenceThreshold) { + DetectedObject desc; + + desc.confidence = confidence; + desc.labelID = static_cast<int>(detections[i * objectSize + 1]); + desc.label = getLabelName(desc.labelID); + + desc.x = clamp(detections[i * objectSize + 3] * internalData.inputImgWidth, + 0.f, + static_cast<float>(internalData.inputImgWidth)); + desc.y = clamp(detections[i * objectSize + 4] * internalData.inputImgHeight, + 0.f, + static_cast<float>(internalData.inputImgHeight)); + desc.width = clamp(detections[i * objectSize + 5] * internalData.inputImgWidth, + 0.f, + static_cast<float>(internalData.inputImgWidth)) - + desc.x; + desc.height = clamp(detections[i * objectSize + 6] * internalData.inputImgHeight, + 0.f, + static_cast<float>(internalData.inputImgHeight)) - + desc.y; + + result->objects.push_back(desc); + } + } + + return retVal; +} + +std::unique_ptr<ResultBase> ModelSSD::postprocessMultipleOutputs(InferenceResult& infResult) { + const float* boxes = infResult.outputsData[outputsNames[0]].data<float>(); + size_t detectionsNum = infResult.outputsData[outputsNames[0]].get_shape()[detectionsNumId]; + const float* labels = infResult.outputsData[outputsNames[1]].data<float>(); + const float* scores = outputsNames.size() > 2 ? infResult.outputsData[outputsNames[2]].data<float>() : nullptr; + + DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); + auto retVal = std::unique_ptr<ResultBase>(result); + + const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>(); + + // In models with scores are stored in separate output, coordinates are normalized to [0,1] + // In other multiple-outputs models coordinates are normalized to [0,netInputWidth] and [0,netInputHeight] + float widthScale = static_cast<float>(internalData.inputImgWidth) / (scores ? 1 : netInputWidth); + float heightScale = static_cast<float>(internalData.inputImgHeight) / (scores ? 1 : netInputHeight); + + for (size_t i = 0; i < detectionsNum; i++) { + float confidence = scores ? scores[i] : boxes[i * objectSize + 4]; + + /** Filtering out objects with confidence < confidence_threshold probability **/ + if (confidence > confidenceThreshold) { + DetectedObject desc; + + desc.confidence = confidence; + desc.labelID = static_cast<int>(labels[i]); + desc.label = getLabelName(desc.labelID); + + desc.x = clamp(boxes[i * objectSize] * widthScale, 0.f, static_cast<float>(internalData.inputImgWidth)); + desc.y = + clamp(boxes[i * objectSize + 1] * heightScale, 0.f, static_cast<float>(internalData.inputImgHeight)); + desc.width = + clamp(boxes[i * objectSize + 2] * widthScale, 0.f, static_cast<float>(internalData.inputImgWidth)) - + desc.x; + desc.height = + clamp(boxes[i * objectSize + 3] * heightScale, 0.f, static_cast<float>(internalData.inputImgHeight)) - + desc.y; + + result->objects.push_back(desc); + } + } + + return retVal; +} + +void ModelSSD::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output ------------------------------------------------- + // --------------------------- Prepare input ------------------------------------------------------ + ov::preprocess::PrePostProcessor ppp(model); + for (const auto& input : model->inputs()) { + auto inputTensorName = input.get_any_name(); + const ov::Shape& shape = input.get_shape(); + ov::Layout inputLayout = getInputLayout(input); + + if (shape.size() == 4) { // 1st input contains images + if (inputsNames.empty()) { + inputsNames.push_back(inputTensorName); + } else { + inputsNames[0] = inputTensorName; + } + + inputTransform.setPrecision(ppp, inputTensorName); + ppp.input(inputTensorName).tensor().set_layout({"NHWC"}); + + if (useAutoResize) { + ppp.input(inputTensorName).tensor().set_spatial_dynamic_shape(); + + ppp.input(inputTensorName) + .preprocess() + .convert_element_type(ov::element::f32) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + } + + ppp.input(inputTensorName).model().set_layout(inputLayout); + + netInputWidth = shape[ov::layout::width_idx(inputLayout)]; + netInputHeight = shape[ov::layout::height_idx(inputLayout)]; + } else if (shape.size() == 2) { // 2nd input contains image info + inputsNames.resize(2); + inputsNames[1] = inputTensorName; + ppp.input(inputTensorName).tensor().set_element_type(ov::element::f32); + } else { + throw std::logic_error("Unsupported " + std::to_string(input.get_shape().size()) + + "D " + "input layer '" + + input.get_any_name() + + "'. " + "Only 2D and 4D input layers are supported"); + } + } + model = ppp.build(); + + // --------------------------- Prepare output ----------------------------------------------------- + if (model->outputs().size() == 1) { + prepareSingleOutput(model); + } else { + prepareMultipleOutputs(model); + } +} + +void ModelSSD::prepareSingleOutput(std::shared_ptr<ov::Model>& model) { + const auto& output = model->output(); + outputsNames.push_back(output.get_any_name()); + + const ov::Shape& shape = output.get_shape(); + const ov::Layout& layout("NCHW"); + if (shape.size() != 4) { + throw std::logic_error("SSD single output must have 4 dimensions, but had " + std::to_string(shape.size())); + } + detectionsNumId = ov::layout::height_idx(layout); + objectSize = shape[ov::layout::width_idx(layout)]; + if (objectSize != 7) { + throw std::logic_error("SSD single output must have 7 as a last dimension, but had " + + std::to_string(objectSize)); + } + ov::preprocess::PrePostProcessor ppp(model); + ppp.output().tensor().set_element_type(ov::element::f32).set_layout(layout); + model = ppp.build(); +} + +void ModelSSD::prepareMultipleOutputs(std::shared_ptr<ov::Model>& model) { + const ov::OutputVector& outputs = model->outputs(); + for (auto& output : outputs) { + const auto& tensorNames = output.get_names(); + for (const auto& name : tensorNames) { + if (name.find("boxes") != std::string::npos) { + outputsNames.push_back(name); + break; + } else if (name.find("labels") != std::string::npos) { + outputsNames.push_back(name); + break; + } else if (name.find("scores") != std::string::npos) { + outputsNames.push_back(name); + break; + } + } + } + if (outputsNames.size() != 2 && outputsNames.size() != 3) { + throw std::logic_error("SSD model wrapper must have 2 or 3 outputs, but had " + + std::to_string(outputsNames.size())); + } + std::sort(outputsNames.begin(), outputsNames.end()); + + ov::preprocess::PrePostProcessor ppp(model); + const auto& boxesShape = model->output(outputsNames[0]).get_partial_shape().get_max_shape(); + + ov::Layout boxesLayout; + if (boxesShape.size() == 2) { + boxesLayout = "NC"; + detectionsNumId = ov::layout::batch_idx(boxesLayout); + objectSize = boxesShape[ov::layout::channels_idx(boxesLayout)]; + + if (objectSize != 5) { + throw std::logic_error("Incorrect 'boxes' output shape, [n][5] shape is required"); + } + } else if (boxesShape.size() == 3) { + boxesLayout = "CHW"; + detectionsNumId = ov::layout::height_idx(boxesLayout); + objectSize = boxesShape[ov::layout::width_idx(boxesLayout)]; + + if (objectSize != 4) { + throw std::logic_error("Incorrect 'boxes' output shape, [b][n][4] shape is required"); + } + } else { + throw std::logic_error("Incorrect number of 'boxes' output dimensions, expected 2 or 3, but had " + + std::to_string(boxesShape.size())); + } + + ppp.output(outputsNames[0]).tensor().set_layout(boxesLayout); + + for (const auto& outName : outputsNames) { + ppp.output(outName).tensor().set_element_type(ov::element::f32); + } + model = ppp.build(); +} |
