diff options
| author | Eric Dao <eric@erickhangdao.com> | 2025-03-10 17:54:31 -0400 |
|---|---|---|
| committer | Eric Dao <eric@erickhangdao.com> | 2025-03-10 17:54:31 -0400 |
| commit | ab224e2e6ba65f5a369ec392f99cd8845ad06c98 (patch) | |
| tree | a1e757e9341863ed52b8ad4c5a1c45933aab9da4 /python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp | |
| parent | 40da1752f2c8639186b72f6838aa415e854d0b1d (diff) | |
| download | thesis-master.tar.gz thesis-master.tar.bz2 thesis-master.zip | |
Diffstat (limited to 'python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp')
| -rw-r--r-- | python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp b/python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp new file mode 100644 index 0000000..132eb9e --- /dev/null +++ b/python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp @@ -0,0 +1,188 @@ +/* +// Copyright (C) 2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "models/detection_model_yolov3_onnx.h" + +#include <algorithm> +#include <cmath> +#include <cstdint> +#include <stdexcept> +#include <string> +#include <utility> +#include <vector> + +#include <openvino/openvino.hpp> + +#include <utils/common.hpp> +#include <utils/slog.hpp> + +#include "models/input_data.h" +#include "models/internal_model_data.h" +#include "models/results.h" +#include "utils/image_utils.h" + +ModelYoloV3ONNX::ModelYoloV3ONNX(const std::string& modelFileName, + float confidenceThreshold, + const std::vector<std::string>& labels, + const std::string& layout) + : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) { + interpolationMode = cv::INTER_CUBIC; + resizeMode = RESIZE_KEEP_ASPECT_LETTERBOX; + } + + +void ModelYoloV3ONNX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output ------------------------------------------------- + // --------------------------- Prepare inputs ------------------------------------------------------ + const ov::OutputVector& inputs = model->inputs(); + if (inputs.size() != 2) { + throw std::logic_error("YoloV3ONNX model wrapper expects models that have 2 inputs"); + } + + ov::preprocess::PrePostProcessor ppp(model); + inputsNames.reserve(inputs.size()); + for (auto& input : inputs) { + const ov::Shape& currentShape = input.get_shape(); + std::string currentName = input.get_any_name(); + const ov::Layout& currentLayout = getInputLayout(input); + + if (currentShape.size() == 4) { + if (currentShape[ov::layout::channels_idx(currentLayout)] != 3) { + throw std::logic_error("Expected 4D image input with 3 channels"); + } + inputsNames[0] = currentName; + netInputWidth = currentShape[ov::layout::width_idx(currentLayout)]; + netInputHeight = currentShape[ov::layout::height_idx(currentLayout)]; + ppp.input(currentName).tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); + } else if (currentShape.size() == 2) { + if (currentShape[ov::layout::channels_idx(currentLayout)] != 2) { + throw std::logic_error("Expected 2D image info input with 2 channels"); + } + inputsNames[1] = currentName; + ppp.input(currentName).tensor().set_element_type(ov::element::i32); + } + ppp.input(currentName).model().set_layout(currentLayout); + } + + // --------------------------- Prepare outputs ----------------------------------------------------- + const ov::OutputVector& outputs = model->outputs(); + if (outputs.size() != 3) { + throw std::logic_error("YoloV3ONNX model wrapper expects models that have 3 outputs"); + } + + for (auto& output : outputs) { + const ov::Shape& currentShape = output.get_partial_shape().get_max_shape(); + std::string currentName = output.get_any_name(); + if (currentShape.back() == 3) { + indicesOutputName = currentName; + ppp.output(currentName).tensor().set_element_type(ov::element::i32); + } else if (currentShape[2] == 4) { + boxesOutputName = currentName; + ppp.output(currentName).tensor().set_element_type(ov::element::f32); + } else if (currentShape[1] == numberOfClasses) { + scoresOutputName = currentName; + ppp.output(currentName).tensor().set_element_type(ov::element::f32); + } else { + throw std::logic_error("Expected shapes [:,:,4], [:," + + std::to_string(numberOfClasses) + ",:] and [:,3] for outputs"); + } + outputsNames.push_back(currentName); + } + model = ppp.build(); +} + +std::shared_ptr<InternalModelData> ModelYoloV3ONNX::preprocess(const InputData& inputData, + ov::InferRequest& request) { + const auto& origImg = inputData.asRef<ImageInputData>().inputImage; + + cv::Mat info(cv::Size(1, 2), CV_32SC1); + info.at<int>(0, 0) = origImg.rows; + info.at<int>(0, 1) = origImg.cols; + auto allocator = std::make_shared<SharedTensorAllocator>(info); + ov::Tensor infoInput = ov::Tensor(ov::element::i32, ov::Shape({1, 2}), ov::Allocator(allocator)); + + request.set_tensor(inputsNames[1], infoInput); + + return ImageModel::preprocess(inputData, request); +} + +namespace { +float getScore(const ov::Tensor& scoresTensor, size_t classInd, size_t boxInd) { + const float* scoresPtr = scoresTensor.data<float>(); + const auto shape = scoresTensor.get_shape(); + int N = shape[2]; + + return scoresPtr[classInd * N + boxInd]; +} +} + +std::unique_ptr<ResultBase> ModelYoloV3ONNX::postprocess(InferenceResult& infResult) { + // Get info about input image + const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth; + const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight; + + // Get outputs tensors + const ov::Tensor& boxes = infResult.outputsData[boxesOutputName]; + const float* boxesPtr = boxes.data<float>(); + + const ov::Tensor& scores = infResult.outputsData[scoresOutputName]; + const ov::Tensor& indices = infResult.outputsData[indicesOutputName]; + + const int* indicesData = indices.data<int>(); + const auto indicesShape = indices.get_shape(); + const auto boxShape = boxes.get_shape(); + + // Generate detection results + DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); + size_t numberOfBoxes = indicesShape.size() == 3 ? indicesShape[1] : indicesShape[0]; + int indicesStride = indicesShape.size() == 3 ? indicesShape[2] : indicesShape[1]; + + for (size_t i = 0; i < numberOfBoxes; ++i) { + int batchInd = indicesData[i * indicesStride]; + int classInd = indicesData[i * indicesStride + 1]; + int boxInd = indicesData[i * indicesStride + 2]; + + if (batchInd == -1) { + break; + } + + float score = getScore(scores, classInd, boxInd); + + if (score > confidenceThreshold) { + DetectedObject obj; + size_t startPos = boxShape[2] * boxInd; + + auto x = boxesPtr[startPos + 1]; + auto y = boxesPtr[startPos]; + auto width = boxesPtr[startPos + 3] - x; + auto height = boxesPtr[startPos + 2] - y; + + // Create new detected box + obj.x = clamp(x, 0.f, static_cast<float>(imgWidth)); + obj.y = clamp(y, 0.f, static_cast<float>(imgHeight)); + obj.height = clamp(height, 0.f, static_cast<float>(imgHeight)); + obj.width = clamp(width, 0.f, static_cast<float>(imgWidth)); + obj.confidence = score; + obj.labelID = classInd; + obj.label = getLabelName(classInd); + + result->objects.push_back(obj); + + } + } + + return std::unique_ptr<ResultBase>(result); +} |
