diff options
Diffstat (limited to 'python/openvino/runtime/common/models/src/detection_model_faceboxes.cpp')
| -rw-r--r-- | python/openvino/runtime/common/models/src/detection_model_faceboxes.cpp | 261 |
1 files changed, 261 insertions, 0 deletions
diff --git a/python/openvino/runtime/common/models/src/detection_model_faceboxes.cpp b/python/openvino/runtime/common/models/src/detection_model_faceboxes.cpp new file mode 100644 index 0000000..bb349a6 --- /dev/null +++ b/python/openvino/runtime/common/models/src/detection_model_faceboxes.cpp @@ -0,0 +1,261 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "models/detection_model_faceboxes.h" + +#include <algorithm> +#include <cmath> +#include <map> +#include <stdexcept> + +#include <openvino/openvino.hpp> + +#include <utils/common.hpp> +#include <utils/nms.hpp> +#include <utils/ocv_common.hpp> + +#include "models/internal_model_data.h" +#include "models/results.h" + +ModelFaceBoxes::ModelFaceBoxes(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + float boxIOUThreshold, + const std::string& layout) + : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout), + maxProposalsCount(0), + boxIOUThreshold(boxIOUThreshold), + variance({0.1f, 0.2f}), + steps({32, 64, 128}), + minSizes({{32, 64, 128}, {256}, {512}}) {} + +void ModelFaceBoxes::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output ------------------------------------------------- + // --------------------------- Prepare input ------------------------------------------------------ + if (model->inputs().size() != 1) { + throw std::logic_error("FaceBoxes model wrapper expects models that have only 1 input"); + } + + const ov::Shape& inputShape = model->input().get_shape(); + const ov::Layout& inputLayout = getInputLayout(model->input()); + + if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) { + throw std::logic_error("Expected 3-channel input"); + } + + ov::preprocess::PrePostProcessor ppp(model); + inputTransform.setPrecision(ppp, model->input().get_any_name()); + ppp.input().tensor().set_layout({"NHWC"}); + + if (useAutoResize) { + ppp.input().tensor().set_spatial_dynamic_shape(); + + ppp.input() + .preprocess() + .convert_element_type(ov::element::f32) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + } + + ppp.input().model().set_layout(inputLayout); + + // --------------------------- Reading image input parameters ------------------------------------------- + inputsNames.push_back(model->input().get_any_name()); + netInputWidth = inputShape[ov::layout::width_idx(inputLayout)]; + netInputHeight = inputShape[ov::layout::height_idx(inputLayout)]; + + // --------------------------- Prepare output ----------------------------------------------------- + if (model->outputs().size() != 2) { + throw std::logic_error("FaceBoxes model wrapper expects models that have 2 outputs"); + } + + const ov::Layout outputLayout{"CHW"}; + maxProposalsCount = model->outputs().front().get_shape()[ov::layout::height_idx(outputLayout)]; + for (const auto& output : model->outputs()) { + const auto outTensorName = output.get_any_name(); + outputsNames.push_back(outTensorName); + ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout); + } + std::sort(outputsNames.begin(), outputsNames.end()); + model = ppp.build(); + + // --------------------------- Calculating anchors ---------------------------------------------------- + std::vector<std::pair<size_t, size_t>> featureMaps; + for (auto s : steps) { + featureMaps.push_back({netInputHeight / s, netInputWidth / s}); + } + + priorBoxes(featureMaps); +} + +void calculateAnchors(std::vector<Anchor>& anchors, + const std::vector<float>& vx, + const std::vector<float>& vy, + const int minSize, + const int step) { + float skx = static_cast<float>(minSize); + float sky = static_cast<float>(minSize); + + std::vector<float> dense_cx, dense_cy; + + for (auto x : vx) { + dense_cx.push_back(x * step); + } + + for (auto y : vy) { + dense_cy.push_back(y * step); + } + + for (auto cy : dense_cy) { + for (auto cx : dense_cx) { + anchors.push_back( + {cx - 0.5f * skx, cy - 0.5f * sky, cx + 0.5f * skx, cy + 0.5f * sky}); // left top right bottom + } + } +} + +void calculateAnchorsZeroLevel(std::vector<Anchor>& anchors, + const int fx, + const int fy, + const std::vector<int>& minSizes, + const int step) { + for (auto s : minSizes) { + std::vector<float> vx, vy; + if (s == 32) { + vx.push_back(static_cast<float>(fx)); + vx.push_back(fx + 0.25f); + vx.push_back(fx + 0.5f); + vx.push_back(fx + 0.75f); + + vy.push_back(static_cast<float>(fy)); + vy.push_back(fy + 0.25f); + vy.push_back(fy + 0.5f); + vy.push_back(fy + 0.75f); + } else if (s == 64) { + vx.push_back(static_cast<float>(fx)); + vx.push_back(fx + 0.5f); + + vy.push_back(static_cast<float>(fy)); + vy.push_back(fy + 0.5f); + } else { + vx.push_back(fx + 0.5f); + vy.push_back(fy + 0.5f); + } + calculateAnchors(anchors, vx, vy, s, step); + } +} + +void ModelFaceBoxes::priorBoxes(const std::vector<std::pair<size_t, size_t>>& featureMaps) { + anchors.reserve(maxProposalsCount); + + for (size_t k = 0; k < featureMaps.size(); ++k) { + std::vector<float> a; + for (size_t i = 0; i < featureMaps[k].first; ++i) { + for (size_t j = 0; j < featureMaps[k].second; ++j) { + if (k == 0) { + calculateAnchorsZeroLevel(anchors, j, i, minSizes[k], steps[k]); + } else { + calculateAnchors(anchors, {j + 0.5f}, {i + 0.5f}, minSizes[k][0], steps[k]); + } + } + } + } +} + +std::pair<std::vector<size_t>, std::vector<float>> filterScores(const ov::Tensor& scoresTensor, + const float confidenceThreshold) { + auto shape = scoresTensor.get_shape(); + const float* scoresPtr = scoresTensor.data<float>(); + + std::vector<size_t> indices; + std::vector<float> scores; + scores.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE); + indices.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE); + for (size_t i = 1; i < shape[1] * shape[2]; i = i + 2) { + if (scoresPtr[i] > confidenceThreshold) { + indices.push_back(i / 2); + scores.push_back(scoresPtr[i]); + } + } + + return {indices, scores}; +} + +std::vector<Anchor> filterBoxes(const ov::Tensor& boxesTensor, + const std::vector<Anchor>& anchors, + const std::vector<size_t>& validIndices, + const std::vector<float>& variance) { + auto shape = boxesTensor.get_shape(); + const float* boxesPtr = boxesTensor.data<float>(); + + std::vector<Anchor> boxes; + boxes.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE); + for (auto i : validIndices) { + auto objStart = shape[2] * i; + + auto dx = boxesPtr[objStart]; + auto dy = boxesPtr[objStart + 1]; + auto dw = boxesPtr[objStart + 2]; + auto dh = boxesPtr[objStart + 3]; + + auto predCtrX = dx * variance[0] * anchors[i].getWidth() + anchors[i].getXCenter(); + auto predCtrY = dy * variance[0] * anchors[i].getHeight() + anchors[i].getYCenter(); + auto predW = exp(dw * variance[1]) * anchors[i].getWidth(); + auto predH = exp(dh * variance[1]) * anchors[i].getHeight(); + + boxes.push_back({static_cast<float>(predCtrX - 0.5f * predW), + static_cast<float>(predCtrY - 0.5f * predH), + static_cast<float>(predCtrX + 0.5f * predW), + static_cast<float>(predCtrY + 0.5f * predH)}); + } + + return boxes; +} + +std::unique_ptr<ResultBase> ModelFaceBoxes::postprocess(InferenceResult& infResult) { + // Filter scores and get valid indices for bounding boxes + const auto scoresTensor = infResult.outputsData[outputsNames[1]]; + const auto scores = filterScores(scoresTensor, confidenceThreshold); + + // Filter bounding boxes on indices + auto boxesTensor = infResult.outputsData[outputsNames[0]]; + std::vector<Anchor> boxes = filterBoxes(boxesTensor, anchors, scores.first, variance); + + // Apply Non-maximum Suppression + const std::vector<int> keep = nms(boxes, scores.second, boxIOUThreshold); + + // Create detection result objects + DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); + const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth; + const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight; + const float scaleX = static_cast<float>(netInputWidth) / imgWidth; + const float scaleY = static_cast<float>(netInputHeight) / imgHeight; + + result->objects.reserve(keep.size()); + for (auto i : keep) { + DetectedObject desc; + desc.confidence = scores.second[i]; + desc.x = clamp(boxes[i].left / scaleX, 0.f, static_cast<float>(imgWidth)); + desc.y = clamp(boxes[i].top / scaleY, 0.f, static_cast<float>(imgHeight)); + desc.width = clamp(boxes[i].getWidth() / scaleX, 0.f, static_cast<float>(imgWidth)); + desc.height = clamp(boxes[i].getHeight() / scaleY, 0.f, static_cast<float>(imgHeight)); + desc.labelID = 0; + desc.label = labels[0]; + + result->objects.push_back(desc); + } + + return std::unique_ptr<ResultBase>(result); +} |
