diff options
Diffstat (limited to 'python/openvino/runtime/common/models/src/detection_model_centernet.cpp')
| -rw-r--r-- | python/openvino/runtime/common/models/src/detection_model_centernet.cpp | 302 |
1 file changed, 302 insertions, 0 deletions
diff --git a/python/openvino/runtime/common/models/src/detection_model_centernet.cpp b/python/openvino/runtime/common/models/src/detection_model_centernet.cpp new file mode 100644 index 0000000..eac42a7 --- /dev/null +++ b/python/openvino/runtime/common/models/src/detection_model_centernet.cpp @@ -0,0 +1,302 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "models/detection_model_centernet.h" + +#include <stddef.h> + +#include <algorithm> +#include <cmath> +#include <map> +#include <stdexcept> +#include <utility> + +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> +#include <openvino/openvino.hpp> + +#include <utils/common.hpp> +#include <utils/image_utils.h> +#include <utils/ocv_common.hpp> + +#include "models/input_data.h" +#include "models/internal_model_data.h" +#include "models/results.h" + +ModelCenterNet::ModelCenterNet(const std::string& modelFileName, + float confidenceThreshold, + const std::vector<std::string>& labels, + const std::string& layout) + : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) {} + +void ModelCenterNet::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output ------------------------------------------------- + // --------------------------- Prepare input ------------------------------------------------------ + if (model->inputs().size() != 1) { + throw 
std::logic_error("CenterNet model wrapper expects models that have only 1 input"); + } + + const ov::Shape& inputShape = model->input().get_shape(); + const ov::Layout& inputLayout = getInputLayout(model->input()); + + if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) { + throw std::logic_error("Expected 3-channel input"); + } + + ov::preprocess::PrePostProcessor ppp(model); + inputTransform.setPrecision(ppp, model->input().get_any_name()); + ppp.input().tensor().set_layout("NHWC"); + + ppp.input().model().set_layout(inputLayout); + + // --------------------------- Reading image input parameters ------------------------------------------- + inputsNames.push_back(model->input().get_any_name()); + netInputWidth = inputShape[ov::layout::width_idx(inputLayout)]; + netInputHeight = inputShape[ov::layout::height_idx(inputLayout)]; + + // --------------------------- Prepare output ----------------------------------------------------- + if (model->outputs().size() != 3) { + throw std::logic_error("CenterNet model wrapper expects models that have 3 outputs"); + } + + const ov::Layout outLayout{"NCHW"}; + for (const auto& output : model->outputs()) { + auto outTensorName = output.get_any_name(); + outputsNames.push_back(outTensorName); + ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outLayout); + } + std::sort(outputsNames.begin(), outputsNames.end()); + model = ppp.build(); +} + +cv::Point2f getDir(const cv::Point2f& srcPoint, float rotRadius) { + float sn = sinf(rotRadius); + float cs = cosf(rotRadius); + + cv::Point2f srcResult(0.0f, 0.0f); + srcResult.x = srcPoint.x * cs - srcPoint.y * sn; + srcResult.y = srcPoint.x * sn + srcPoint.y * cs; + + return srcResult; +} + +cv::Point2f get3rdPoint(const cv::Point2f& a, const cv::Point2f& b) { + cv::Point2f direct = a - b; + return b + cv::Point2f(-direct.y, direct.x); +} + +cv::Mat getAffineTransform(float centerX, + float centerY, + int srcW, + float rot, + size_t outputWidth, + 
size_t outputHeight, + bool inv = false) { + float rotRad = static_cast<float>(CV_PI) * rot / 180.0f; + auto srcDir = getDir({0.0f, -0.5f * srcW}, rotRad); + cv::Point2f dstDir(0.0f, -0.5f * outputWidth); + std::vector<cv::Point2f> src(3, {0.0f, 0.0f}); + std::vector<cv::Point2f> dst(3, {0.0f, 0.0f}); + + src[0] = {centerX, centerY}; + src[1] = srcDir + src[0]; + src[2] = get3rdPoint(src[0], src[1]); + + dst[0] = {outputWidth * 0.5f, outputHeight * 0.5f}; + dst[1] = dst[0] + dstDir; + dst[2] = get3rdPoint(dst[0], dst[1]); + + cv::Mat trans; + if (inv) { + trans = cv::getAffineTransform(dst, src); + } else { + trans = cv::getAffineTransform(src, dst); + } + + return trans; +} + +std::shared_ptr<InternalModelData> ModelCenterNet::preprocess(const InputData& inputData, ov::InferRequest& request) { + auto& img = inputData.asRef<ImageInputData>().inputImage; + const auto& resizedImg = resizeImageExt(img, netInputWidth, netInputHeight, RESIZE_KEEP_ASPECT_LETTERBOX); + + request.set_input_tensor(wrapMat2Tensor(inputTransform(resizedImg))); + return std::make_shared<InternalImageModelData>(img.cols, img.rows); +} + +std::vector<std::pair<size_t, float>> nms(float* scoresPtr, const ov::Shape& shape, float threshold, int kernel = 3) { + std::vector<std::pair<size_t, float>> scores; + scores.reserve(ModelCenterNet::INIT_VECTOR_SIZE); + auto chSize = shape[2] * shape[3]; + + for (size_t i = 0; i < shape[1] * shape[2] * shape[3]; ++i) { + scoresPtr[i] = expf(scoresPtr[i]) / (1 + expf(scoresPtr[i])); + } + + for (size_t ch = 0; ch < shape[1]; ++ch) { + for (size_t w = 0; w < shape[2]; ++w) { + for (size_t h = 0; h < shape[3]; ++h) { + float max = scoresPtr[chSize * ch + shape[2] * w + h]; + + // --------------------- filter on threshold-------------------------------------- + if (max < threshold) { + continue; + } + + // --------------------- store index and score------------------------------------ + scores.push_back({chSize * ch + shape[2] * w + h, max}); + + bool next = true; 
+ // ---------------------- maxpool2d ----------------------------------------------- + for (int i = -kernel / 2; i < kernel / 2 + 1 && next; ++i) { + for (int j = -kernel / 2; j < kernel / 2 + 1; ++j) { + if (w + i >= 0 && w + i < shape[2] && h + j >= 0 && h + j < shape[3]) { + if (scoresPtr[chSize * ch + shape[2] * (w + i) + h + j] > max) { + scores.pop_back(); + next = false; + break; + } + } else { + if (max < 0) { + scores.pop_back(); + next = false; + break; + } + } + } + } + } + } + } + + return scores; +} + +static std::vector<std::pair<size_t, float>> filterScores(const ov::Tensor& scoresTensor, float threshold) { + auto shape = scoresTensor.get_shape(); + float* scoresPtr = scoresTensor.data<float>(); + + return nms(scoresPtr, shape, threshold); +} + +std::vector<std::pair<float, float>> filterReg(const ov::Tensor& regressionTensor, + const std::vector<std::pair<size_t, float>>& scores, + size_t chSize) { + const float* regPtr = regressionTensor.data<float>(); + std::vector<std::pair<float, float>> reg; + + for (auto s : scores) { + reg.push_back({regPtr[s.first % chSize], regPtr[chSize + s.first % chSize]}); + } + + return reg; +} + +std::vector<std::pair<float, float>> filterWH(const ov::Tensor& whTensor, + const std::vector<std::pair<size_t, float>>& scores, + size_t chSize) { + const float* whPtr = whTensor.data<float>(); + std::vector<std::pair<float, float>> wh; + + for (auto s : scores) { + wh.push_back({whPtr[s.first % chSize], whPtr[chSize + s.first % chSize]}); + } + + return wh; +} + +std::vector<ModelCenterNet::BBox> calcBoxes(const std::vector<std::pair<size_t, float>>& scores, + const std::vector<std::pair<float, float>>& reg, + const std::vector<std::pair<float, float>>& wh, + const ov::Shape& shape) { + std::vector<ModelCenterNet::BBox> boxes(scores.size()); + + for (size_t i = 0; i < boxes.size(); ++i) { + size_t chIdx = scores[i].first % (shape[2] * shape[3]); + auto xCenter = chIdx % shape[3]; + auto yCenter = chIdx / shape[3]; + + 
boxes[i].left = xCenter + reg[i].first - wh[i].first / 2.0f; + boxes[i].top = yCenter + reg[i].second - wh[i].second / 2.0f; + boxes[i].right = xCenter + reg[i].first + wh[i].first / 2.0f; + boxes[i].bottom = yCenter + reg[i].second + wh[i].second / 2.0f; + } + + return boxes; +} + +void transform(std::vector<ModelCenterNet::BBox>& boxes, + const ov::Shape& shape, + int scale, + float centerX, + float centerY) { + cv::Mat1f trans = getAffineTransform(centerX, centerY, scale, 0, shape[2], shape[3], true); + + for (auto& b : boxes) { + ModelCenterNet::BBox newbb; + + newbb.left = trans.at<float>(0, 0) * b.left + trans.at<float>(0, 1) * b.top + trans.at<float>(0, 2); + newbb.top = trans.at<float>(1, 0) * b.left + trans.at<float>(1, 1) * b.top + trans.at<float>(1, 2); + newbb.right = trans.at<float>(0, 0) * b.right + trans.at<float>(0, 1) * b.bottom + trans.at<float>(0, 2); + newbb.bottom = trans.at<float>(1, 0) * b.right + trans.at<float>(1, 1) * b.bottom + trans.at<float>(1, 2); + + b = newbb; + } +} + +std::unique_ptr<ResultBase> ModelCenterNet::postprocess(InferenceResult& infResult) { + // --------------------------- Filter data and get valid indices --------------------------------- + const auto& heatmapTensor = infResult.outputsData[outputsNames[0]]; + const auto& heatmapTensorShape = heatmapTensor.get_shape(); + const auto chSize = heatmapTensorShape[2] * heatmapTensorShape[3]; + const auto scores = filterScores(heatmapTensor, confidenceThreshold); + + const auto& regressionTensor = infResult.outputsData[outputsNames[1]]; + const auto reg = filterReg(regressionTensor, scores, chSize); + + const auto& whTensor = infResult.outputsData[outputsNames[2]]; + const auto wh = filterWH(whTensor, scores, chSize); + + // --------------------------- Calculate bounding boxes & apply inverse affine transform ---------- + auto boxes = calcBoxes(scores, reg, wh, heatmapTensorShape); + + const auto imgWidth = 
infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth; + const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight; + const auto scale = std::max(imgWidth, imgHeight); + const float centerX = imgWidth / 2.0f; + const float centerY = imgHeight / 2.0f; + + transform(boxes, heatmapTensorShape, scale, centerX, centerY); + + // --------------------------- Create detection result objects ------------------------------------ + DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); + + result->objects.reserve(scores.size()); + for (size_t i = 0; i < scores.size(); ++i) { + DetectedObject desc; + desc.confidence = scores[i].second; + desc.labelID = scores[i].first / chSize; + desc.label = getLabelName(desc.labelID); + desc.x = clamp(boxes[i].left, 0.f, static_cast<float>(imgWidth)); + desc.y = clamp(boxes[i].top, 0.f, static_cast<float>(imgHeight)); + desc.width = clamp(boxes[i].getWidth(), 0.f, static_cast<float>(imgWidth)); + desc.height = clamp(boxes[i].getHeight(), 0.f, static_cast<float>(imgHeight)); + + result->objects.push_back(desc); + } + + return std::unique_ptr<ResultBase>(result); +} |
