summaryrefslogtreecommitdiff
path: root/python/openvino/runtime/common/demo_utils/include/utils/ocv_common.hpp
diff options
context:
space:
mode:
author Eric Dao <eric@erickhangdao.com> 2025-03-10 17:54:31 -0400
committer Eric Dao <eric@erickhangdao.com> 2025-03-10 17:54:31 -0400
commit ab224e2e6ba65f5a369ec392f99cd8845ad06c98 (patch)
tree a1e757e9341863ed52b8ad4c5a1c45933aab9da4 /python/openvino/runtime/common/demo_utils/include/utils/ocv_common.hpp
parent 40da1752f2c8639186b72f6838aa415e854d0b1d (diff)
download thesis-master.tar.gz
thesis-master.tar.bz2
thesis-master.zip
completed thesis (HEAD, master)
Diffstat (limited to 'python/openvino/runtime/common/demo_utils/include/utils/ocv_common.hpp')
-rw-r--r-- python/openvino/runtime/common/demo_utils/include/utils/ocv_common.hpp 289
1 files changed, 289 insertions, 0 deletions
diff --git a/python/openvino/runtime/common/demo_utils/include/utils/ocv_common.hpp b/python/openvino/runtime/common/demo_utils/include/utils/ocv_common.hpp
new file mode 100644
index 0000000..ebb5e14
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/include/utils/ocv_common.hpp
@@ -0,0 +1,289 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief a header file with common samples functionality using OpenCV
+ * @file ocv_common.hpp
+ */
+
+#pragma once
+
+#include <opencv2/opencv.hpp>
+#include <openvino/openvino.hpp>
+
+#include "utils/common.hpp"
+#include "utils/shared_tensor_allocator.hpp"
+
+/**
+ * @brief Reads one element of a cv::Mat and converts it to the requested type.
+ *
+ * Handles CV_8UC1, CV_8UC3, CV_32FC1 and CV_32FC3 matrices. For the two
+ * single-channel types the channel index is ignored.
+ *
+ * @param mat - matrix to read from.
+ * @param h - row of the element.
+ * @param w - column of the element.
+ * @param c - channel of the element (only used for 3-channel matrices).
+ * @return the element value converted to T.
+ * @throws std::runtime_error if the matrix type is not one of the four above.
+ */
+template <typename T>
+const T getMatValue(const cv::Mat& mat, size_t h, size_t w, size_t c) {
+    const int matType = mat.type();
+    const int row = static_cast<int>(h);
+    const int col = static_cast<int>(w);
+
+    if (matType == CV_8UC1) {
+        return static_cast<T>(mat.at<uchar>(row, col));
+    }
+    if (matType == CV_8UC3) {
+        return static_cast<T>(mat.at<cv::Vec3b>(row, col)[c]);
+    }
+    if (matType == CV_32FC1) {
+        return static_cast<T>(mat.at<float>(row, col));
+    }
+    if (matType == CV_32FC3) {
+        return static_cast<T>(mat.at<cv::Vec3f>(row, col)[c]);
+    }
+    throw std::runtime_error("cv::Mat type is not recognized");
+}
+
+/**
+ * @brief Resize and copy image data from a cv::Mat object into one batch slot of a given Tensor object.
+ *
+ * The tensor shape is interpreted as NCHW. The image is resized to the tensor's
+ * width/height when they differ and is then written plane by plane (channel-major).
+ * An f32 tensor receives values converted to float; any other tensor is accessed
+ * as uint8_t and only accepts a non-float image.
+ *
+ * @param mat - given cv::Mat object with an image data (1 or 3 channels).
+ * @param tensor - Tensor object which is to be filled with the image data.
+ * @param batchIndex - batch index of the image inside of the tensor.
+ * @throws std::runtime_error if the tensor is not 4D, the channel counts do not match,
+ *         the channel count is not 1 or 3, batchIndex is outside of the batch dimension,
+ *         or a float image is to be written into a non-f32 tensor.
+ */
+static UNUSED void matToTensor(const cv::Mat& mat, const ov::Tensor& tensor, int batchIndex = 0) {
+    const ov::Shape tensorShape = tensor.get_shape();
+    static const ov::Layout layout("NCHW");
+    // The shape is indexed with 4D layout positions below, so the rank has to be checked first.
+    if (tensorShape.size() != 4) {
+        throw std::runtime_error("matToTensor expects a 4D tensor");
+    }
+    const size_t batch = tensorShape[ov::layout::batch_idx(layout)];
+    const size_t width = tensorShape[ov::layout::width_idx(layout)];
+    const size_t height = tensorShape[ov::layout::height_idx(layout)];
+    const size_t channels = tensorShape[ov::layout::channels_idx(layout)];
+    if (static_cast<size_t>(mat.channels()) != channels) {
+        throw std::runtime_error("The number of channels for model input and image must match");
+    }
+    if (channels != 1 && channels != 3) {
+        throw std::runtime_error("Unsupported number of channels");
+    }
+    // A negative or too large index would make the copy loops write outside of the tensor.
+    if (batchIndex < 0 || static_cast<size_t>(batchIndex) >= batch) {
+        throw std::runtime_error("Batch index is out of range for the given tensor");
+    }
+    // Keep the offset in size_t: an int can overflow for large tensors.
+    const size_t planeSize = width * height;
+    const size_t batchOffset = static_cast<size_t>(batchIndex) * planeSize * channels;
+
+    cv::Mat resizedMat;
+    if (static_cast<int>(width) != mat.size().width || static_cast<int>(height) != mat.size().height) {
+        cv::resize(mat, resizedMat, cv::Size(static_cast<int>(width), static_cast<int>(height)));
+    } else {
+        resizedMat = mat;
+    }
+
+    if (tensor.get_element_type() == ov::element::f32) {
+        // Plain float is used on purpose: float_t is not guaranteed to be a 32-bit float.
+        float* tensorData = tensor.data<float>() + batchOffset;
+        for (size_t c = 0; c < channels; c++)
+            for (size_t h = 0; h < height; h++)
+                for (size_t w = 0; w < width; w++)
+                    tensorData[c * planeSize + h * width + w] = getMatValue<float>(resizedMat, h, w, c);
+    } else {
+        uint8_t* tensorData = tensor.data<uint8_t>() + batchOffset;
+        if (resizedMat.depth() == CV_32F) {
+            throw std::runtime_error("Conversion of cv::Mat from float_t to uint8_t is forbidden");
+        }
+        for (size_t c = 0; c < channels; c++)
+            for (size_t h = 0; h < height; h++)
+                for (size_t w = 0; w < width; w++)
+                    tensorData[c * planeSize + h * width + w] = getMatValue<uint8_t>(resizedMat, h, w, c);
+    }
+}
+
+/**
+ * @brief Builds an ov::Tensor of shape {1, rows, cols, channels} for a dense 8-bit or 32-bit float cv::Mat.
+ *
+ * The tensor is created with an allocator constructed from the given mat
+ * (SharedTensorAllocator); presumably this lets the tensor use the mat's data
+ * without copying - confirm against shared_tensor_allocator.hpp.
+ *
+ * @param mat - image to wrap; its depth must be CV_8U or CV_32F and its rows must be stored without padding.
+ * @return tensor with element type u8 or f32 matching the mat depth.
+ * @throws std::runtime_error for any other depth or for a mat whose steps do not match a dense layout.
+ */
+static UNUSED ov::Tensor wrapMat2Tensor(const cv::Mat& mat) {
+    const int depth = mat.depth();
+    const bool isFloat = (depth == CV_32F);
+    if (!isFloat && depth != CV_8U) {
+        throw std::runtime_error("Unsupported mat type for wrapping");
+    }
+
+    const size_t channels = mat.channels();
+    const size_t height = mat.rows;
+    const size_t width = mat.cols;
+
+    // Steps are in bytes: step between rows and step between neighbouring pixels.
+    const size_t rowStep = mat.step.buf[0];
+    const size_t pixelStep = mat.step.buf[1];
+    const size_t elemSize = isFloat ? sizeof(float) : 1;
+
+    const bool pixelIsPacked = (pixelStep == channels * elemSize);
+    const bool rowIsPacked = (rowStep == channels * width * elemSize);
+    if (!(pixelIsPacked && rowIsPacked)) {
+        throw std::runtime_error("Doesn't support conversion from not dense cv::Mat");
+    }
+
+    const ov::Shape shape{ 1, height, width, channels };
+    auto allocator = std::make_shared<SharedTensorAllocator>(mat);
+    if (isFloat) {
+        return ov::Tensor(ov::element::f32, shape, ov::Allocator(allocator));
+    }
+    return ov::Tensor(ov::element::u8, shape, ov::Allocator(allocator));
+}
+
+/**
+ * @brief Resizes an image straight into the memory of a u8 tensor.
+ *
+ * The tensor shape is interpreted as NHWC. A CV_8UC3 cv::Mat header is created
+ * over tensor.data() and used as the destination of cv::resize.
+ * The preconditions (u8 element type, 4D shape, batch of 1, 3 channels) are
+ * checked with assert only, i.e. they are not enforced when NDEBUG is defined.
+ *
+ * @param mat - source image.
+ * @param tensor - destination tensor.
+ */
+static inline void resize2tensor(const cv::Mat& mat, const ov::Tensor& tensor) {
+    static const ov::Layout layout{"NHWC"};
+    const ov::Shape& shape = tensor.get_shape();
+    // Check the preconditions before the shape is indexed with 4D layout positions.
+    assert(tensor.get_element_type() == ov::element::u8);
+    assert(shape.size() == 4);
+    assert(shape[ov::layout::batch_idx(layout)] == 1);
+    assert(shape[ov::layout::channels_idx(layout)] == 3);
+    const cv::Size size{int(shape[ov::layout::width_idx(layout)]), int(shape[ov::layout::height_idx(layout)])};
+    cv::resize(mat, cv::Mat{size, CV_8UC3, tensor.data()}, size);
+}
+
+/**
+ * @brief Guesses a layout from the rank of a shape and the size of the candidate channel dimension.
+ *
+ * - 2D: "NC".
+ * - 3D: "CHW" if the first dimension is in [1, 4], otherwise "HWC".
+ * - 4D: "NCHW" if the second dimension is in [1, 4], otherwise "NHWC".
+ *
+ * @param shape - shape to inspect.
+ * @return the guessed layout.
+ * @throws std::runtime_error for any other rank.
+ */
+static inline ov::Layout getLayoutFromShape(const ov::Shape& shape) {
+    switch (shape.size()) {
+    case 2:
+        return "NC";
+    case 3:
+        // A leading dimension of 1..4 is taken to be the channel count.
+        return (shape[0] >= 1 && shape[0] <= 4) ? "CHW" : "HWC";
+    case 4:
+        // A dimension of 1..4 right after the batch is taken to be the channel count.
+        return (shape[1] >= 1 && shape[1] <= 4) ? "NCHW" : "NHWC";
+    default:
+        throw std::runtime_error("Unsupported " + std::to_string(shape.size()) + "D shape");
+    }
+}
+
+/**
+ * @brief Draws a text message on the frame twice: first in white with a slightly thicker stroke,
+ * then in the requested color on top, so the text stays readable on any background.
+ * @param frame - frame to put the text on.
+ * @param message - text of the message.
+ * @param position - bottom-left corner of the text string in the image.
+ * @param fontFace - font type.
+ * @param fontScale - font scale factor that is multiplied by the font-specific base size.
+ * @param color - text color.
+ * @param thickness - thickness of the lines used to draw a text.
+ */
+inline void putHighlightedText(const cv::Mat& frame,
+                               const std::string& message,
+                               cv::Point position,
+                               int fontFace,
+                               double fontScale,
+                               cv::Scalar color,
+                               int thickness) {
+    // Outline pass: the same glyphs in white, one unit thicker than the text itself.
+    const cv::Scalar outlineColor(255, 255, 255);
+    const int outlineThickness = thickness + 1;
+    cv::putText(frame, message, position, fontFace, fontScale, outlineColor, outlineThickness);
+
+    // Foreground pass in the caller's color.
+    cv::putText(frame, message, position, fontFace, fontScale, color, thickness);
+}
+
+// TODO: replace with Size::empty() after OpenCV3 is dropped
+/// @return true when the size has a non-positive width or height.
+static inline bool isSizeEmpty(const cv::Size& size) {
+    const bool hasArea = size.width > 0 && size.height > 0;
+    return !hasArea;
+}
+
+// TODO: replace with Rect::empty() after OpenCV3 is dropped
+/// @return true when the rectangle has a non-positive width or height.
+static inline bool isRectEmpty(const cv::Rect& rect) {
+    const bool hasArea = rect.width > 0 && rect.height > 0;
+    return !hasArea;
+}
+
+/**
+ * @brief Scales frames, points and rectangles so that a frame fits into a requested
+ * output resolution while keeping its aspect ratio.
+ *
+ * A default-constructed object is a no-op transform (doResize == false).
+ */
+class OutputTransform {
+public:
+    /// Creates a transform that leaves images and coordinates untouched.
+    OutputTransform() : doResize(false), scaleFactor(1) {}
+
+    /**
+     * @brief Creates a transform from inputSize to the largest size that fits into outputResolution.
+     *
+     * The scale factor is computed right away, so resize() and scaleCoord()/scaleRect()
+     * work even if computeResolution() is never called explicitly.
+     * @param inputSize - expected size of the incoming frames.
+     * @param outputResolution - bounding size the result must fit into.
+     */
+    OutputTransform(cv::Size inputSize, cv::Size outputResolution) :
+        doResize(true), scaleFactor(1), inputSize(inputSize), outputResolution(outputResolution) {
+        computeResolution();
+    }
+
+    /**
+     * @brief Recomputes the scale factor and the resulting resolution for the current input size.
+     *
+     * The factor is the smaller of the height and width ratios, so the scaled frame
+     * fits into outputResolution in both dimensions.
+     * @return the resolution frames are resized to.
+     */
+    cv::Size computeResolution() {
+        // An empty input size would lead to a division by zero and a NaN -> int conversion;
+        // treat it as "nothing to scale" instead.
+        if (inputSize.width <= 0 || inputSize.height <= 0) {
+            scaleFactor = 1;
+            newResolution = inputSize;
+            return newResolution;
+        }
+        float inputWidth = static_cast<float>(inputSize.width);
+        float inputHeight = static_cast<float>(inputSize.height);
+        scaleFactor = std::min(outputResolution.height / inputHeight, outputResolution.width / inputWidth);
+        newResolution = cv::Size{static_cast<int>(inputWidth * scaleFactor), static_cast<int>(inputHeight * scaleFactor)};
+        return newResolution;
+    }
+
+    /**
+     * @brief Resizes the image in place to the computed resolution.
+     *
+     * If the image size differs from the stored input size, the input size is updated
+     * and the resolution is recomputed first. Nothing happens when resizing is disabled
+     * or the scale factor is exactly 1.
+     * @param image - image to resize in place.
+     */
+    void resize(cv::Mat& image) {
+        if (!doResize) { return; }
+        cv::Size currSize = image.size();
+        if (currSize != inputSize) {
+            inputSize = currSize;
+            computeResolution();
+        }
+        if (scaleFactor == 1) { return; }
+        cv::resize(image, image, newResolution);
+    }
+
+    /**
+     * @brief Scales the x and y members of coord by the current scale factor, rounding down.
+     * @param coord - object with x and y members (e.g. a point), modified in place.
+     */
+    template<typename T>
+    void scaleCoord(T& coord) {
+        if (!doResize || scaleFactor == 1) { return; }
+        coord.x = std::floor(coord.x * scaleFactor);
+        coord.y = std::floor(coord.y * scaleFactor);
+    }
+
+    /**
+     * @brief Scales the x, y, width and height members of rect by the current scale factor, rounding down.
+     * @param rect - object with x, y, width and height members, modified in place.
+     */
+    template<typename T>
+    void scaleRect(T& rect) {
+        if (!doResize || scaleFactor == 1) { return; }
+        scaleCoord(rect);
+        rect.width = std::floor(rect.width * scaleFactor);
+        rect.height = std::floor(rect.height * scaleFactor);
+    }
+
+    // When false, every method above is a no-op.
+    bool doResize;
+
+private:
+    float scaleFactor;          // ratio applied to frames and coordinates
+    cv::Size inputSize;         // size of the frames the factor was computed for
+    cv::Size outputResolution;  // bounding size requested by the user
+    cv::Size newResolution;     // inputSize scaled by scaleFactor
+};
+
+/**
+ * @brief Optional input preprocessing: channel reversal, mean subtraction and scale division.
+ *
+ * The transform is "trivial" (returns its input unchanged) when no channel reversal
+ * is requested and both the mean and the scale strings are empty.
+ */
+class InputTransform {
+public:
+    /// Creates a trivial transform.
+    InputTransform() : reverseInputChannels(false), isTrivial(true) {}
+
+    /**
+     * @param reverseInputChannels - whether to swap the channel order (BGR -> RGB).
+     * @param meanValues - three space-separated mean values, or an empty string for zeros.
+     * @param scaleValues - three space-separated scale values, or an empty string for ones.
+     * @throws std::runtime_error if a non-empty string cannot be parsed into exactly 3 numbers.
+     */
+    InputTransform(bool reverseInputChannels, const std::string& meanValues, const std::string& scaleValues) :
+        reverseInputChannels(reverseInputChannels),
+        isTrivial(!reverseInputChannels && meanValues.empty() && scaleValues.empty()),
+        // string2Vec() does not touch any member, so calling it from the initializer list is safe.
+        means(meanValues.empty() ? cv::Scalar(0.0, 0.0, 0.0) : string2Vec(meanValues)),
+        stdScales(scaleValues.empty() ? cv::Scalar(1.0, 1.0, 1.0) : string2Vec(scaleValues)) {
+    }
+
+    /**
+     * @brief Parses a string of three space-separated numbers into a cv::Scalar.
+     * @param string - text to parse.
+     * @return scalar holding the three parsed values.
+     * @throws std::runtime_error if a token is not a number, is out of the float range,
+     *         or the number of values is not 3.
+     */
+    cv::Scalar string2Vec(const std::string& string) {
+        const auto& strValues = split(string, ' ');
+        std::vector<float> values;
+        try {
+            for (const auto& str : strValues)
+                values.push_back(std::stof(str));
+        }
+        catch (const std::invalid_argument&) {
+            throw std::runtime_error("Invalid parameter --mean_values or --scale_values is provided.");
+        }
+        catch (const std::out_of_range&) {
+            // std::stof reports values that do not fit into a float this way; report it
+            // like any other malformed value instead of leaking a different exception type.
+            throw std::runtime_error("Invalid parameter --mean_values or --scale_values is provided.");
+        }
+        if (values.size() != 3) {
+            throw std::runtime_error("InputTransform expects 3 values per channel, but get \"" + string + "\".");
+        }
+        return cv::Scalar(values[0], values[1], values[2]);
+    }
+
+    /**
+     * @brief Sets the element type of the named input tensor: u8 for a trivial transform, f32 otherwise.
+     * @param ppp - preprocessor to configure.
+     * @param tensorName - name of the input to configure.
+     */
+    void setPrecision(ov::preprocess::PrePostProcessor& ppp, const std::string& tensorName) {
+        const auto precision = isTrivial ? ov::element::u8 : ov::element::f32;
+        ppp.input(tensorName).tensor().
+            set_element_type(precision);
+    }
+
+    /**
+     * @brief Applies the transform to an image.
+     * @param inputs - source image.
+     * @return the input itself for a trivial transform; otherwise a CV_32F copy with channels
+     *         optionally reversed, means subtracted and scales divided out.
+     */
+    cv::Mat operator()(const cv::Mat& inputs) {
+        if (isTrivial) { return inputs; }
+        cv::Mat result;
+        inputs.convertTo(result, CV_32F);
+        if (reverseInputChannels) {
+            cv::cvtColor(result, result, cv::COLOR_BGR2RGB);
+        }
+        // TODO: merge the two following lines after OpenCV3 is dropped
+        result -= means;
+        result /= cv::Mat{stdScales};
+        return result;
+    }
+
+private:
+    bool reverseInputChannels;
+    bool isTrivial;
+    cv::Scalar means;
+    cv::Scalar stdScales;
+};
+
+/**
+ * @brief Video writer that opens its output file only when the first frame arrives.
+ *
+ * The file is opened with the MJPG fourcc, the configured fps and the size of the
+ * first frame. At most `lim` frames are written; a limit of 0 means no limit.
+ * Nothing is written at all when the file name is empty.
+ */
+class LazyVideoWriter {
+    cv::VideoWriter writer;
+    // Starts at 1 because the frame written right after opening is not counted in write().
+    unsigned nwritten;
+public:
+    const std::string filenames;
+    const double fps;
+    const unsigned lim;
+
+    /**
+     * @param filenames - output file name; empty disables writing.
+     * @param fps - frame rate passed to the underlying writer.
+     * @param lim - maximum number of frames to write, 0 for unlimited.
+     */
+    LazyVideoWriter(const std::string& filenames, double fps, unsigned lim) :
+        nwritten{1}, filenames{filenames}, fps{fps}, lim{lim} {}
+
+    /**
+     * @brief Writes a frame, opening the output on the first call.
+     * @param im - frame to write.
+     * @throws std::runtime_error if the output cannot be opened.
+     */
+    void write(const cv::Mat& im) {
+        if (!writer.isOpened()) {
+            if (filenames.empty()) {
+                return;
+            }
+            const int codec = cv::VideoWriter::fourcc('M', 'J', 'P', 'G');
+            if (!writer.open(filenames, codec, fps, im.size())) {
+                throw std::runtime_error("Can't open video writer");
+            }
+            writer.write(im);
+            return;
+        }
+
+        const bool underLimit = (0 == lim) || (nwritten < lim);
+        if (underLimit) {
+            writer.write(im);
+            ++nwritten;
+        }
+    }
+};