| author | Eric Dao <eric@erickhangdao.com> | 2025-03-10 17:54:31 -0400 |
|---|---|---|
| committer | Eric Dao <eric@erickhangdao.com> | 2025-03-10 17:54:31 -0400 |
| commit | ab224e2e6ba65f5a369ec392f99cd8845ad06c98 | |
| tree | a1e757e9341863ed52b8ad4c5a1c45933aab9da4 | /python/openvino/runtime/common |
| parent | 40da1752f2c8639186b72f6838aa415e854d0b1d | |
Diffstat (limited to 'python/openvino/runtime/common')
114 files changed, 13961 insertions, 0 deletions
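The files added below are designed to be used together. For orientation, here is a minimal, hypothetical demo `main()` that uses only APIs declared in the headers introduced by this commit (`openImagesCapture` from `utils/images_capture.h`, `PerformanceMetrics` from `utils/performance_metrics.hpp`, and the `slog` logger); the wiring itself is illustrative and not part of the patch:

```cpp
// Minimal sketch of how the demo_utils helpers added in this commit fit
// together. The inference step is elided; only APIs declared below are used.
#include <chrono>
#include <memory>

#include "utils/images_capture.h"
#include "utils/performance_metrics.hpp"
#include "utils/slog.hpp"

int main(int argc, char* argv[]) {
    if (argc < 2) {
        slog::err << "Usage: demo <image|image folder|video file|camera id>" << slog::endl;
        return 1;
    }

    // openImagesCapture() picks a reader backend based on what the input
    // string points to (single image, directory of images, video, camera).
    std::unique_ptr<ImagesCapture> cap = openImagesCapture(argv[1], /*loop=*/false);

    PerformanceMetrics metrics;
    // read() returns an empty cv::Mat once the input is exhausted.
    for (cv::Mat frame = cap->read(); frame.data; frame = cap->read()) {
        auto startTime = std::chrono::steady_clock::now();
        // ... feed `frame` to an inference request here ...
        metrics.update(startTime);  // accumulate latency/FPS statistics
    }

    slog::info << "Input type: " << cap->getType() << slog::endl;
    metrics.logTotal();
    return 0;
}
```

`openImagesCapture` hides the backend choice: as the implementation below shows, it returns an `ImreadWrapper`, a `DirReader`, or a `cv::VideoCapture`-based reader depending on the input, so demo code stays backend-agnostic.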
diff --git a/python/openvino/runtime/common/CMakeLists.txt b/python/openvino/runtime/common/CMakeLists.txt new file mode 100644 index 0000000..8ea3028 --- /dev/null +++ b/python/openvino/runtime/common/CMakeLists.txt @@ -0,0 +1,25 @@ +# Copyright (C) 2018-2020 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# Add dependencies for the following modules +find_package(OpenCV COMPONENTS core REQUIRED) + +# pull in plugin apis and preproc +add_subdirectory(utils) +add_subdirectory(format_reader) +add_subdirectory(monitors) + +if(DEFINED BUILD_DEMO) + # This dependency defines CNN prototypes used by text-detection demos. + include_directories("$ENV{COREDLA_ROOT}/transformations/inc/") + add_subdirectory(demo_utils) + add_subdirectory(models) + # This dependency is needed for runtime demos. The config_factory is used + # to produce hardware configurations and is required by pipelines. + #add_subdirectory(utils) + + # Following steps compile and link the pipelines library from OpenVINO 2021.4 installation folder. + # This dependency is required by segmentation demo. It implements a pipeline for sending streaming input and output for inference. + add_subdirectory(pipelines) +endif() diff --git a/python/openvino/runtime/common/README.md b/python/openvino/runtime/common/README.md new file mode 100644 index 0000000..1953fed --- /dev/null +++ b/python/openvino/runtime/common/README.md @@ -0,0 +1,7 @@ +## Patch Log + +This README documents the changes made to `runtime/common` so that they can be preserved and reapplied in future OpenVINO uplifts or updates. + +| Patch Name | PR Number | Description | +| ------------------------- | ------------------------- | ------------------------- | +| Make dla_benchmark less chatty | #3065 | Set the maximum number of printed warnings | diff --git a/python/openvino/runtime/common/demo_utils/CMakeLists.txt b/python/openvino/runtime/common/demo_utils/CMakeLists.txt new file mode 100644 index 0000000..b79d72a --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/CMakeLists.txt @@ -0,0 +1,14 @@ +# Copyright (C) 2018-2020 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +file(GLOB_RECURSE HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/include/*") +file(GLOB_RECURSE SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/*") + +source_group("src" FILES ${SOURCES}) +source_group("include" FILES ${HEADERS}) + +add_library(utils STATIC ${HEADERS} ${SOURCES}) +target_include_directories(utils PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" + "$ENV{COREDLA_ROOT}/dla_plugin/inc/") +target_link_libraries(utils PRIVATE openvino::runtime opencv_core opencv_imgcodecs opencv_videoio ie_samples_utils) diff --git a/python/openvino/runtime/common/demo_utils/include/utils/args_helper.hpp b/python/openvino/runtime/common/demo_utils/include/utils/args_helper.hpp new file mode 100644 index 0000000..7a638cc --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/include/utils/args_helper.hpp @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief a header file with common samples functionality + * @file args_helper.hpp + */ + +#pragma once + +#include <map> +#include <set> +#include <string> +#include <vector> + +#include <opencv2/core/types.hpp> +#include <openvino/openvino.hpp> + +/** +* @brief This function checks input args and existence of specified files in a given folder +* @param arg path to a file to be checked for existence +* @return files updated vector of verified input files +*/ +void 
readInputFilesArguments(std::vector<std::string>& files, const std::string& arg); + +/** +* @brief This function finds -i/--i key in input args +* It's necessary to process multiple values for single key +* @return files updated vector of verified input files +*/ +void parseInputFilesArguments(std::vector<std::string>& files); + +std::vector<std::string> split(const std::string& s, char delim); + +std::vector<std::string> parseDevices(const std::string& device_string); + +std::map<std::string, int32_t> parseValuePerDevice(const std::set<std::string>& devices, + const std::string& values_string); + +cv::Size stringToSize(const std::string& str); + +std::map<std::string, ov::Layout> parseLayoutString(const std::string& layout_string); diff --git a/python/openvino/runtime/common/demo_utils/include/utils/common.hpp b/python/openvino/runtime/common/demo_utils/include/utils/common.hpp new file mode 100644 index 0000000..dbe7cf0 --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/include/utils/common.hpp @@ -0,0 +1,190 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief a header file with common samples functionality + * @file common.hpp + */ + +#pragma once + +#include <iostream> +#include <string> +#include <utility> +#include <vector> + +#include <openvino/openvino.hpp> +#include "utils/slog.hpp" +#include "utils/args_helper.hpp" + +#ifndef UNUSED +#ifdef _WIN32 +#define UNUSED +#else +#define UNUSED __attribute__((unused)) +#endif +#endif + +template <typename T, std::size_t N> +constexpr std::size_t arraySize(const T(&)[N]) noexcept { + return N; +} + +static inline void catcher() noexcept { + if (std::current_exception()) { + try { + std::rethrow_exception(std::current_exception()); + } catch (const std::exception& error) { + slog::err << error.what() << slog::endl; + } catch (...) { + slog::err << "Non-exception object thrown" << slog::endl; + } + std::exit(1); + } + std::abort(); +} + +template <typename T> +T clamp(T value, T low, T high) { + return value < low ? low : (value > high ? high : value); +} + +inline slog::LogStream& operator<<(slog::LogStream& os, const ov::Version& version) { + return os << "OpenVINO" << slog::endl + << "\tversion: " << OPENVINO_VERSION_MAJOR << "." << OPENVINO_VERSION_MINOR << "." << OPENVINO_VERSION_PATCH << slog::endl + << "\tbuild: " << version.buildNumber; +} + +/** + * @class Color + * @brief A Color class stores channels of a given color + */ +class Color { +private: + unsigned char _r; + unsigned char _g; + unsigned char _b; + +public: + /** + * A default constructor. 
+ * @param r - value for red channel + * @param g - value for green channel + * @param b - value for blue channel + */ + Color(unsigned char r, + unsigned char g, + unsigned char b) : _r(r), _g(g), _b(b) {} + + inline unsigned char red() const { + return _r; + } + + inline unsigned char blue() const { + return _b; + } + + inline unsigned char green() const { + return _g; + } +}; + +// Known colors for training classes from the Cityscapes dataset +static UNUSED const Color CITYSCAPES_COLORS[] = { + { 128, 64, 128 }, + { 232, 35, 244 }, + { 70, 70, 70 }, + { 156, 102, 102 }, + { 153, 153, 190 }, + { 153, 153, 153 }, + { 30, 170, 250 }, + { 0, 220, 220 }, + { 35, 142, 107 }, + { 152, 251, 152 }, + { 180, 130, 70 }, + { 60, 20, 220 }, + { 0, 0, 255 }, + { 142, 0, 0 }, + { 70, 0, 0 }, + { 100, 60, 0 }, + { 90, 0, 0 }, + { 230, 0, 0 }, + { 32, 11, 119 }, + { 0, 74, 111 }, + { 81, 0, 81 } +}; + +inline void showAvailableDevices() { + ov::Core core; + std::vector<std::string> devices = core.get_available_devices(); + + std::cout << "Available devices:"; + for (const auto& device : devices) { + std::cout << ' ' << device; + } + std::cout << std::endl; +} + +inline std::string fileNameNoExt(const std::string& filepath) { + auto pos = filepath.rfind('.'); + if (pos == std::string::npos) return filepath; + return filepath.substr(0, pos); +} + +inline void logCompiledModelInfo( + const ov::CompiledModel& compiledModel, + const std::string& modelName, + const std::string& deviceName, + const std::string& modelType = "") { + slog::info << "The " << modelType << (modelType.empty() ? "" : " ") << "model " << modelName << " is loaded to " << deviceName << slog::endl; + std::set<std::string> devices; + for (const std::string& device : parseDevices(deviceName)) { + devices.insert(device); + } + + if (devices.find("AUTO") == devices.end()) { // do not print info for AUTO device + for (const auto& device : devices) { + try { + slog::info << "\tDevice: " << device << slog::endl; + int32_t nstreams = compiledModel.get_property(ov::streams::num); + slog::info << "\t\tNumber of streams: " << nstreams << slog::endl; + if (device == "CPU") { + int32_t nthreads = compiledModel.get_property(ov::inference_num_threads); + slog::info << "\t\tNumber of threads: " << (nthreads == 0 ? 
"AUTO" : std::to_string(nthreads)) << slog::endl; + } + } + catch (const ov::Exception&) {} + } + } +} + +inline void logBasicModelInfo(const std::shared_ptr<ov::Model>& model) { + slog::info << "Model name: " << model->get_friendly_name() << slog::endl; + + // Dump information about model inputs/outputs + ov::OutputVector inputs = model->inputs(); + ov::OutputVector outputs = model->outputs(); + + slog::info << "\tInputs: " << slog::endl; + for (const ov::Output<ov::Node>& input : inputs) { + const std::string name = input.get_any_name(); + const ov::element::Type type = input.get_element_type(); + const ov::PartialShape shape = input.get_partial_shape(); + const ov::Layout layout = ov::layout::get_layout(input); + + slog::info << "\t\t" << name << ", " << type << ", " << shape << ", " << layout.to_string() << slog::endl; + } + + slog::info << "\tOutputs: " << slog::endl; + for (const ov::Output<ov::Node>& output : outputs) { + const std::string name = output.get_any_name(); + const ov::element::Type type = output.get_element_type(); + const ov::PartialShape shape = output.get_partial_shape(); + const ov::Layout layout = ov::layout::get_layout(output); + + slog::info << "\t\t" << name << ", " << type << ", " << shape << ", " << layout.to_string() << slog::endl; + } + + return; +} diff --git a/python/openvino/runtime/common/demo_utils/include/utils/config_factory.h b/python/openvino/runtime/common/demo_utils/include/utils/config_factory.h new file mode 100644 index 0000000..c7440b5 --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/include/utils/config_factory.h @@ -0,0 +1,52 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#pragma once +#include <stdint.h> + +#include <map> +#include <set> +#include <string> + +#include <openvino/openvino.hpp> + +struct ModelConfig { + std::string deviceName; + std::string cpuExtensionsPath; + std::string clKernelsConfigPath; + std::string fpgaArchPath; + unsigned int maxAsyncRequests; + ov::AnyMap compiledModelConfig; + + std::set<std::string> getDevices(); + std::map<std::string, std::string> getLegacyConfig(); + +protected: + std::set<std::string> devices; +}; + +class ConfigFactory { +public: + static ModelConfig getUserConfig(const std::string& flags_d, + uint32_t flags_nireq, + const std::string& flags_nstreams, + uint32_t flags_nthreads, + const std::string &flags_arch); + static ModelConfig getMinLatencyConfig(const std::string& flags_d, uint32_t flags_nireq); + +protected: + static ModelConfig getCommonConfig(const std::string& flags_d, uint32_t flags_nireq); +}; diff --git a/python/openvino/runtime/common/demo_utils/include/utils/default_flags.hpp b/python/openvino/runtime/common/demo_utils/include/utils/default_flags.hpp new file mode 100644 index 0000000..83c32c2 --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/include/utils/default_flags.hpp @@ -0,0 +1,21 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include <gflags/gflags.h> + +#define DEFINE_INPUT_FLAGS \ +DEFINE_string(i, "", input_message); \ +DEFINE_bool(loop, false, loop_message); + +#define DEFINE_OUTPUT_FLAGS \ +DEFINE_string(o, "", output_message); \ +DEFINE_int32(limit, 1000, limit_message); + +static const char input_message[] = "Required. An input to process. The input must be a single image, a folder of " + "images, video file or camera id."; +static const char loop_message[] = "Optional. Enable reading the input in a loop."; +static const char output_message[] = "Optional. Name of the output file(s) to save."; +static const char limit_message[] = "Optional. Number of frames to store in output. 
If 0 is set, all frames are stored."; diff --git a/python/openvino/runtime/common/demo_utils/include/utils/grid_mat.hpp b/python/openvino/runtime/common/demo_utils/include/utils/grid_mat.hpp new file mode 100644 index 0000000..7d46d2b --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/include/utils/grid_mat.hpp @@ -0,0 +1,127 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include <algorithm> +#include <set> +#include <string> +#include <vector> + +#include <opencv2/core/core.hpp> + +class GridMat { +public: + cv::Mat outimg; + + explicit GridMat(const std::vector<cv::Size>& sizes, const cv::Size maxDisp = cv::Size{1920, 1080}) { + size_t maxWidth = 0; + size_t maxHeight = 0; + for (size_t i = 0; i < sizes.size(); i++) { + maxWidth = std::max(maxWidth, static_cast<size_t>(sizes[i].width)); + maxHeight = std::max(maxHeight, static_cast<size_t>(sizes[i].height)); + } + if (0 == maxWidth || 0 == maxHeight) { + throw std::invalid_argument("Input resolution must not be zero."); + } + + size_t nGridCols = static_cast<size_t>(ceil(sqrt(static_cast<float>(sizes.size())))); + size_t nGridRows = (sizes.size() - 1) / nGridCols + 1; + size_t gridMaxWidth = static_cast<size_t>(maxDisp.width/nGridCols); + size_t gridMaxHeight = static_cast<size_t>(maxDisp.height/nGridRows); + + float scaleWidth = static_cast<float>(gridMaxWidth) / maxWidth; + float scaleHeight = static_cast<float>(gridMaxHeight) / maxHeight; + float scaleFactor = std::min(1.f, std::min(scaleWidth, scaleHeight)); + + cellSize.width = static_cast<int>(maxWidth * scaleFactor); + cellSize.height = static_cast<int>(maxHeight * scaleFactor); + + for (size_t i = 0; i < sizes.size(); i++) { + cv::Point p; + p.x = cellSize.width * (i % nGridCols); + p.y = cellSize.height * (i / nGridCols); + points.push_back(p); + } + + outimg.create(cellSize.height * nGridRows, cellSize.width * nGridCols, CV_8UC3); + outimg.setTo(0); + clear(); + } + + cv::Size getCellSize() { + return cellSize; + } + + void fill(std::vector<cv::Mat>& frames) { + if (frames.size() > points.size()) { + throw std::logic_error("Cannot display " + std::to_string(frames.size()) + " channels in a grid with " + std::to_string(points.size()) + " cells"); + } + + for (size_t i = 0; i < frames.size(); i++) { + cv::Mat cell = outimg(cv::Rect(points[i].x, points[i].y, cellSize.width, cellSize.height)); + + if ((cellSize.width == frames[i].cols) && (cellSize.height == frames[i].rows)) { + frames[i].copyTo(cell); + } else if ((cellSize.width > frames[i].cols) && (cellSize.height > frames[i].rows)) { + frames[i].copyTo(cell(cv::Rect(0, 0, frames[i].cols, frames[i].rows))); + } else { + cv::resize(frames[i], cell, cellSize); + } + } + unupdatedSourceIDs.clear(); + } + + void update(const cv::Mat& frame, const size_t sourceID) { + const cv::Mat& cell = outimg(cv::Rect(points[sourceID], cellSize)); + + if ((cellSize.width == frame.cols) && (cellSize.height == frame.rows)) { + frame.copyTo(cell); + } else if ((cellSize.width > frame.cols) && (cellSize.height > frame.rows)) { + frame.copyTo(cell(cv::Rect(0, 0, frame.cols, frame.rows))); + } else { + cv::resize(frame, cell, cellSize); + } + unupdatedSourceIDs.erase(unupdatedSourceIDs.find(sourceID)); + } + + bool isFilled() const noexcept { + return unupdatedSourceIDs.empty(); + } + void clear() { + size_t counter = 0; + std::generate_n(std::inserter(unupdatedSourceIDs, unupdatedSourceIDs.end()), points.size(), [&counter]{return counter++;}); + } + std::set<size_t> 
getUnupdatedSourceIDs() const noexcept { + return unupdatedSourceIDs; + } + cv::Mat getMat() const noexcept { + return outimg; + } + +private: + cv::Size cellSize; + std::set<size_t> unupdatedSourceIDs; + std::vector<cv::Point> points; +}; + +void fillROIColor(cv::Mat& displayImage, cv::Rect roi, cv::Scalar color, double opacity) { + if (opacity > 0) { + roi = roi & cv::Rect(0, 0, displayImage.cols, displayImage.rows); + cv::Mat textROI = displayImage(roi); + cv::addWeighted(color, opacity, textROI, 1.0 - opacity , 0.0, textROI); + } +} + +void putTextOnImage(cv::Mat& displayImage, std::string str, cv::Point p, + cv::HersheyFonts font, double fontScale, cv::Scalar color, + int thickness = 1, cv::Scalar bgcolor = cv::Scalar(), + double opacity = 0) { + int baseline = 0; + cv::Size textSize = cv::getTextSize(str, font, 0.5, 1, &baseline); + fillROIColor(displayImage, cv::Rect(cv::Point(p.x, p.y + baseline), + cv::Point(p.x + textSize.width, p.y - textSize.height)), + bgcolor, opacity); + cv::putText(displayImage, str, p, font, fontScale, color, thickness); +} diff --git a/python/openvino/runtime/common/demo_utils/include/utils/image_utils.h b/python/openvino/runtime/common/demo_utils/include/utils/image_utils.h new file mode 100644 index 0000000..2731a9a --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/include/utils/image_utils.h @@ -0,0 +1,29 @@ +/* +// Copyright (C) 2021-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#pragma once + +#include <opencv2/opencv.hpp> + +enum RESIZE_MODE { + RESIZE_FILL, + RESIZE_KEEP_ASPECT, + RESIZE_KEEP_ASPECT_LETTERBOX +}; + +cv::Mat resizeImageExt(const cv::Mat& mat, int width, int height, RESIZE_MODE resizeMode = RESIZE_FILL, + cv::InterpolationFlags interpolationMode = cv::INTER_LINEAR, cv::Rect* roi = nullptr, + cv::Scalar BorderConstant = cv::Scalar(0, 0, 0)); diff --git a/python/openvino/runtime/common/demo_utils/include/utils/images_capture.h b/python/openvino/runtime/common/demo_utils/include/utils/images_capture.h new file mode 100644 index 0000000..f2afdfc --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/include/utils/images_capture.h @@ -0,0 +1,53 @@ +// Copyright (C) 2020-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once +#include <stddef.h> + +#include <limits> +#include <memory> +#include <string> + +#include <opencv2/core.hpp> + +#include "utils/performance_metrics.hpp" + +enum class read_type { efficient, safe }; + +class ImagesCapture { +public: + const bool loop; + + ImagesCapture(bool loop) : loop{loop} {} + virtual double fps() const = 0; + virtual cv::Mat read() = 0; + virtual std::string getType() const = 0; + const PerformanceMetrics& getMetrics() { + return readerMetrics; + } + virtual ~ImagesCapture() = default; + +protected: + PerformanceMetrics readerMetrics; +}; + +// An advanced version of +// try { +// return cv::VideoCapture(std::stoi(input)); +// } catch (const std::invalid_argument&) { +// return cv::VideoCapture(input); +// } catch (const std::out_of_range&) { +// return cv::VideoCapture(input); +// } +// Some VideoCapture backends continue owning the video buffer under cv::Mat. safe_copy forses to return a copy from +// read() +// https://github.com/opencv/opencv/blob/46e1560678dba83d25d309d8fbce01c40f21b7be/modules/gapi/include/opencv2/gapi/streaming/cap.hpp#L72-L76 +std::unique_ptr<ImagesCapture> openImagesCapture( + const std::string& input, + bool loop, + read_type type = read_type::efficient, + size_t initialImageId = 0, + size_t readLengthLimit = std::numeric_limits<size_t>::max(), // General option + cv::Size cameraResolution = {1280, 720} + ); diff --git a/python/openvino/runtime/common/demo_utils/include/utils/input_wrappers.hpp b/python/openvino/runtime/common/demo_utils/include/utils/input_wrappers.hpp new file mode 100644 index 0000000..eff38a7 --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/include/utils/input_wrappers.hpp @@ -0,0 +1,149 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include <list> +#include <memory> +#include <set> +#include <thread> +#include <vector> +#include <queue> + +#include <opencv2/opencv.hpp> + +class InputChannel; + +class IInputSource { +public: + virtual bool read(cv::Mat& mat, const std::shared_ptr<InputChannel>& caller) = 0; + virtual void addSubscriber(const std::weak_ptr<InputChannel>& inputChannel) = 0; + virtual cv::Size getSize() = 0; + virtual void lock() { + sourceLock.lock(); + } + virtual void unlock() { + sourceLock.unlock(); + } + virtual ~IInputSource() = default; +private: + std::mutex sourceLock; +}; + +class InputChannel: public std::enable_shared_from_this<InputChannel> { // note: public inheritance +public: + InputChannel(const InputChannel&) = delete; + InputChannel& operator=(const InputChannel&) = delete; + static std::shared_ptr<InputChannel> create(const std::shared_ptr<IInputSource>& source) { + auto tmp = 
std::shared_ptr<InputChannel>(new InputChannel(source)); + source->addSubscriber(tmp); + return tmp; + } + bool read(cv::Mat& mat) { + readQueueMutex.lock(); + if (readQueue.empty()) { + readQueueMutex.unlock(); + source->lock(); + readQueueMutex.lock(); + if (readQueue.empty()) { + bool res = source->read(mat, shared_from_this()); + readQueueMutex.unlock(); + source->unlock(); + return res; + } else { + source->unlock(); + } + } + mat = readQueue.front().clone(); + readQueue.pop(); + readQueueMutex.unlock(); + return true; + } + void push(const cv::Mat& mat) { + readQueueMutex.lock(); + readQueue.push(mat); + readQueueMutex.unlock(); + } + cv::Size getSize() { + return source->getSize(); + } + +private: + explicit InputChannel(const std::shared_ptr<IInputSource>& source): source{source} {} + std::shared_ptr<IInputSource> source; + std::queue<cv::Mat, std::list<cv::Mat>> readQueue; + std::mutex readQueueMutex; +}; + +class VideoCaptureSource: public IInputSource { +public: + VideoCaptureSource(const cv::VideoCapture& videoCapture, bool loop): videoCapture{videoCapture}, loop{loop}, + imSize{static_cast<int>(videoCapture.get(cv::CAP_PROP_FRAME_WIDTH)), static_cast<int>(videoCapture.get(cv::CAP_PROP_FRAME_HEIGHT))} {} + bool read(cv::Mat& mat, const std::shared_ptr<InputChannel>& caller) override { + if (!videoCapture.read(mat)) { + if (loop) { + videoCapture.set(cv::CAP_PROP_POS_FRAMES, 0); + videoCapture.read(mat); + } else { + return false; + } + } + if (1 != subscribedInputChannels.size()) { + cv::Mat shared = mat.clone(); + for (const std::weak_ptr<InputChannel>& weakInputChannel : subscribedInputChannels) { + try { + std::shared_ptr<InputChannel> sharedInputChannel = std::shared_ptr<InputChannel>(weakInputChannel); + if (caller != sharedInputChannel) { + sharedInputChannel->push(shared); + } + } catch (const std::bad_weak_ptr&) {} + } + } + return true; + } + void addSubscriber(const std::weak_ptr<InputChannel>& inputChannel) override { + subscribedInputChannels.push_back(inputChannel); + } + cv::Size getSize() override { + return imSize; + } + +private: + std::vector<std::weak_ptr<InputChannel>> subscribedInputChannels; + cv::VideoCapture videoCapture; + bool loop; + cv::Size imSize; +}; + +class ImageSource: public IInputSource { +public: + ImageSource(const cv::Mat& im, bool loop): im{im.clone()}, loop{loop} {} // clone to avoid image changing + bool read(cv::Mat& mat, const std::shared_ptr<InputChannel>& caller) override { + if (!loop) { + auto subscribedInputChannelsIt = subscribedInputChannels.find(caller); + if (subscribedInputChannels.end() == subscribedInputChannelsIt) { + return false; + } else { + subscribedInputChannels.erase(subscribedInputChannelsIt); + mat = im; + return true; + } + } else { + mat = im; + return true; + } + } + void addSubscriber(const std::weak_ptr<InputChannel>& inputChannel) override { + if (false == subscribedInputChannels.insert(inputChannel).second) + throw std::invalid_argument("The insertion did not take place"); + } + cv::Size getSize() override { + return im.size(); + } + +private: + std::set<std::weak_ptr<InputChannel>, std::owner_less<std::weak_ptr<InputChannel>>> subscribedInputChannels; + cv::Mat im; + bool loop; +}; diff --git a/python/openvino/runtime/common/demo_utils/include/utils/kuhn_munkres.hpp b/python/openvino/runtime/common/demo_utils/include/utils/kuhn_munkres.hpp new file mode 100644 index 0000000..6e6ac51 --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/include/utils/kuhn_munkres.hpp @@ -0,0 +1,57 @@ +// Copyright 
(C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "opencv2/core.hpp" + +#include <memory> +#include <vector> + + +/// +/// \brief The KuhnMunkres class +/// +/// Solves the assignment problem. +/// +class KuhnMunkres { +public: + /// + /// \brief Initializes the class for assignment problem solving. + /// \param[in] greedy If a faster greedy matching algorithm should be used. + explicit KuhnMunkres(bool greedy = false); + + /// + /// \brief Solves the assignment problem for given dissimilarity matrix. + /// It returns a vector that where each element is a column index for + /// corresponding row (e.g. result[0] stores optimal column index for very + /// first row in the dissimilarity matrix). + /// \param dissimilarity_matrix CV_32F dissimilarity matrix. + /// \return Optimal column index for each row. -1 means that there is no + /// column for row. + /// + std::vector<size_t> Solve(const cv::Mat &dissimilarity_matrix); + +private: + static constexpr int kStar = 1; + static constexpr int kPrime = 2; + + cv::Mat dm_; + cv::Mat marked_; + std::vector<cv::Point> points_; + + std::vector<int> is_row_visited_; + std::vector<int> is_col_visited_; + + int n_; + bool greedy_; + + void TrySimpleCase(); + bool CheckIfOptimumIsFound(); + cv::Point FindUncoveredMinValPos(); + void UpdateDissimilarityMatrix(float val); + int FindInRow(int row, int what); + int FindInCol(int col, int what); + void Run(); +}; diff --git a/python/openvino/runtime/common/demo_utils/include/utils/nms.hpp b/python/openvino/runtime/common/demo_utils/include/utils/nms.hpp new file mode 100644 index 0000000..1fd475f --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/include/utils/nms.hpp @@ -0,0 +1,81 @@ +/* +// Copyright (C) 2021-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#pragma once + +#include "opencv2/core.hpp" +#include <numeric> +#include <vector> + +struct Anchor { + float left; + float top; + float right; + float bottom; + + float getWidth() const { + return (right - left) + 1.0f; + } + float getHeight() const { + return (bottom - top) + 1.0f; + } + float getXCenter() const { + return left + (getWidth() - 1.0f) / 2.0f; + } + float getYCenter() const { + return top + (getHeight() - 1.0f) / 2.0f; + } +}; + +template <typename Anchor> +std::vector<int> nms(const std::vector<Anchor>& boxes, const std::vector<float>& scores, + const float thresh, bool includeBoundaries=false) { + std::vector<float> areas(boxes.size()); + for (size_t i = 0; i < boxes.size(); ++i) { + areas[i] = (boxes[i].right - boxes[i].left + includeBoundaries) * (boxes[i].bottom - boxes[i].top + includeBoundaries); + } + std::vector<int> order(scores.size()); + std::iota(order.begin(), order.end(), 0); + std::sort(order.begin(), order.end(), [&scores](int o1, int o2) { return scores[o1] > scores[o2]; }); + + size_t ordersNum = 0; + for (; ordersNum < order.size() && scores[order[ordersNum]] >= 0; ordersNum++); + + std::vector<int> keep; + bool shouldContinue = true; + for (size_t i = 0; shouldContinue && i < ordersNum; ++i) { + auto idx1 = order[i]; + if (idx1 >= 0) { + keep.push_back(idx1); + shouldContinue = false; + for (size_t j = i + 1; j < ordersNum; ++j) { + auto idx2 = order[j]; + if (idx2 >= 0) { + shouldContinue = true; + auto overlappingWidth = std::fminf(boxes[idx1].right, boxes[idx2].right) - std::fmaxf(boxes[idx1].left, boxes[idx2].left); + auto overlappingHeight = std::fminf(boxes[idx1].bottom, boxes[idx2].bottom) - std::fmaxf(boxes[idx1].top, boxes[idx2].top); + auto intersection = overlappingWidth > 0 && overlappingHeight > 0 ? overlappingWidth * overlappingHeight : 0; + auto overlap = intersection / (areas[idx1] + areas[idx2] - intersection); + + if (overlap >= thresh) { + order[j] = -1; + } + } + } + } + } + return keep; +} diff --git a/python/openvino/runtime/common/demo_utils/include/utils/ocv_common.hpp b/python/openvino/runtime/common/demo_utils/include/utils/ocv_common.hpp new file mode 100644 index 0000000..ebb5e14 --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/include/utils/ocv_common.hpp @@ -0,0 +1,289 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief a header file with common samples functionality using OpenCV + * @file ocv_common.hpp + */ + +#pragma once + +#include <opencv2/opencv.hpp> +#include <openvino/openvino.hpp> + +#include "utils/common.hpp" +#include "utils/shared_tensor_allocator.hpp" + +/** +* @brief Get cv::Mat value in the correct format. +*/ +template <typename T> +const T getMatValue(const cv::Mat& mat, size_t h, size_t w, size_t c) { + switch (mat.type()) { + case CV_8UC1: return (T)mat.at<uchar>(h, w); + case CV_8UC3: return (T)mat.at<cv::Vec3b>(h, w)[c]; + case CV_32FC1: return (T)mat.at<float>(h, w); + case CV_32FC3: return (T)mat.at<cv::Vec3f>(h, w)[c]; + } + throw std::runtime_error("cv::Mat type is not recognized"); +}; + +/** +* @brief Resize and copy image data from cv::Mat object to a given Tensor object. +* @param mat - given cv::Mat object with an image data. +* @param tensor - Tensor object which to be filled by an image data. +* @param batchIndex - batch index of an image inside of the blob. 
+*/ +static UNUSED void matToTensor(const cv::Mat& mat, const ov::Tensor& tensor, int batchIndex = 0) { + ov::Shape tensorShape = tensor.get_shape(); + static const ov::Layout layout("NCHW"); + const size_t width = tensorShape[ov::layout::width_idx(layout)]; + const size_t height = tensorShape[ov::layout::height_idx(layout)]; + const size_t channels = tensorShape[ov::layout::channels_idx(layout)]; + if (static_cast<size_t>(mat.channels()) != channels) { + throw std::runtime_error("The number of channels for model input and image must match"); + } + if (channels != 1 && channels != 3) { + throw std::runtime_error("Unsupported number of channels"); + } + int batchOffset = batchIndex * width * height * channels; + + cv::Mat resizedMat; + if (static_cast<int>(width) != mat.size().width || static_cast<int>(height) != mat.size().height) { + cv::resize(mat, resizedMat, cv::Size(width, height)); + } else { + resizedMat = mat; + } + + if (tensor.get_element_type() == ov::element::f32) { + float_t* tensorData = tensor.data<float_t>(); + for (size_t c = 0; c < channels; c++) + for (size_t h = 0; h < height; h++) + for (size_t w = 0; w < width; w++) + tensorData[batchOffset + c * width * height + h * width + w] = + getMatValue<float_t>(resizedMat, h, w, c); + } else { + uint8_t* tensorData = tensor.data<uint8_t>(); + if (resizedMat.depth() == CV_32F) { + throw std::runtime_error("Conversion of cv::Mat from float_t to uint8_t is forbidden"); + } + for (size_t c = 0; c < channels; c++) + for (size_t h = 0; h < height; h++) + for (size_t w = 0; w < width; w++) + tensorData[batchOffset + c * width * height + h * width + w] = + getMatValue<uint8_t>(resizedMat, h, w, c); + } +} + +static UNUSED ov::Tensor wrapMat2Tensor(const cv::Mat& mat) { + auto matType = mat.type() & CV_MAT_DEPTH_MASK; + if (matType != CV_8U && matType != CV_32F) { + throw std::runtime_error("Unsupported mat type for wrapping"); + } + bool isMatFloat = matType == CV_32F; + + const size_t channels = mat.channels(); + const size_t height = mat.rows; + const size_t width = mat.cols; + + const size_t strideH = mat.step.buf[0]; + const size_t strideW = mat.step.buf[1]; + + const bool isDense = !isMatFloat ? (strideW == channels && strideH == channels * width) : + (strideW == channels * sizeof(float) && strideH == channels * width * sizeof(float)); + if (!isDense) { + throw std::runtime_error("Doesn't support conversion from not dense cv::Mat"); + } + auto precision = isMatFloat ? ov::element::f32 : ov::element::u8; + auto allocator = std::make_shared<SharedTensorAllocator>(mat); + return ov::Tensor(precision, ov::Shape{ 1, height, width, channels }, ov::Allocator(allocator)); +} + +static inline void resize2tensor(const cv::Mat& mat, const ov::Tensor& tensor) { + static const ov::Layout layout{"NHWC"}; + const ov::Shape& shape = tensor.get_shape(); + cv::Size size{int(shape[ov::layout::width_idx(layout)]), int(shape[ov::layout::height_idx(layout)])}; + assert(tensor.get_element_type() == ov::element::u8); + assert(shape.size() == 4); + assert(shape[ov::layout::batch_idx(layout)] == 1); + assert(shape[ov::layout::channels_idx(layout)] == 3); + cv::resize(mat, cv::Mat{size, CV_8UC3, tensor.data()}, size); +} + +static inline ov::Layout getLayoutFromShape(const ov::Shape& shape) { + if (shape.size() == 2) { + return "NC"; + } + else if (shape.size() == 3) { + return (shape[0] >= 1 && shape[0] <= 4) ? "CHW" : + "HWC"; + } + else if (shape.size() == 4) { + return (shape[1] >= 1 && shape[1] <= 4) ? 
"NCHW" : + "NHWC"; + } + else { + throw std::runtime_error("Usupported " + std::to_string(shape.size()) + "D shape"); + } +} + +/** + * @brief Puts text message on the frame, highlights the text with a white border to make it distinguishable from + * the background. + * @param frame - frame to put the text on. + * @param message - text of the message. + * @param position - bottom-left corner of the text string in the image. + * @param fontFace - font type. + * @param fontScale - font scale factor that is multiplied by the font-specific base size. + * @param color - text color. + * @param thickness - thickness of the lines used to draw a text. + */ +inline void putHighlightedText(const cv::Mat& frame, + const std::string& message, + cv::Point position, + int fontFace, + double fontScale, + cv::Scalar color, + int thickness) { + cv::putText(frame, message, position, fontFace, fontScale, cv::Scalar(255, 255, 255), thickness + 1); + cv::putText(frame, message, position, fontFace, fontScale, color, thickness); +} + +// TODO: replace with Size::empty() after OpenCV3 is dropped +static inline bool isSizeEmpty(const cv::Size& size) { + return size.width <= 0 || size.height <= 0; +} + +// TODO: replace with Rect::empty() after OpenCV3 is dropped +static inline bool isRectEmpty(const cv::Rect& rect) { + return rect.width <= 0 || rect.height <= 0; +} + +class OutputTransform { +public: + OutputTransform() : doResize(false), scaleFactor(1) {} + + OutputTransform(cv::Size inputSize, cv::Size outputResolution) : + doResize(true), scaleFactor(1), inputSize(inputSize), outputResolution(outputResolution) {} + + cv::Size computeResolution() { + float inputWidth = static_cast<float>(inputSize.width); + float inputHeight = static_cast<float>(inputSize.height); + scaleFactor = std::min(outputResolution.height / inputHeight, outputResolution.width / inputWidth); + newResolution = cv::Size{static_cast<int>(inputWidth * scaleFactor), static_cast<int>(inputHeight * scaleFactor)}; + return newResolution; + } + + void resize(cv::Mat& image) { + if (!doResize) { return; } + cv::Size currSize = image.size(); + if (currSize != inputSize) { + inputSize = currSize; + computeResolution(); + } + if (scaleFactor == 1) { return; } + cv::resize(image, image, newResolution); + } + + template<typename T> + void scaleCoord(T& coord) { + if (!doResize || scaleFactor == 1) { return; } + coord.x = std::floor(coord.x * scaleFactor); + coord.y = std::floor(coord.y * scaleFactor); + } + + template<typename T> + void scaleRect(T& rect) { + if (!doResize || scaleFactor == 1) { return; } + scaleCoord(rect); + rect.width = std::floor(rect.width * scaleFactor); + rect.height = std::floor(rect.height * scaleFactor); + } + + bool doResize; + +private: + float scaleFactor; + cv::Size inputSize; + cv::Size outputResolution; + cv::Size newResolution; +}; + +class InputTransform { +public: + InputTransform() : reverseInputChannels(false), isTrivial(true) {} + + InputTransform(bool reverseInputChannels, const std::string& meanValues, const std::string& scaleValues) : + reverseInputChannels(reverseInputChannels), + isTrivial(!reverseInputChannels && meanValues.empty() && scaleValues.empty()), + means(meanValues.empty() ? cv::Scalar(0.0, 0.0, 0.0) : string2Vec(meanValues)), + stdScales(scaleValues.empty() ? 
cv::Scalar(1.0, 1.0, 1.0) : string2Vec(scaleValues)) { + } + + cv::Scalar string2Vec(const std::string& string) { + const auto& strValues = split(string, ' '); + std::vector<float> values; + try { + for (auto& str : strValues) + values.push_back(std::stof(str)); + } + catch (const std::invalid_argument&) { + throw std::runtime_error("Invalid parameter --mean_values or --scale_values is provided."); + } + if (values.size() != 3) { + throw std::runtime_error("InputTransform expects 3 values per channel, but get \"" + string + "\"."); + } + return cv::Scalar(values[0], values[1], values[2]); + } + + void setPrecision(ov::preprocess::PrePostProcessor& ppp, const std::string& tensorName) { + const auto precision = isTrivial ? ov::element::u8 : ov::element::f32; + ppp.input(tensorName).tensor(). + set_element_type(precision); + } + + cv::Mat operator()(const cv::Mat& inputs) { + if (isTrivial) { return inputs; } + cv::Mat result; + inputs.convertTo(result, CV_32F); + if (reverseInputChannels) { + cv::cvtColor(result, result, cv::COLOR_BGR2RGB); + } + // TODO: merge the two following lines after OpenCV3 is droppped + result -= means; + result /= cv::Mat{stdScales}; + return result; + } + +private: + bool reverseInputChannels; + bool isTrivial; + cv::Scalar means; + cv::Scalar stdScales; +}; + +class LazyVideoWriter { + cv::VideoWriter writer; + unsigned nwritten; +public: + const std::string filenames; + const double fps; + const unsigned lim; + + LazyVideoWriter(const std::string& filenames, double fps, unsigned lim) : + nwritten{1}, filenames{filenames}, fps{fps}, lim{lim} {} + void write(const cv::Mat& im) { + if (writer.isOpened() && (nwritten < lim || 0 == lim)) { + writer.write(im); + ++nwritten; + return; + } + if (!writer.isOpened() && !filenames.empty()) { + if (!writer.open(filenames, cv::VideoWriter::fourcc('M', 'J', 'P', 'G'), fps, im.size())) { + throw std::runtime_error("Can't open video writer"); + } + writer.write(im); + } + } +}; diff --git a/python/openvino/runtime/common/demo_utils/include/utils/performance_metrics.hpp b/python/openvino/runtime/common/demo_utils/include/utils/performance_metrics.hpp new file mode 100644 index 0000000..6c728b0 --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/include/utils/performance_metrics.hpp @@ -0,0 +1,92 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief a header file for performance metrics calculation class + * @file performance_metrics.hpp + */ + +#pragma once + +#include <chrono> +#include <iomanip> +#include <iostream> +#include <sstream> + +#include "utils/ocv_common.hpp" + +class PerformanceMetrics { +public: + using Clock = std::chrono::steady_clock; + using TimePoint = std::chrono::time_point<Clock>; + using Duration = Clock::duration; + using Ms = std::chrono::duration<double, std::ratio<1, 1000>>; + using Sec = std::chrono::duration<double, std::ratio<1, 1>>; + + struct Metrics { + double latency; + double fps; + }; + + enum MetricTypes { + ALL, + FPS, + LATENCY + }; + + PerformanceMetrics(Duration timeWindow = std::chrono::seconds(1)); + void update(TimePoint lastRequestStartTime, + const cv::Mat& frame, + cv::Point position = {15, 30}, + int fontFace = cv::FONT_HERSHEY_COMPLEX, + double fontScale = 0.75, + cv::Scalar color = {200, 10, 10}, + int thickness = 2, MetricTypes metricType = ALL); + void update(TimePoint lastRequestStartTime); + + /// Paints metrics over provided mat + /// @param frame frame to paint over + /// @param position left top corner of text 
block + /// @param fontScale font scale + /// @param color font color + /// @param thickness font thickness + void paintMetrics(const cv::Mat& frame, + cv::Point position = { 15, 30 }, + int fontFace = cv::FONT_HERSHEY_COMPLEX, + double fontScale = 0.75, + cv::Scalar color = { 200, 10, 10 }, + int thickness = 2, MetricTypes metricType = ALL) const; + + Metrics getLast() const; + Metrics getTotal() const; + void logTotal() const; + +private: + struct Statistic { + Duration latency; + Duration period; + int frameCount; + + Statistic() { + latency = Duration::zero(); + period = Duration::zero(); + frameCount = 0; + } + + void combine(const Statistic& other) { + latency += other.latency; + period += other.period; + frameCount += other.frameCount; + } + }; + + Duration timeWindowSize; + Statistic lastMovingStatistic; + Statistic currentMovingStatistic; + Statistic totalStatistic; + TimePoint lastUpdateTime; + bool firstFrameProcessed; +}; + +void logLatencyPerStage(double readLat, double preprocLat, double inferLat, double postprocLat, double renderLat); diff --git a/python/openvino/runtime/common/demo_utils/include/utils/shared_tensor_allocator.hpp b/python/openvino/runtime/common/demo_utils/include/utils/shared_tensor_allocator.hpp new file mode 100644 index 0000000..f74e8d0 --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/include/utils/shared_tensor_allocator.hpp @@ -0,0 +1,47 @@ +/* +// Copyright (C) 2021-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include <opencv2/core.hpp> +#include <openvino/runtime/allocator.hpp> + +// To prevent false-positive clang compiler warning +// (https://github.com/openvinotoolkit/openvino/pull/11092#issuecomment-1073846256): +// warning: destructor called on non-final 'SharedTensorAllocator' that has virtual functions +// but non-virtual destructor [-Wdelete-non-abstract-non-virtual-dtor] +// SharedTensorAllocator class declared as final + +class SharedTensorAllocator final : public ov::AllocatorImpl { +public: + SharedTensorAllocator(const cv::Mat& img) : img(img) {} + + ~SharedTensorAllocator() = default; + + void* allocate(const size_t bytes, const size_t) override { + return bytes <= img.rows * img.step[0] ? 
img.data : nullptr; + } + + void deallocate(void* handle, const size_t bytes, const size_t) override {} + + bool is_equal(const AllocatorImpl& other) const override { + auto other_tensor_allocator = dynamic_cast<const SharedTensorAllocator*>(&other); + return other_tensor_allocator != nullptr && other_tensor_allocator == this; + } + +private: + const cv::Mat img; +}; diff --git a/python/openvino/runtime/common/demo_utils/include/utils/slog.hpp b/python/openvino/runtime/common/demo_utils/include/utils/slog.hpp new file mode 100644 index 0000000..316b98d --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/include/utils/slog.hpp @@ -0,0 +1,99 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief a header file with logging facility for common samples + * @file log.hpp + */ + +#pragma once + +#include <iostream> +#include <string> + +namespace slog { + +/** + * @class LogStreamEndLine + * @brief The LogStreamEndLine class implements an end line marker for a log stream + */ +class LogStreamEndLine { }; + +static constexpr LogStreamEndLine endl; + + +/** + * @class LogStreamBoolAlpha + * @brief The LogStreamBoolAlpha class implements bool printing for a log stream + */ +class LogStreamBoolAlpha { }; + +static constexpr LogStreamBoolAlpha boolalpha; + + +/** + * @class LogStream + * @brief The LogStream class implements a stream for sample logging + */ +class LogStream { + std::string _prefix; + std::ostream* _log_stream; + bool _new_line; + +public: + /** + * @brief A constructor. Creates a LogStream object + * @param prefix The prefix to print + */ + LogStream(const std::string &prefix, std::ostream& log_stream) + : _prefix(prefix), _new_line(true) { + _log_stream = &log_stream; + } + + /** + * @brief A stream output operator to be used within the logger + * @param arg Object for serialization in the logger message + */ + template<class T> + LogStream &operator<<(const T &arg) { + if (_new_line) { + (*_log_stream) << "[ " << _prefix << " ] "; + _new_line = false; + } + + (*_log_stream) << arg; + return *this; + } + + // Specializing for LogStreamEndLine to support slog::endl + LogStream& operator<< (const LogStreamEndLine &/*arg*/) { + _new_line = true; + + (*_log_stream) << std::endl; + return *this; + } + + // Specializing for LogStreamBoolAlpha to support slog::boolalpha + LogStream& operator<< (const LogStreamBoolAlpha &/*arg*/) { + (*_log_stream) << std::boolalpha; + return *this; + } + + // Specializing for std::vector and std::list + template<template<class, class> class Container, class T> + LogStream& operator<< (const Container<T, std::allocator<T>>& container) { + for (const auto& el : container) { + *this << el << slog::endl; + } + return *this; + } +}; + + +static LogStream info("INFO", std::cout); +static LogStream debug("DEBUG", std::cout); +static LogStream warn("WARNING", std::cout); +static LogStream err("ERROR", std::cerr); + +} // namespace slog diff --git a/python/openvino/runtime/common/demo_utils/include/utils/threads_common.hpp b/python/openvino/runtime/common/demo_utils/include/utils/threads_common.hpp new file mode 100644 index 0000000..f0e5cbf --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/include/utils/threads_common.hpp @@ -0,0 +1,165 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include <algorithm> +#include <atomic> +#include <condition_variable> +#include <memory> +#include <mutex> +#include <utility> +#include <set> 
+#include <string> +#include <thread> +#include <vector> + +#include <opencv2/core/core.hpp> +#include "utils/performance_metrics.hpp" + +// VideoFrame can represent not a single image but the whole grid +class VideoFrame { +public: + typedef std::shared_ptr<VideoFrame> Ptr; + + VideoFrame(unsigned sourceID, int64_t frameId, const cv::Mat& frame = cv::Mat()) : + sourceID{sourceID}, frameId{frameId}, frame{frame} {} + virtual ~VideoFrame() = default; // A user has to define how it is reconstructed + + const unsigned sourceID; + const int64_t frameId; + cv::Mat frame; + + PerformanceMetrics::TimePoint timestamp; +}; + +class Worker; + +class Task { +public: + explicit Task(VideoFrame::Ptr sharedVideoFrame, float priority = 0): + sharedVideoFrame{sharedVideoFrame}, priority{priority} {} + virtual bool isReady() = 0; + virtual void process() = 0; + virtual ~Task() = default; + + std::string name; + VideoFrame::Ptr sharedVideoFrame; // it is possible that two tasks try to draw on the same cvMat + const float priority; +}; + +struct HigherPriority { + bool operator()(const std::shared_ptr<Task>& lhs, const std::shared_ptr<Task>& rhs) const { + return lhs->priority > rhs->priority + || (lhs->priority == rhs->priority && lhs->sharedVideoFrame->frameId < rhs->sharedVideoFrame->frameId) + || (lhs->priority == rhs->priority && lhs->sharedVideoFrame->frameId == rhs->sharedVideoFrame->frameId && lhs < rhs); + } +}; + +class Worker { +public: + explicit Worker(unsigned threadNum): + threadPool(threadNum), running{false} {} + ~Worker() { + stop(); + } + void runThreads() { + running = true; + for (std::thread& t : threadPool) { + t = std::thread(&Worker::threadFunc, this); + } + } + void push(std::shared_ptr<Task> task) { + tasksMutex.lock(); + tasks.insert(task); + tasksMutex.unlock(); + tasksCondVar.notify_one(); + } + void threadFunc() { + while (running) { + std::unique_lock<std::mutex> lk(tasksMutex); + while (running && tasks.empty()) { + tasksCondVar.wait(lk); + } + try { + auto it = std::find_if(tasks.begin(), tasks.end(), [](const std::shared_ptr<Task>& task){return task->isReady();}); + if (tasks.end() != it) { + const std::shared_ptr<Task> task = std::move(*it); + tasks.erase(it); + lk.unlock(); + task->process(); + } + } catch (...) 
{ + std::lock_guard<std::mutex> lock{exceptionMutex}; + if (nullptr == currentException) { + currentException = std::current_exception(); + stop(); + } + } + } + } + void stop() { + running = false; + tasksCondVar.notify_all(); + } + void join() { + for (auto& t : threadPool) { + t.join(); + } + if (nullptr != currentException) { + std::rethrow_exception(currentException); + } + } + +private: + std::condition_variable tasksCondVar; + std::set<std::shared_ptr<Task>, HigherPriority> tasks; + std::mutex tasksMutex; + std::vector<std::thread> threadPool; + std::atomic<bool> running; + std::exception_ptr currentException; + std::mutex exceptionMutex; +}; + +void tryPush(const std::weak_ptr<Worker>& worker, std::shared_ptr<Task>&& task) { + try { + std::shared_ptr<Worker>(worker)->push(task); + } catch (const std::bad_weak_ptr&) {} +} + +template <class C> class ConcurrentContainer { +public: + C container; + mutable std::mutex mutex; + + bool lockedEmpty() const noexcept { + std::lock_guard<std::mutex> lock{mutex}; + return container.empty(); + } + typename C::size_type lockedSize() const noexcept { + std::lock_guard<std::mutex> lock{mutex}; + return container.size(); + } + void lockedPushBack(const typename C::value_type& value) { + std::lock_guard<std::mutex> lock{mutex}; + container.push_back(value); + } + bool lockedTryPop(typename C::value_type& value) { + mutex.lock(); + if (!container.empty()) { + value = container.back(); + container.pop_back(); + mutex.unlock(); + return true; + } else { + mutex.unlock(); + return false; + } + } + + operator C() const { + std::lock_guard<std::mutex> lock{mutex}; + return container; + } +}; diff --git a/python/openvino/runtime/common/demo_utils/src/args_helper.cpp b/python/openvino/runtime/common/demo_utils/src/args_helper.cpp new file mode 100644 index 0000000..8f4bc35 --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/src/args_helper.cpp @@ -0,0 +1,155 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "utils/args_helper.hpp" +#include "utils/slog.hpp" + +#ifdef _WIN32 +#include "w_dirent.hpp" +#else +#include <dirent.h> +#endif + +#include <gflags/gflags.h> + +#include <sys/stat.h> +#include <map> + +#include <algorithm> +#include <cctype> +#include <sstream> + +void readInputFilesArguments(std::vector<std::string>& files, const std::string& arg) { + struct stat sb; + if (stat(arg.c_str(), &sb) != 0) { + if (arg.compare(0, 5, "rtsp:") != 0) { + slog::warn << "File " << arg << " cannot be opened!" << slog::endl; + return; + } + } + if (S_ISDIR(sb.st_mode)) { + DIR *dp; + dp = opendir(arg.c_str()); + if (dp == nullptr) { + slog::warn << "Directory " << arg << " cannot be opened!" << slog::endl; + return; + } + + struct dirent *ep; + while (nullptr != (ep = readdir(dp))) { + std::string fileName = ep->d_name; + if (fileName == "." 
|| fileName == "..") continue; + files.push_back(arg + "/" + ep->d_name); + } + closedir(dp); + } else { + files.push_back(arg); + } +} + +void parseInputFilesArguments(std::vector<std::string>& files) { + std::vector<std::string> args = gflags::GetArgvs(); + bool readArguments = false; + for (size_t i = 0; i < args.size(); i++) { + if (args.at(i) == "-i" || args.at(i) == "--i") { + readArguments = true; + continue; + } + if (!readArguments) { + continue; + } + if (args.at(i).c_str()[0] == '-') { + break; + } + readInputFilesArguments(files, args.at(i)); + } +} + +std::vector<std::string> split(const std::string& s, char delim) { + std::vector<std::string> result; + std::stringstream ss(s); + std::string item; + + while (getline(ss, item, delim)) { + result.push_back(item); + } + return result; +} + +std::vector<std::string> parseDevices(const std::string& device_string) { + const std::string::size_type colon_position = device_string.find(":"); + if (colon_position != std::string::npos) { + std::string device_type = device_string.substr(0, colon_position); + if (device_type == "HETERO" || device_type == "MULTI") { + std::string comma_separated_devices = device_string.substr(colon_position + 1); + std::vector<std::string> devices = split(comma_separated_devices, ','); + for (auto& device : devices) + device = device.substr(0, device.find("(")); + return devices; + } + } + return {device_string}; +} + +// Format: <device1>:<value1>,<device2>:<value2> or just <value> +std::map<std::string, int32_t> parseValuePerDevice(const std::set<std::string>& devices, + const std::string& values_string) { + auto values_string_upper = values_string; + std::transform(values_string_upper.begin(), + values_string_upper.end(), + values_string_upper.begin(), + [](unsigned char c){ return std::toupper(c); }); + std::map<std::string, int32_t> result; + auto device_value_strings = split(values_string_upper, ','); + for (auto& device_value_string : device_value_strings) { + auto device_value_vec = split(device_value_string, ':'); + if (device_value_vec.size() == 2) { + auto it = std::find(devices.begin(), devices.end(), device_value_vec.at(0)); + if (it != devices.end()) { + result[device_value_vec.at(0)] = std::stoi(device_value_vec.at(1)); + } + } else if (device_value_vec.size() == 1) { + uint32_t value = std::stoi(device_value_vec.at(0)); + for (const auto& device : devices) { + result[device] = value; + } + } else if (device_value_vec.size() != 0) { + throw std::runtime_error("Unknown string format: " + values_string); + } + } + return result; +} + +cv::Size stringToSize(const std::string& str) { + std::vector<std::string> strings = split(str, 'x'); + if (strings.size() != 2) { + throw std::invalid_argument("Can't convert std::string to cv::Size. The string must contain exactly one x"); + } + return {std::stoi(strings[0]), std::stoi(strings[1])}; +} + +std::map<std::string, ov::Layout> parseLayoutString(const std::string& layout_string) { + // Parse parameter string like "input0:NCHW,input1:NC" or "NCHW" (applied to all + // inputs) + std::map<std::string, ov::Layout> layouts; + std::string searchStr = (layout_string.find_last_of(':') == std::string::npos && !layout_string.empty() ? 
+ ":" : "") + layout_string; + auto colonPos = searchStr.find_last_of(':'); + while (colonPos != std::string::npos) { + auto startPos = searchStr.find_last_of(','); + auto inputName = searchStr.substr(startPos + 1, colonPos - startPos - 1); + auto inputLayout = searchStr.substr(colonPos + 1); + layouts[inputName] = ov::Layout(inputLayout); + searchStr = searchStr.substr(0, startPos + 1); + if (searchStr.empty() || searchStr.back() != ',') { + break; + } + searchStr.pop_back(); + colonPos = searchStr.find_last_of(':'); + } + if (!searchStr.empty()) { + throw std::invalid_argument("Can't parse input layout string: " + layout_string); + } + return layouts; +} diff --git a/python/openvino/runtime/common/demo_utils/src/config_factory.cpp b/python/openvino/runtime/common/demo_utils/src/config_factory.cpp new file mode 100644 index 0000000..2e9a442 --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/src/config_factory.cpp @@ -0,0 +1,111 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "utils/config_factory.h" + +#include <set> +#include <string> +#include <utility> +#include <vector> + +#include <openvino/runtime/intel_gpu/properties.hpp> +#include "dla_plugin_config.hpp" +#include "utils/args_helper.hpp" +#include <sys/stat.h> + +std::set<std::string> ModelConfig::getDevices() { + if (devices.empty()) { + for (const std::string& device : parseDevices(deviceName)) { + devices.insert(device); + } + } + + return devices; +} + +ModelConfig ConfigFactory::getUserConfig(const std::string& flags_d, + uint32_t flags_nireq, + const std::string& flags_nstreams, + uint32_t flags_nthreads, + const std::string &flags_arch) { + auto config = getCommonConfig(flags_d, flags_nireq); + + std::map<std::string, int> deviceNstreams = parseValuePerDevice(config.getDevices(), flags_nstreams); + for (const auto& device : config.getDevices()) { + if (flags_arch != "" && device == "FPGA") { + struct stat buffer; + if (stat(flags_arch.c_str(), &buffer) != 0) { + std::cout << "Error: architecture file: " << flags_arch << " doesn't exist. Please provide a valid path." << std::endl; + throw std::logic_error("architecture file path does not exist."); + } + config.compiledModelConfig.emplace(DLIAPlugin::properties::arch_path.name(), flags_arch); + } else if (device == "CPU") { // CPU supports a few special performance-oriented keys + // limit threading for CPU portion of inference + if (flags_nthreads != 0) + config.compiledModelConfig.emplace(ov::inference_num_threads.name(), flags_nthreads); + + config.compiledModelConfig.emplace(ov::affinity.name(), ov::Affinity::NONE); + + ov::streams::Num nstreams = + deviceNstreams.count(device) > 0 ? ov::streams::Num(deviceNstreams[device]) : ov::streams::AUTO; + config.compiledModelConfig.emplace(ov::streams::num.name(), nstreams); + } else if (device == "GPU") { + ov::streams::Num nstreams = + deviceNstreams.count(device) > 0 ? 
ov::streams::Num(deviceNstreams[device]) : ov::streams::AUTO; + config.compiledModelConfig.emplace(ov::streams::num.name(), nstreams); + if (flags_d.find("MULTI") != std::string::npos && + config.getDevices().find("CPU") != config.getDevices().end()) { + // multi-device execution with the CPU + GPU performs best with GPU throttling hint, + // which releases another CPU thread (that is otherwise used by the GPU driver for active polling) + config.compiledModelConfig.emplace(ov::intel_gpu::hint::queue_throttle.name(), + ov::intel_gpu::hint::ThrottleLevel(1)); + } + } + } + return config; +} + +ModelConfig ConfigFactory::getMinLatencyConfig(const std::string& flags_d, uint32_t flags_nireq) { + auto config = getCommonConfig(flags_d, flags_nireq); + for (const auto& device : config.getDevices()) { + if (device == "CPU") { // CPU supports a few special performance-oriented keys + config.compiledModelConfig.emplace(ov::streams::num.name(), 1); + } else if (device == "GPU") { + config.compiledModelConfig.emplace(ov::streams::num.name(), 1); + } + } + return config; +} + +ModelConfig ConfigFactory::getCommonConfig(const std::string& flags_d, uint32_t flags_nireq) { + ModelConfig config; + + if (!flags_d.empty()) { + config.deviceName = flags_d; + } + + config.maxAsyncRequests = flags_nireq; + + return config; +} + +std::map<std::string, std::string> ModelConfig::getLegacyConfig() { + std::map<std::string, std::string> config; + for (const auto& item : compiledModelConfig) { + config[item.first] = item.second.as<std::string>(); + } + return config; +} diff --git a/python/openvino/runtime/common/demo_utils/src/image_utils.cpp b/python/openvino/runtime/common/demo_utils/src/image_utils.cpp new file mode 100644 index 0000000..039dd66 --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/src/image_utils.cpp @@ -0,0 +1,55 @@ +/* +// Copyright (C) 2021-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "utils/image_utils.h" + +cv::Mat resizeImageExt(const cv::Mat& mat, int width, int height, RESIZE_MODE resizeMode, + cv::InterpolationFlags interpolationMode, cv::Rect* roi, cv::Scalar BorderConstant) { + if (width == mat.cols && height == mat.rows) { + return mat; + } + + cv::Mat dst; + + switch (resizeMode) { + case RESIZE_FILL: + { + cv::resize(mat, dst, cv::Size(width, height), interpolationMode); + if (roi) { + *roi = cv::Rect(0, 0, width, height); + } + break; + } + case RESIZE_KEEP_ASPECT: + case RESIZE_KEEP_ASPECT_LETTERBOX: + { + double scale = std::min(static_cast<double>(width) / mat.cols, static_cast<double>(height) / mat.rows); + cv::Mat resizedImage; + cv::resize(mat, resizedImage, cv::Size(0, 0), scale, scale, interpolationMode); + + int dx = resizeMode == RESIZE_KEEP_ASPECT ? 0 : (width - resizedImage.cols) / 2; + int dy = resizeMode == RESIZE_KEEP_ASPECT ? 
0 : (height - resizedImage.rows) / 2; + + cv::copyMakeBorder(resizedImage, dst, dy, height - resizedImage.rows - dy, + dx, width - resizedImage.cols - dx, cv::BORDER_CONSTANT, BorderConstant); + if (roi) { + *roi = cv::Rect(dx, dy, resizedImage.cols, resizedImage.rows); + } + break; + } + } + return dst; +} diff --git a/python/openvino/runtime/common/demo_utils/src/images_capture.cpp b/python/openvino/runtime/common/demo_utils/src/images_capture.cpp new file mode 100644 index 0000000..febcdd7 --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/src/images_capture.cpp @@ -0,0 +1,327 @@ +// Copyright (C) 2020-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include "utils/images_capture.h" + +#include <string.h> + +#ifdef _WIN32 +# include "w_dirent.hpp" +#else +# include <dirent.h> // for closedir, dirent, opendir, readdir, DIR +#endif + +#include <algorithm> +#include <chrono> +#include <fstream> +#include <memory> +#include <stdexcept> +#include <string> +#include <vector> + +#include <opencv2/imgcodecs.hpp> +#include <opencv2/videoio.hpp> + +class InvalidInput : public std::runtime_error { +public: + explicit InvalidInput(const std::string& message) noexcept : std::runtime_error(message) {} +}; + +class OpenError : public std::runtime_error { +public: + explicit OpenError(const std::string& message) noexcept : std::runtime_error(message) {} +}; + +class ImreadWrapper : public ImagesCapture { + cv::Mat img; + bool canRead; + +public: + ImreadWrapper(const std::string& input, bool loop) : ImagesCapture{loop}, canRead{true} { + auto startTime = std::chrono::steady_clock::now(); + + std::ifstream file(input.c_str()); + if (!file.good()) + throw InvalidInput("Can't find the image by " + input); + + img = cv::imread(input); + if (!img.data) + throw OpenError("Can't open the image from " + input); + else + readerMetrics.update(startTime); + } + + double fps() const override { + return 1.0; + } + + std::string getType() const override { + return "IMAGE"; + } + + cv::Mat read() override { + if (loop) + return img.clone(); + if (canRead) { + canRead = false; + return img.clone(); + } + return cv::Mat{}; + } +}; + +class DirReader : public ImagesCapture { + std::vector<std::string> names; + size_t fileId; + size_t nextImgId; + const size_t initialImageId; + const size_t readLengthLimit; + const std::string input; + +public: + DirReader(const std::string& input, bool loop, size_t initialImageId, size_t readLengthLimit) + : ImagesCapture{loop}, + fileId{0}, + nextImgId{0}, + initialImageId{initialImageId}, + readLengthLimit{readLengthLimit}, + input{input} { + DIR* dir = opendir(input.c_str()); + if (!dir) + throw InvalidInput("Can't find the dir by " + input); + while (struct dirent* ent = readdir(dir)) + if (strcmp(ent->d_name, ".") && strcmp(ent->d_name, "..")) + names.emplace_back(ent->d_name); + closedir(dir); + if (names.empty()) + throw OpenError("The dir " + input + " is empty"); + sort(names.begin(), names.end()); + size_t readImgs = 0; + while (fileId < names.size()) { + cv::Mat img = cv::imread(input + '/' + names[fileId]); + if (img.data) { + ++readImgs; + if (readImgs - 1 >= initialImageId) + return; + } + ++fileId; + } + throw OpenError("Can't read the first image from " + input); + } + + double fps() const override { + return 1.0; + } + + std::string getType() const override { + return "DIR"; + } + + cv::Mat read() override { + auto startTime = std::chrono::steady_clock::now(); + + while (fileId < names.size() && nextImgId < readLengthLimit) { + cv::Mat img 
= cv::imread(input + '/' + names[fileId]); + ++fileId; + if (img.data) { + ++nextImgId; + readerMetrics.update(startTime); + return img; + } + } + + if (loop) { + fileId = 0; + size_t readImgs = 0; + while (fileId < names.size()) { + cv::Mat img = cv::imread(input + '/' + names[fileId]); + ++fileId; + if (img.data) { + ++readImgs; + if (readImgs - 1 >= initialImageId) { + nextImgId = 1; + readerMetrics.update(startTime); + return img; + } + } + } + } + return cv::Mat{}; + } +}; + +class VideoCapWrapper : public ImagesCapture { + cv::VideoCapture cap; + bool first_read; + const read_type type; + size_t nextImgId; + const double initialImageId; + size_t readLengthLimit; + +public: + VideoCapWrapper(const std::string& input, bool loop, read_type type, size_t initialImageId, size_t readLengthLimit) + : ImagesCapture{loop}, + first_read{true}, + type{type}, + nextImgId{0}, + initialImageId{static_cast<double>(initialImageId)} { + if (0 == readLengthLimit) { + throw std::runtime_error("readLengthLimit must be positive"); + } + if (cap.open(input)) { + this->readLengthLimit = readLengthLimit; + if (!cap.set(cv::CAP_PROP_POS_FRAMES, this->initialImageId)) + throw OpenError("Can't set the frame to begin with"); + return; + } + throw InvalidInput("Can't open the video from " + input); + } + + double fps() const override { + return cap.get(cv::CAP_PROP_FPS); + } + + std::string getType() const override { + return "VIDEO"; + } + + cv::Mat read() override { + auto startTime = std::chrono::steady_clock::now(); + + if (nextImgId >= readLengthLimit) { + if (loop && cap.set(cv::CAP_PROP_POS_FRAMES, initialImageId)) { + nextImgId = 1; + cv::Mat img; + cap.read(img); + if (type == read_type::safe) { + img = img.clone(); + } + readerMetrics.update(startTime); + return img; + } + return cv::Mat{}; + } + cv::Mat img; + bool success = cap.read(img); + if (!success && first_read) { + throw std::runtime_error("The first image can't be read"); + } + first_read = false; + if (!success && loop && cap.set(cv::CAP_PROP_POS_FRAMES, initialImageId)) { + nextImgId = 1; + cap.read(img); + } else { + ++nextImgId; + } + if (type == read_type::safe) { + img = img.clone(); + } + readerMetrics.update(startTime); + return img; + } +}; + +class CameraCapWrapper : public ImagesCapture { + cv::VideoCapture cap; + const read_type type; + size_t nextImgId; + size_t readLengthLimit; + +public: + CameraCapWrapper(const std::string& input, + bool loop, + read_type type, + size_t readLengthLimit, + cv::Size cameraResolution) + : ImagesCapture{loop}, + type{type}, + nextImgId{0} { + if (0 == readLengthLimit) { + throw std::runtime_error("readLengthLimit must be positive"); + } + try { + if (cap.open(std::stoi(input))) { + this->readLengthLimit = loop ? std::numeric_limits<size_t>::max() : readLengthLimit; + cap.set(cv::CAP_PROP_BUFFERSIZE, 1); + cap.set(cv::CAP_PROP_FRAME_WIDTH, cameraResolution.width); + cap.set(cv::CAP_PROP_FRAME_HEIGHT, cameraResolution.height); + cap.set(cv::CAP_PROP_AUTOFOCUS, true); + cap.set(cv::CAP_PROP_FOURCC, cv::VideoWriter::fourcc('M', 'J', 'P', 'G')); + return; + } + throw OpenError("Can't open the camera from " + input); + } catch (const std::invalid_argument&) { + throw InvalidInput("Can't find the camera " + input); + } catch (const std::out_of_range&) { throw InvalidInput("Can't find the camera " + input); } + } + + double fps() const override { + return cap.get(cv::CAP_PROP_FPS) > 0 ? 
cap.get(cv::CAP_PROP_FPS) : 30; + } + + std::string getType() const override { + return "CAMERA"; + } + + cv::Mat read() override { + auto startTime = std::chrono::steady_clock::now(); + + if (nextImgId >= readLengthLimit) { + return cv::Mat{}; + } + cv::Mat img; + if (!cap.read(img)) { + throw std::runtime_error("The image can't be captured from the camera"); + } + if (type == read_type::safe) { + img = img.clone(); + } + ++nextImgId; + + readerMetrics.update(startTime); + return img; + } +}; + +std::unique_ptr<ImagesCapture> openImagesCapture(const std::string& input, + bool loop, + read_type type, + size_t initialImageId, + size_t readLengthLimit, + cv::Size cameraResolution + ) { + if (readLengthLimit == 0) + throw std::runtime_error{"Read length limit must be positive"}; + std::vector<std::string> invalidInputs, openErrors; + try { + return std::unique_ptr<ImagesCapture>(new ImreadWrapper{input, loop}); + } catch (const InvalidInput& e) { invalidInputs.push_back(e.what()); } catch (const OpenError& e) { + openErrors.push_back(e.what()); + } + + try { + return std::unique_ptr<ImagesCapture>(new DirReader{input, loop, initialImageId, readLengthLimit}); + } catch (const InvalidInput& e) { invalidInputs.push_back(e.what()); } catch (const OpenError& e) { + openErrors.push_back(e.what()); + } + + try { + return std::unique_ptr<ImagesCapture>(new VideoCapWrapper{input, loop, type, initialImageId, readLengthLimit}); + } catch (const InvalidInput& e) { invalidInputs.push_back(e.what()); } catch (const OpenError& e) { + openErrors.push_back(e.what()); + } + + try { + return std::unique_ptr<ImagesCapture>( + new CameraCapWrapper{input, loop, type, readLengthLimit, cameraResolution}); + } catch (const InvalidInput& e) { invalidInputs.push_back(e.what()); } catch (const OpenError& e) { + openErrors.push_back(e.what()); + } + + std::vector<std::string> errorMessages = openErrors.empty() ? 
invalidInputs : openErrors; + std::string errorsInfo; + for (const auto& message : errorMessages) { + errorsInfo.append(message + "\n"); + } + throw std::runtime_error(errorsInfo); +} diff --git a/python/openvino/runtime/common/demo_utils/src/kuhn_munkres.cpp b/python/openvino/runtime/common/demo_utils/src/kuhn_munkres.cpp new file mode 100644 index 0000000..7d612c1 --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/src/kuhn_munkres.cpp @@ -0,0 +1,169 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include <algorithm> +#include <limits> +#include <vector> + +#include <utils/kuhn_munkres.hpp> + +KuhnMunkres::KuhnMunkres(bool greedy) : n_(), greedy_(greedy) {} + +std::vector<size_t> KuhnMunkres::Solve(const cv::Mat& dissimilarity_matrix) { + CV_Assert(dissimilarity_matrix.type() == CV_32F); + double min_val; + cv::minMaxLoc(dissimilarity_matrix, &min_val); + + n_ = std::max(dissimilarity_matrix.rows, dissimilarity_matrix.cols); + dm_ = cv::Mat(n_, n_, CV_32F, cv::Scalar(0)); + marked_ = cv::Mat(n_, n_, CV_8S, cv::Scalar(0)); + points_ = std::vector<cv::Point>(n_ * 2); + + dissimilarity_matrix.copyTo(dm_( + cv::Rect(0, 0, dissimilarity_matrix.cols, dissimilarity_matrix.rows))); + + is_row_visited_ = std::vector<int>(n_, 0); + is_col_visited_ = std::vector<int>(n_, 0); + + Run(); + + std::vector<size_t> results(dissimilarity_matrix.rows, -1); + for (int i = 0; i < dissimilarity_matrix.rows; i++) { + const auto ptr = marked_.ptr<char>(i); + for (int j = 0; j < dissimilarity_matrix.cols; j++) { + if (ptr[j] == kStar) { + results[i] = (size_t)j; + } + } + } + return results; +} + +void KuhnMunkres::TrySimpleCase() { + auto is_row_visited = std::vector<int>(n_, 0); + auto is_col_visited = std::vector<int>(n_, 0); + + for (int row = 0; row < n_; row++) { + auto ptr = dm_.ptr<float>(row); + auto marked_ptr = marked_.ptr<char>(row); + auto min_val = *std::min_element(ptr, ptr + n_); + for (int col = 0; col < n_; col++) { + ptr[col] -= min_val; + if (ptr[col] == 0 && !is_col_visited[col] && !is_row_visited[row]) { + marked_ptr[col] = kStar; + is_col_visited[col] = 1; + is_row_visited[row] = 1; + } + } + } +} + +bool KuhnMunkres::CheckIfOptimumIsFound() { + int count = 0; + for (int i = 0; i < n_; i++) { + const auto marked_ptr = marked_.ptr<char>(i); + for (int j = 0; j < n_; j++) { + if (marked_ptr[j] == kStar) { + is_col_visited_[j] = 1; + count++; + } + } + } + + return count >= n_; +} + +cv::Point KuhnMunkres::FindUncoveredMinValPos() { + auto min_val = std::numeric_limits<float>::max(); + cv::Point min_val_pos(-1, -1); + for (int i = 0; i < n_; i++) { + if (!is_row_visited_[i]) { + auto dm_ptr = dm_.ptr<float>(i); + for (int j = 0; j < n_; j++) { + if (!is_col_visited_[j] && dm_ptr[j] < min_val) { + min_val = dm_ptr[j]; + min_val_pos = cv::Point(j, i); + } + } + } + } + return min_val_pos; +} + +void KuhnMunkres::UpdateDissimilarityMatrix(float val) { + for (int i = 0; i < n_; i++) { + auto dm_ptr = dm_.ptr<float>(i); + for (int j = 0; j < n_; j++) { + if (is_row_visited_[i]) dm_ptr[j] += val; + if (!is_col_visited_[j]) dm_ptr[j] -= val; + } + } +} + +int KuhnMunkres::FindInRow(int row, int what) { + for (int j = 0; j < n_; j++) { + if (marked_.at<char>(row, j) == what) { + return j; + } + } + return -1; +} + +int KuhnMunkres::FindInCol(int col, int what) { + for (int i = 0; i < n_; i++) { + if (marked_.at<char>(i, col) == what) { + return i; + } + } + return -1; +} + +void KuhnMunkres::Run() { + TrySimpleCase(); + if (greedy_) + return; 
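+    // The loop below is the standard Munkres refinement: find the minimum uncovered
+    // value and redistribute it to expose new zeros, prime an uncovered zero, and
+    // either adjust the row/column covers or flip the star/prime marks along the
+    // resulting alternating path, until CheckIfOptimumIsFound() sees a full matching.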
+ while (!CheckIfOptimumIsFound()) { + while (true) { + auto point = FindUncoveredMinValPos(); + auto min_val = dm_.at<float>(point.y, point.x); + if (min_val > 0) { + UpdateDissimilarityMatrix(min_val); + } else { + marked_.at<char>(point.y, point.x) = kPrime; + int col = FindInRow(point.y, kStar); + if (col >= 0) { + is_row_visited_[point.y] = 1; + is_col_visited_[col] = 0; + } else { + int count = 0; + points_[count] = point; + + while (true) { + int row = FindInCol(points_[count].x, kStar); + if (row >= 0) { + count++; + points_[count] = cv::Point(points_[count - 1].x, row); + int col = FindInRow(points_[count].y, kPrime); + count++; + points_[count] = cv::Point(col, points_[count - 1].y); + } else { + break; + } + } + + for (int i = 0; i < count + 1; i++) { + auto& mark = marked_.at<char>(points_[i].y, points_[i].x); + mark = mark == kStar ? 0 : kStar; + } + + is_row_visited_ = std::vector<int>(n_, 0); + is_col_visited_ = std::vector<int>(n_, 0); + + marked_.setTo(0, marked_ == kPrime); + break; + } + } + } + } +} diff --git a/python/openvino/runtime/common/demo_utils/src/performance_metrics.cpp b/python/openvino/runtime/common/demo_utils/src/performance_metrics.cpp new file mode 100644 index 0000000..d1e494e --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/src/performance_metrics.cpp @@ -0,0 +1,114 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include <limits> +#include "utils/performance_metrics.hpp" +#include "utils/slog.hpp" + +// timeWindow defines the length of the timespan over which the 'current fps' value is calculated +PerformanceMetrics::PerformanceMetrics(Duration timeWindow) + : timeWindowSize(timeWindow) + , firstFrameProcessed(false) +{} + +void PerformanceMetrics::update(TimePoint lastRequestStartTime, + const cv::Mat& frame, + cv::Point position, + int fontFace, + double fontScale, + cv::Scalar color, + int thickness, + MetricTypes metricType) { + update(lastRequestStartTime); + paintMetrics(frame, position, fontFace, fontScale, color, thickness, metricType); +} + +void PerformanceMetrics::update(TimePoint lastRequestStartTime) { + TimePoint currentTime = Clock::now(); + + if (!firstFrameProcessed) { + lastUpdateTime = lastRequestStartTime; + firstFrameProcessed = true; + } + + currentMovingStatistic.latency += currentTime - lastRequestStartTime; + currentMovingStatistic.period = currentTime - lastUpdateTime; + currentMovingStatistic.frameCount++; + + if (currentTime - lastUpdateTime > timeWindowSize) { + lastMovingStatistic = currentMovingStatistic; + totalStatistic.combine(lastMovingStatistic); + currentMovingStatistic = Statistic(); + + lastUpdateTime = currentTime; + } +} + +void PerformanceMetrics::paintMetrics(const cv::Mat& frame, cv::Point position, int fontFace, + double fontScale, cv::Scalar color, int thickness, MetricTypes metricType) const { + // Draw performance stats over frame + Metrics metrics = getLast(); + + std::ostringstream out; + if (!std::isnan(metrics.latency) && + (metricType == PerformanceMetrics::MetricTypes::LATENCY || metricType == PerformanceMetrics::MetricTypes::ALL)) { + out << "Latency: " << std::fixed << std::setprecision(1) << metrics.latency << " ms"; + putHighlightedText(frame, out.str(), position, fontFace, fontScale, color, thickness); + } + if (!std::isnan(metrics.fps) && + (metricType == PerformanceMetrics::MetricTypes::FPS || metricType == PerformanceMetrics::MetricTypes::ALL)) { + out.str(""); + out << "FPS: " << std::fixed << std::setprecision(1) << metrics.fps; + 
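+        // When both metrics are painted (MetricTypes::ALL), the FPS line is dropped
+        // 30 px below the latency line so the two rows do not overdraw each other.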
int offset = metricType == PerformanceMetrics::MetricTypes::ALL ? 30 : 0; + putHighlightedText(frame, out.str(), {position.x, position.y + offset}, fontFace, fontScale, color, thickness); + } +} + +PerformanceMetrics::Metrics PerformanceMetrics::getLast() const { + Metrics metrics; + + metrics.latency = lastMovingStatistic.frameCount != 0 + ? std::chrono::duration_cast<Ms>(lastMovingStatistic.latency).count() + / lastMovingStatistic.frameCount + : std::numeric_limits<double>::signaling_NaN(); + metrics.fps = lastMovingStatistic.period != Duration::zero() + ? lastMovingStatistic.frameCount + / std::chrono::duration_cast<Sec>(lastMovingStatistic.period).count() + : std::numeric_limits<double>::signaling_NaN(); + + return metrics; +} + +PerformanceMetrics::Metrics PerformanceMetrics::getTotal() const { + Metrics metrics; + + int frameCount = totalStatistic.frameCount + currentMovingStatistic.frameCount; + if (frameCount != 0) { + metrics.latency = std::chrono::duration_cast<Ms>( + totalStatistic.latency + currentMovingStatistic.latency).count() / frameCount; + metrics.fps = frameCount / std::chrono::duration_cast<Sec>( + totalStatistic.period + currentMovingStatistic.period).count(); + } else { + metrics.latency = std::numeric_limits<double>::signaling_NaN(); + metrics.fps = std::numeric_limits<double>::signaling_NaN(); + } + + return metrics; +} + +void PerformanceMetrics::logTotal() const { + Metrics metrics = getTotal(); + + slog::info << "\tLatency: " << std::fixed << std::setprecision(1) << metrics.latency << " ms" << slog::endl; + slog::info << "\tFPS: " << metrics.fps << slog::endl; +} + +void logLatencyPerStage(double readLat, double preprocLat, double inferLat, double postprocLat, double renderLat) { + slog::info << "\tDecoding:\t" << std::fixed << std::setprecision(1) << + readLat << " ms" << slog::endl; + slog::info << "\tPreprocessing:\t" << preprocLat << " ms" << slog::endl; + slog::info << "\tInference:\t" << inferLat << " ms" << slog::endl; + slog::info << "\tPostprocessing:\t" << postprocLat << " ms" << slog::endl; + slog::info << "\tRendering:\t" << renderLat << " ms" << slog::endl; +} diff --git a/python/openvino/runtime/common/demo_utils/src/w_dirent.hpp b/python/openvino/runtime/common/demo_utils/src/w_dirent.hpp new file mode 100644 index 0000000..0df8636 --- /dev/null +++ b/python/openvino/runtime/common/demo_utils/src/w_dirent.hpp @@ -0,0 +1,114 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#if defined(_WIN32) + +#ifndef NOMINMAX +# define NOMINMAX +#endif + +#include <WinSock2.h> +#include <Windows.h> +#include <stdlib.h> + +#else + +#include <unistd.h> +#include <cstdlib> +#include <string.h> + +#endif + +#include <string> + +#include <sys/stat.h> + +#if defined(WIN32) + // Copied from linux libc sys/stat.h: + #define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) + #define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#endif + +struct dirent { + char *d_name; + + explicit dirent(const wchar_t *wsFilePath) { + size_t i; + auto slen = wcslen(wsFilePath); + d_name = static_cast<char*>(malloc(slen + 1)); + wcstombs_s(&i, d_name, slen + 1, wsFilePath, slen); + } + + ~dirent() { + free(d_name); + } +}; + +class DIR { + WIN32_FIND_DATAA FindFileData; + HANDLE hFind; + dirent *next; + + static inline bool endsWith(const std::string &src, const char *with) { + int wl = static_cast<int>(strlen(with)); + int so = static_cast<int>(src.length()) - wl; + if (so < 0) return false; + return 0 == strncmp(with, &src[so], wl); + } + 
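+    // Note: FindFileData.dwReserved0 (a reserved field of WIN32_FIND_DATAA) is
+    // repurposed below as a "more entries available" flag: the constructor seeds it
+    // from FindFirstFileA() and nextEnt() refreshes it from FindNextFileA(), so
+    // isValid() and the iteration share a single piece of state.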
+public:
+    explicit DIR(const char *dirPath) : next(nullptr) {
+        std::string ws = dirPath;
+        if (endsWith(ws, "\\"))
+            ws += "*";
+        else
+            ws += "\\*";
+        hFind = FindFirstFileA(ws.c_str(), &FindFileData);
+        FindFileData.dwReserved0 = hFind != INVALID_HANDLE_VALUE;
+    }
+
+    ~DIR() {
+        // delete is a no-op on nullptr, so no guard is needed; the previous
+        // "if (!next) delete next;" only ever deleted a null pointer and leaked
+        // the last entry returned by nextEnt().
+        delete next;
+        FindClose(hFind);
+    }
+
+    bool isValid() const {
+        return (hFind != INVALID_HANDLE_VALUE && FindFileData.dwReserved0);
+    }
+
+    dirent* nextEnt() {
+        if (next != nullptr) delete next;
+        next = nullptr;
+
+        if (!FindFileData.dwReserved0) return nullptr;
+
+        wchar_t wbuf[4096];
+
+        size_t outSize;
+        mbstowcs_s(&outSize, wbuf, 4094, FindFileData.cFileName, 4094);
+        next = new dirent(wbuf);
+        FindFileData.dwReserved0 = FindNextFileA(hFind, &FindFileData);
+        return next;
+    }
+};
+
+
+static DIR *opendir(const char* dirPath) {
+    auto dp = new DIR(dirPath);
+    if (!dp->isValid()) {
+        delete dp;
+        return nullptr;
+    }
+    return dp;
+}
+
+static struct dirent *readdir(DIR *dp) {
+    return dp->nextEnt();
+}
+
+static void closedir(DIR *dp) {
+    delete dp;
+}
diff --git a/python/openvino/runtime/common/format_reader/CMakeLists.txt b/python/openvino/runtime/common/format_reader/CMakeLists.txt
new file mode 100644
index 0000000..3daab96
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/CMakeLists.txt
@@ -0,0 +1,55 @@
+# Copyright (C) 2018-2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set (TARGET_NAME "format_reader")
+
+file (GLOB MAIN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
+file (GLOB LIBRARY_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.h)
+
+# Create named folders for the sources within the .vcproj
+# Empty name lists them directly under the .vcproj
+source_group("src" FILES ${MAIN_SRC})
+source_group("include" FILES ${LIBRARY_HEADERS})
+
+# Create library file from sources.
+add_library(${TARGET_NAME} SHARED ${MAIN_SRC} ${LIBRARY_HEADERS})
+
+# Find OpenCV components if they exist
+find_package(OpenCV QUIET COMPONENTS core imgproc imgcodecs)
+if(NOT OpenCV_FOUND)
+    message(WARNING "OpenCV is disabled or not found, ${TARGET_NAME} will be built without OpenCV support")
+else()
+    target_link_libraries(${TARGET_NAME} PRIVATE ${OpenCV_LIBRARIES} ie_samples_utils)
+    if(UNIX AND NOT APPLE)
+        # Workaround issue that rpath-link is missing for PRIVATE dependencies
+        # Fixed in cmake 3.16.0 https://gitlab.kitware.com/cmake/cmake/issues/19556
+        target_link_libraries(${TARGET_NAME} INTERFACE "-Wl,-rpath-link,${OpenCV_INSTALL_PATH}/lib")
+    endif()
+    # Make this definition public so that it's also seen by dla benchmark, as dla benchmark
+    # uses this macro to identify which image extensions are supported by the image reader
+    target_compile_definitions(${TARGET_NAME} PUBLIC USE_OPENCV)
+endif()
+
+target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_FORMAT_READER)
+
+target_include_directories(${TARGET_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}"
+                                                 "${CMAKE_CURRENT_SOURCE_DIR}/..")
+
+set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}
+                                                FOLDER cpp_samples)
+
+if(COMMAND add_clang_format_target)
+    add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME})
+endif()
+
+install(
+    TARGETS ${TARGET_NAME}
+    RUNTIME DESTINATION samples_bin/ COMPONENT samples_bin EXCLUDE_FROM_ALL
+    LIBRARY DESTINATION samples_bin/ COMPONENT samples_bin EXCLUDE_FROM_ALL
+)
+
+install(TARGETS ${TARGET_NAME}
+        RUNTIME DESTINATION "dla/bin" COMPONENT EMUTEST
+        LIBRARY DESTINATION "dla/lib" COMPONENT EMUTEST
+        ARCHIVE DESTINATION "dla/lib" COMPONENT EMUTEST)
diff --git a/python/openvino/runtime/common/format_reader/MnistUbyte.cpp b/python/openvino/runtime/common/format_reader/MnistUbyte.cpp
new file mode 100644
index 0000000..182ef99
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/MnistUbyte.cpp
@@ -0,0 +1,66 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// clang-format off
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#include "MnistUbyte.h"
+// clang-format on
+
+using namespace FormatReader;
+
+int MnistUbyte::reverseInt(int i) {
+    unsigned char ch1, ch2, ch3, ch4;
+    ch1 = (unsigned char)(i & 255);
+    ch2 = (unsigned char)((i >> 8) & 255);
+    ch3 = (unsigned char)((i >> 16) & 255);
+    ch4 = (unsigned char)((i >> 24) & 255);
+    return (static_cast<int>(ch1) << 24) + (static_cast<int>(ch2) << 16) + (static_cast<int>(ch3) << 8) + ch4;
+}
+
+MnistUbyte::MnistUbyte(const std::string& filename) {
+    std::ifstream file(filename, std::ios::binary);
+    if (!file.is_open()) {
+        return;
+    }
+    int magic_number = 0;
+    int number_of_images = 0;
+    int n_rows = 0;
+    int n_cols = 0;
+    file.read(reinterpret_cast<char*>(&magic_number), sizeof(magic_number));
+    magic_number = reverseInt(magic_number);
+    if (magic_number != 2051) {
+        return;
+    }
+    file.read(reinterpret_cast<char*>(&number_of_images), sizeof(number_of_images));
+    number_of_images = reverseInt(number_of_images);
+    file.read(reinterpret_cast<char*>(&n_rows), sizeof(n_rows));
+    n_rows = reverseInt(n_rows);
+    _height = (size_t)n_rows;
+    file.read(reinterpret_cast<char*>(&n_cols), sizeof(n_cols));
+    n_cols = reverseInt(n_cols);
+    _width = (size_t)n_cols;
+    if (number_of_images > 1) {
+        std::cout << "[MNIST] Warning: number_of_images in mnist file equals " << number_of_images
+                  << ". Only the first image will be read."
<< std::endl; + } + + size_t size = _width * _height * 1; + + _data.reset(new unsigned char[size], std::default_delete<unsigned char[]>()); + size_t count = 0; + if (0 < number_of_images) { + for (int r = 0; r < n_rows; ++r) { + for (int c = 0; c < n_cols; ++c) { + unsigned char temp = 0; + file.read(reinterpret_cast<char*>(&temp), sizeof(temp)); + _data.get()[count++] = temp; + } + } + } + + file.close(); +} diff --git a/python/openvino/runtime/common/format_reader/MnistUbyte.h b/python/openvino/runtime/common/format_reader/MnistUbyte.h new file mode 100644 index 0000000..8991166 --- /dev/null +++ b/python/openvino/runtime/common/format_reader/MnistUbyte.h @@ -0,0 +1,58 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * \brief Mnist reader + * \file MnistUbyte.h + */ +#pragma once + +#include <memory> +#include <string> + +// clang-format off +#include "format_reader.h" +#include "register.h" +// clang-format on + +namespace FormatReader { +/** + * \class MnistUbyte + * \brief Reader for mnist db files + */ +class MnistUbyte : public Reader { +private: + int reverseInt(int i); + + static Register<MnistUbyte> reg; + +public: + /** + * \brief Constructor of Mnist reader + * @param filename - path to input data + * @return MnistUbyte reader object + */ + explicit MnistUbyte(const std::string& filename); + virtual ~MnistUbyte() {} + + /** + * \brief Get size + * @return size + */ + size_t size() const override { + return _width * _height * 1; + } + + // langsu: ResizeType is a added by us to support custom resizing functions (only in opencv_wrapper). + // format_reader is copied from openvino samples/cpp/common/format_reader/ + // this might need special care when doing a OV uplift + std::shared_ptr<unsigned char> getData(size_t width, size_t height, ResizeType resize_type) override { + if ((width * height != 0) && (_width * _height != width * height)) { + std::cout << "[ WARNING ] Image won't be resized! Please use OpenCV.\n"; + return nullptr; + } + return _data; + } +}; +} // namespace FormatReader diff --git a/python/openvino/runtime/common/format_reader/bmp.cpp b/python/openvino/runtime/common/format_reader/bmp.cpp new file mode 100644 index 0000000..240d13f --- /dev/null +++ b/python/openvino/runtime/common/format_reader/bmp.cpp @@ -0,0 +1,64 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// clang-format off +#include <fstream> +#include <iostream> + +#include "bmp.h" +// clang-format on + +using namespace std; +using namespace FormatReader; + +BitMap::BitMap(const string& filename) { + BmpHeader header; + BmpInfoHeader infoHeader; + + ifstream input(filename, ios::binary); + if (!input) { + return; + } + + input.read(reinterpret_cast<char*>(&header.type), 2); + + if (header.type != 'M' * 256 + 'B') { + std::cerr << "[BMP] file is not bmp type\n"; + return; + } + + input.read(reinterpret_cast<char*>(&header.size), 4); + input.read(reinterpret_cast<char*>(&header.reserved), 4); + input.read(reinterpret_cast<char*>(&header.offset), 4); + + input.read(reinterpret_cast<char*>(&infoHeader), sizeof(BmpInfoHeader)); + + bool rowsReversed = infoHeader.height < 0; + _width = infoHeader.width; + _height = abs(infoHeader.height); + + if (infoHeader.bits != 24) { + cerr << "[BMP] 24bpp only supported. 
But input has:" << infoHeader.bits << "\n"; + return; + } + + if (infoHeader.compression != 0) { + cerr << "[BMP] compression not supported\n"; + } + + int padSize = _width & 3; + char pad[3]; + size_t size = _width * _height * 3; + + _data.reset(new unsigned char[size], std::default_delete<unsigned char[]>()); + + input.seekg(header.offset, ios::beg); + + // reading by rows in invert vertically + for (uint32_t i = 0; i < _height; i++) { + uint32_t storeAt = rowsReversed ? i : (uint32_t)_height - 1 - i; + input.read(reinterpret_cast<char*>(_data.get()) + _width * 3 * storeAt, _width * 3); + input.read(pad, padSize); + } +} diff --git a/python/openvino/runtime/common/format_reader/bmp.h b/python/openvino/runtime/common/format_reader/bmp.h new file mode 100644 index 0000000..ac3ff31 --- /dev/null +++ b/python/openvino/runtime/common/format_reader/bmp.h @@ -0,0 +1,75 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * \brief BMP reader + * \file bmp.h + */ +#pragma once + +#include <memory> +#include <string> + +// clang-format off +#include "format_reader.h" +#include "register.h" +// clang-format on + +namespace FormatReader { +/** + * \class BitMap + * \brief Reader for bmp files + */ +class BitMap : public Reader { +private: + static Register<BitMap> reg; + + typedef struct BmpHeaderType { + unsigned short type = 0u; /* Magic identifier */ + unsigned int size = 0u; /* File size in bytes */ + unsigned int reserved = 0u; + unsigned int offset = 0u; /* Offset to image data, bytes */ + } BmpHeader; + + typedef struct BmpInfoHeaderType { + unsigned int size = 0u; /* Header size in bytes */ + int width = 0, height = 0; /* Width and height of image */ + unsigned short planes = 0u; /* Number of colour planes */ + unsigned short bits = 0u; /* Bits per pixel */ + unsigned int compression = 0u; /* Compression type */ + unsigned int imagesize = 0u; /* Image size in bytes */ + int xresolution = 0, yresolution = 0; /* Pixels per meter */ + unsigned int ncolours = 0u; /* Number of colours */ + unsigned int importantcolours = 0u; /* Important colours */ + } BmpInfoHeader; + +public: + /** + * \brief Constructor of BMP reader + * @param filename - path to input data + * @return BitMap reader object + */ + explicit BitMap(const std::string& filename); + virtual ~BitMap() {} + + /** + * \brief Get size + * @return size + */ + size_t size() const override { + return _width * _height * 3; + } + + // langsu: ResizeType is a added by us to support custom resizing functions (only in opencv_wrapper). + // format_reader is copied from openvino samples/cpp/common/format_reader/ + // this might need special care when doing a OV uplift + std::shared_ptr<unsigned char> getData(size_t width, size_t height, ResizeType resize_type) override { + if ((width * height != 0) && (_width * _height != width * height)) { + std::cout << "[ WARNING ] Image won't be resized! 
Please use OpenCV.\n"; + return nullptr; + } + return _data; + } +}; +} // namespace FormatReader diff --git a/python/openvino/runtime/common/format_reader/format_reader.cpp b/python/openvino/runtime/common/format_reader/format_reader.cpp new file mode 100644 index 0000000..94a8441 --- /dev/null +++ b/python/openvino/runtime/common/format_reader/format_reader.cpp @@ -0,0 +1,44 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include <iostream> + +// clang-format off +#include "bmp.h" +#include "MnistUbyte.h" +#include "yuv_nv12.h" +#include "opencv_wrapper.h" +#include "format_reader.h" +// clang-format on + +using namespace FormatReader; + +std::vector<Registry::CreatorFunction> Registry::_data; + +Register<MnistUbyte> MnistUbyte::reg; +Register<YUV_NV12> YUV_NV12::reg; +#ifdef USE_OPENCV +Register<OCVReader> OCVReader::reg; +#else +Register<BitMap> BitMap::reg; +#endif + +Reader* Registry::CreateReader(const char* filename) { + for (const auto &maker : _data) { + Reader* ol = maker(filename); + if (ol != nullptr && ol->size() != 0) + return ol; + if (ol != nullptr) + delete ol; + } + return nullptr; +} + +void Registry::RegisterReader(CreatorFunction f) { + _data.push_back(f); +} + +FORMAT_READER_API(Reader*) CreateFormatReader(const char* filename) { + return Registry::CreateReader(filename); +} diff --git a/python/openvino/runtime/common/format_reader/format_reader.h b/python/openvino/runtime/common/format_reader/format_reader.h new file mode 100644 index 0000000..99fc573 --- /dev/null +++ b/python/openvino/runtime/common/format_reader/format_reader.h @@ -0,0 +1,95 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * \brief Format reader abstract class implementation + * \file format_reader.h + */ +#pragma once + +#include <iostream> +#include <memory> +#include <string> +#include <vector> + +#if defined(_WIN32) +# ifdef IMPLEMENT_FORMAT_READER +# define FORMAT_READER_API(type) extern "C" __declspec(dllexport) type +# else +# define FORMAT_READER_API(type) extern "C" type +# endif +#elif (__GNUC__ >= 4) +# ifdef IMPLEMENT_FORMAT_READER +# define FORMAT_READER_API(type) extern "C" __attribute__((visibility("default"))) type +# else +# define FORMAT_READER_API(type) extern "C" type +# endif +#else +# define FORMAT_READER_API(TYPE) extern "C" TYPE +#endif + +namespace FormatReader { +/** + * \class FormatReader + * \brief This is an abstract class for reading input data + */ +class Reader { +protected: + /// \brief height + size_t _height = 0; + /// \brief width + size_t _width = 0; + /// \brief data + std::shared_ptr<unsigned char> _data; + +public: + virtual ~Reader() = default; + + // langsu: ResizeType is a added by us to support custom resizing functions (only in opencv_wrapper). 
+ // format_reader is copied from openvino samples/cpp/common/format_reader/ + // this might need special care when doing a OV uplift + enum ResizeType { + RESIZE, // resize the image to target (height, width) + PAD_RESIZE, // pad the image into a squared image and then resize the image to target (height, width) + }; + + /** + * \brief Get width + * @return width + */ + size_t width() const { + return _width; + } + + /** + * \brief Get height + * @return height + */ + size_t height() const { + return _height; + } + + /** + * \brief Get input data ptr + * @return shared pointer with input data + * @In case of using OpenCV, parameters width and height will be used for image resizing + */ + // langsu: ResizeType is a added by us to support custom resizing functions (only in opencv_wrapper). + // Needs special care when doing a OV uplift + virtual std::shared_ptr<unsigned char> getData(size_t width = 0, size_t height = 0, + ResizeType resize_type = ResizeType::RESIZE) = 0; + + /** + * \brief Get size + * @return size + */ + virtual size_t size() const = 0; +}; +} // namespace FormatReader + +/** + * \brief Function for create reader + * @return FormatReader pointer + */ +FORMAT_READER_API(FormatReader::Reader*) CreateFormatReader(const char* filename); diff --git a/python/openvino/runtime/common/format_reader/format_reader_ptr.h b/python/openvino/runtime/common/format_reader/format_reader_ptr.h new file mode 100644 index 0000000..eb9bf8e --- /dev/null +++ b/python/openvino/runtime/common/format_reader/format_reader_ptr.h @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * \brief Implementation of smart pointer for Reader class + * \file format_reader_ptr.h + */ +#pragma once + +#include <functional> +#include <memory> + +#include "format_reader.h" + +namespace FormatReader { +class ReaderPtr { +public: + explicit ReaderPtr(const char* imageName) : reader(CreateFormatReader(imageName)) {} + /** + * @brief dereference operator overload + * @return Reader + */ + Reader* operator->() const noexcept { + return reader.get(); + } + + /** + * @brief dereference operator overload + * @return Reader + */ + Reader* operator*() const noexcept { + return reader.get(); + } + + Reader* get() { + return reader.get(); + } + +protected: + std::unique_ptr<Reader> reader; +}; +} // namespace FormatReader diff --git a/python/openvino/runtime/common/format_reader/opencv_wrapper.cpp b/python/openvino/runtime/common/format_reader/opencv_wrapper.cpp new file mode 100644 index 0000000..b8ebeef --- /dev/null +++ b/python/openvino/runtime/common/format_reader/opencv_wrapper.cpp @@ -0,0 +1,83 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifdef USE_OPENCV +# include <fstream> +# include <iostream> + +// clang-format off +# include <opencv2/opencv.hpp> + +# include "samples/slog.hpp" +# include "opencv_wrapper.h" +// clang-format on + +using namespace std; +using namespace FormatReader; + +OCVReader::OCVReader(const string& filename) { + img = cv::imread(filename); + _size = 0; + + if (img.empty()) { + return; + } + + _size = img.size().width * img.size().height * img.channels(); + _width = img.size().width; + _height = img.size().height; +} + +// Set the maximum number of printed warnings; large image directories can otherwise be overwhelming +static size_t resize_warning_count = 0; +const size_t max_resize_warnings = 5; + +std::shared_ptr<unsigned char> OCVReader::getData(size_t width = 0, size_t height = 
0, ResizeType resize_type = ResizeType::RESIZE) {
+    if (width == 0)
+        width = img.cols;
+
+    if (height == 0)
+        height = img.rows;
+
+    size_t size = width * height * img.channels();
+    _data.reset(new unsigned char[size], std::default_delete<unsigned char[]>());
+
+    if (width != static_cast<size_t>(img.cols) || height != static_cast<size_t>(img.rows)) {
+        if (resize_warning_count < max_resize_warnings) {
+            slog::warn << "Image is resized from (" << img.cols << ", " << img.rows << ") to (" << width << ", " << height
+                       << ")" << slog::endl;
+            resize_warning_count++;
+        } else if (resize_warning_count == max_resize_warnings) {
+            slog::warn << "Additional image resizing messages have been suppressed." << slog::endl;
+            resize_warning_count++;
+        }
+    }
+
+    cv::Mat resized;
+    if (resize_type == ResizeType::RESIZE) {
+        resized = cv::Mat(cv::Size(width, height), img.type(), _data.get());
+        // cv::resize() just copies data to the output image if sizes are the same
+        cv::resize(img, resized, cv::Size(width, height));
+    } else if (resize_type == ResizeType::PAD_RESIZE) {
+        cv::Mat padded;
+        // Find the larger side out of width and height of the image
+        int max_dim = std::max(img.rows, img.cols);
+        // Calculate padding for the shorter dimension
+        int top = (max_dim - img.rows) / 2;
+        int bottom = (max_dim - img.rows + 1) / 2;
+        int left = (max_dim - img.cols) / 2;
+        int right = (max_dim - img.cols + 1) / 2;
+        // Add padding (0, i.e., black) to make the image a square
+        cv::copyMakeBorder(img, padded, top, bottom, left, right, cv::BORDER_CONSTANT, cv::Scalar());
+        cv::resize(padded, resized, cv::Size(width, height));
+        std::memcpy(_data.get(), resized.data, resized.total() * resized.elemSize());
+    } else {
+        slog::err << "Specified resize type is not implemented." << slog::endl;
+        std::exit(1);
+    }
+
+    return _data;
+}
+#endif
diff --git a/python/openvino/runtime/common/format_reader/opencv_wrapper.h b/python/openvino/runtime/common/format_reader/opencv_wrapper.h
new file mode 100644
index 0000000..c402e8d
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/opencv_wrapper.h
@@ -0,0 +1,58 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * \brief Image reader
+ * \file opencv_wrapper.h
+ */
+#pragma once
+
+#ifdef USE_OPENCV
+# include <memory>
+# include <string>
+
+// clang-format off
+# include <opencv2/opencv.hpp>
+
+# include "format_reader.h"
+# include "register.h"
+// clang-format on
+
+namespace FormatReader {
+/**
+ * \class OCVReader
+ * \brief OpenCV wrapper
+ */
+class OCVReader : public Reader {
+private:
+    cv::Mat img;
+    size_t _size;
+    static Register<OCVReader> reg;
+
+public:
+    /**
+     * \brief Constructor of OpenCV-based image reader
+     * @param filename - path to input data
+     * @return OCVReader object
+     */
+    explicit OCVReader(const std::string& filename);
+    virtual ~OCVReader() {}
+
+    /**
+     * \brief Get size
+     * @return size
+     */
+    size_t size() const override {
+        return _size;
+    }
+
+    // langsu: ResizeType was added by us to support custom resizing functions (only in opencv_wrapper).
+    // format_reader is copied from openvino samples/cpp/common/format_reader/
+    // this might need special care when doing an OV uplift
+    std::shared_ptr<unsigned char> getData(size_t width,
+                                           size_t height,
+                                           ResizeType resize_type) override;
+};
+}  // namespace FormatReader
+#endif
diff --git a/python/openvino/runtime/common/format_reader/register.h b/python/openvino/runtime/common/format_reader/register.h
new file mode 100644
index 0000000..781eca3
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/register.h
@@ -0,0 +1,58 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+/**
+ * \brief Register for readers
+ * \file register.h
+ */
+#pragma once
+
+#include <functional>
+#include <string>
+#include <vector>
+
+#include "format_reader.h"
+
+namespace FormatReader {
/**
+ * \class Registry
+ * \brief Creates readers via a factory
+ */
+class Registry {
+private:
+    typedef std::function<Reader*(const std::string& filename)> CreatorFunction;
+    static std::vector<CreatorFunction> _data;
+
+public:
+    /**
+     * \brief Create reader
+     * @param filename - path to input data
+     * @return Reader for input data or nullptr
+     */
+    static Reader* CreateReader(const char* filename);
+
+    /**
+     * \brief Registers a reader in the factory
+     * @param f - a creation function
+     */
+    static void RegisterReader(CreatorFunction f);
+};
+
+/**
+ * \class Register
+ * \brief Registers a reader type in the factory
+ */
+template <typename To>
+class Register {
+public:
+    /**
+     * \brief Constructor adds a creation function to the factory
+     * @return Register object
+     */
+    Register() {
+        Registry::RegisterReader([](const std::string& filename) -> Reader* {
+            return new To(filename);
+        });
+    }
+};
+}  // namespace FormatReader
diff --git a/python/openvino/runtime/common/format_reader/yuv_nv12.cpp b/python/openvino/runtime/common/format_reader/yuv_nv12.cpp
new file mode 100644
index 0000000..f25c5cb
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/yuv_nv12.cpp
@@ -0,0 +1,36 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// clang-format off
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#include "yuv_nv12.h"
+// clang-format on
+
+using namespace FormatReader;
+
+YUV_NV12::YUV_NV12(const std::string& filename) {
+    auto pos = filename.rfind('.');
+    if (pos == std::string::npos)
+        return;
+    if (filename.substr(pos + 1) != "yuv")
+        return;
+
+    std::ifstream file(filename, std::ios::binary);
+    if (!file.is_open()) {
+        return;
+    }
+
+    file.seekg(0, file.end);
+    _size = file.tellg();
+    file.seekg(0, file.beg);
+
+    _data.reset(new unsigned char[_size], std::default_delete<unsigned char[]>());
+
+    file.read(reinterpret_cast<char*>(_data.get()), _size);
+
+    file.close();
+}
diff --git a/python/openvino/runtime/common/format_reader/yuv_nv12.h b/python/openvino/runtime/common/format_reader/yuv_nv12.h
new file mode 100644
index 0000000..dd74c7b
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/yuv_nv12.h
@@ -0,0 +1,57 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * \brief YUV NV12 reader
+ * \file yuv_nv12.h
+ */
+#pragma once
+
+#include <memory>
+#include <string>
+
+// clang-format off
+#include "format_reader.h"
+#include "register.h"
+// clang-format on
+
+namespace FormatReader {
+/**
+ * \class YUV_NV12
+ * \brief Reader for YUV NV12 files
+ */
+class YUV_NV12 : public Reader {
+private:
+    static Register<YUV_NV12> reg;
+    size_t _size = 0;
+
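+    // NV12 layout: a full-resolution Y plane followed by a single interleaved,
+    // quarter-resolution UV plane, so a WxH frame occupies exactly W * H * 3 / 2
+    // bytes. getData() below uses this identity to validate the file size against
+    // the requested dimensions.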
+public:
+    /**
+     * \brief Constructor of YUV NV12 reader
+     * @param filename - path to input data
+     * @return YUV_NV12 reader object
+     */
+    explicit YUV_NV12(const std::string& filename);
+    virtual ~YUV_NV12() {}
+
+    /**
+     * \brief Get size
+     * @return size
+     */
+    size_t size() const override {
+        return _size;
+    }
+
+    // langsu: ResizeType was added by us to support custom resizing functions (only in opencv_wrapper).
+    // format_reader is copied from openvino samples/cpp/common/format_reader/
+    // this might need special care when doing an OV uplift
+    std::shared_ptr<unsigned char> getData(size_t width, size_t height, Reader::ResizeType resize_type) override {
+        if (width * height * 3 / 2 != size()) {
+            std::cout << "Image dimensions do not match the NV12 file size\n";
+            return nullptr;
+        }
+        return _data;
+    }
+};
+}  // namespace FormatReader
diff --git a/python/openvino/runtime/common/models/CMakeLists.txt b/python/openvino/runtime/common/models/CMakeLists.txt
new file mode 100644
index 0000000..07c8da3
--- /dev/null
+++ b/python/openvino/runtime/common/models/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+file(GLOB SOURCES ./src/*.cpp)
+file(GLOB HEADERS ./include/models/*.h)
+
+# Create named folders for the sources within the .vcproj
+# Empty name lists them directly under the .vcproj
+source_group("src" FILES ${SOURCES})
+source_group("include" FILES ${HEADERS})
+
+add_library(models STATIC ${SOURCES} ${HEADERS})
+target_include_directories(models PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
+
+target_link_libraries(models PRIVATE openvino::runtime utils opencv_core opencv_imgproc)
diff --git a/python/openvino/runtime/common/models/include/models/associative_embedding_decoder.h b/python/openvino/runtime/common/models/include/models/associative_embedding_decoder.h
new file mode 100644
index 0000000..94afbda
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/associative_embedding_decoder.h
@@ -0,0 +1,94 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/ + +#pragma once +#include <stddef.h> + +#include <memory> +#include <vector> + +#include <opencv2/core.hpp> + +struct Peak { + explicit Peak(const cv::Point2f& keypoint = cv::Point2f(-1, -1), const float score = 0.0f, const float tag = 0.0f) + : keypoint(keypoint), + score(score), + tag(tag) {} + + cv::Point2f keypoint; + float score; + float tag; +}; + +class Pose { +public: + explicit Pose(size_t numJoints) : peaks(numJoints) {} + + void add(size_t index, Peak peak) { + peaks[index] = peak; + sum += peak.score; + poseTag = poseTag * static_cast<float>(validPointsNum) + peak.tag; + poseCenter = poseCenter * static_cast<float>(validPointsNum) + peak.keypoint; + validPointsNum += 1; + poseTag = poseTag / static_cast<float>(validPointsNum); + poseCenter = poseCenter / static_cast<float>(validPointsNum); + } + + float getPoseTag() const { + return poseTag; + } + + float getMeanScore() const { + return sum / static_cast<float>(size()); + } + + Peak& getPeak(size_t index) { + return peaks[index]; + } + + cv::Point2f& getPoseCenter() { + return poseCenter; + } + + size_t size() const { + return peaks.size(); + } + +private: + std::vector<Peak> peaks; + cv::Point2f poseCenter = cv::Point2f(0.f, 0.f); + int validPointsNum = 0; + float poseTag = 0; + float sum = 0; +}; + +void findPeaks(const std::vector<cv::Mat>& nmsHeatMaps, + const std::vector<cv::Mat>& aembdsMaps, + std::vector<std::vector<Peak>>& allPeaks, + size_t jointId, + size_t maxNumPeople, + float detectionThreshold); + +std::vector<Pose> matchByTag(std::vector<std::vector<Peak>>& allPeaks, + size_t maxNumPeople, + size_t numJoints, + float tagThreshold); + +void adjustAndRefine(std::vector<Pose>& allPoses, + const std::vector<cv::Mat>& heatMaps, + const std::vector<cv::Mat>& aembdsMaps, + int poseId, + float delta); diff --git a/python/openvino/runtime/common/models/include/models/classification_model.h b/python/openvino/runtime/common/models/include/models/classification_model.h new file mode 100644 index 0000000..6d32e44 --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/classification_model.h @@ -0,0 +1,57 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once +#include <stddef.h> + +#include <memory> +#include <string> +#include <vector> + +#include "models/image_model.h" + +namespace ov { +class Model; +} // namespace ov +struct InferenceResult; +struct ResultBase; + +class ClassificationModel : public ImageModel { +public: + /// Constructor + /// @param modelFileName name of model to load. + /// @param nTop - number of top results. + /// Any detected object with confidence lower than this threshold will be ignored. + /// @param useAutoResize - if true, image will be resized by openvino. + /// Otherwise, image will be preprocessed and resized using OpenCV routines. + /// @param labels - array of labels for every class. 
+ /// @param layout - model input layout + ClassificationModel(const std::string& modelFileName, + size_t nTop, + bool useAutoResize, + const std::vector<std::string>& labels, + const std::string& layout = ""); + + std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override; + + static std::vector<std::string> loadLabels(const std::string& labelFilename); + +protected: + size_t nTop; + std::vector<std::string> labels; + + void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override; +}; diff --git a/python/openvino/runtime/common/models/include/models/deblurring_model.h b/python/openvino/runtime/common/models/include/models/deblurring_model.h new file mode 100644 index 0000000..33f5542 --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/deblurring_model.h @@ -0,0 +1,52 @@ +/* +// Copyright (C) 2021-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writingb software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once +#include <stddef.h> + +#include <memory> +#include <string> + +#include <opencv2/core/types.hpp> + +#include "models/image_model.h" + +namespace ov { +class InferRequest; +class Model; +} // namespace ov +struct InferenceResult; +struct InputData; +struct InternalModelData; +struct ResultBase; + +class DeblurringModel : public ImageModel { +public: + /// Constructor + /// @param modelFileName name of model to load + /// @param inputImgSize size of image to set model input shape + /// @param layout - model input layout + DeblurringModel(const std::string& modelFileName, const cv::Size& inputImgSize, const std::string& layout = ""); + + std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override; + std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override; + +protected: + void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override; + void changeInputSize(std::shared_ptr<ov::Model>& model); + + static const size_t stride = 32; +}; diff --git a/python/openvino/runtime/common/models/include/models/detection_model.h b/python/openvino/runtime/common/models/include/models/detection_model.h new file mode 100644 index 0000000..4d57a61 --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/detection_model.h @@ -0,0 +1,51 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#pragma once +#include <stddef.h> + +#include <string> +#include <vector> + +#include "models/image_model.h" + +class DetectionModel : public ImageModel { +public: + /// Constructor + /// @param modelFileName name of model to load + /// @param confidenceThreshold - threshold to eliminate low-confidence detections. + /// Any detected object with confidence lower than this threshold will be ignored. + /// @param useAutoResize - if true, image will be resized by openvino. + /// Otherwise, image will be preprocessed and resized using OpenCV routines. + /// @param labels - array of labels for every class. If this array is empty or contains less elements + /// than actual classes number, default "Label #N" will be shown for missing items. + /// @param layout - model input layout + DetectionModel(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + const std::vector<std::string>& labels, + const std::string& layout = ""); + + static std::vector<std::string> loadLabels(const std::string& labelFilename); + +protected: + float confidenceThreshold; + std::vector<std::string> labels; + + std::string getLabelName(int labelID) { + return (size_t)labelID < labels.size() ? labels[labelID] : std::string("Label #") + std::to_string(labelID); + } +}; diff --git a/python/openvino/runtime/common/models/include/models/detection_model_centernet.h b/python/openvino/runtime/common/models/include/models/detection_model_centernet.h new file mode 100644 index 0000000..db9ebdb --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/detection_model_centernet.h @@ -0,0 +1,59 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
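The `getLabelName` helper above degrades gracefully when the label list is shorter than the class count, and the cast to `size_t` also routes negative IDs to the fallback. A self-contained sketch of the same logic:

```cpp
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

// Mirrors DetectionModel::getLabelName: out-of-range (or negative) IDs
// fall back to a "Label #N" placeholder.
static std::string getLabelName(const std::vector<std::string>& labels, int labelID) {
    return static_cast<std::size_t>(labelID) < labels.size()
               ? labels[labelID]
               : std::string("Label #") + std::to_string(labelID);
}

int main() {
    const std::vector<std::string> labels = {"person", "car"};
    assert(getLabelName(labels, 1) == "car");
    assert(getLabelName(labels, 7) == "Label #7");   // missing entry
    assert(getLabelName(labels, -1) == "Label #-1"); // negative wraps past size()
    return 0;
}
```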
+*/ + +#pragma once +#include <memory> +#include <string> +#include <vector> + +#include "models/detection_model.h" + +namespace ov { +class InferRequest; +class Model; +} // namespace ov +struct InferenceResult; +struct InputData; +struct InternalModelData; +struct ResultBase; + +class ModelCenterNet : public DetectionModel { +public: + struct BBox { + float left; + float top; + float right; + float bottom; + + float getWidth() const { + return (right - left) + 1.0f; + } + float getHeight() const { + return (bottom - top) + 1.0f; + } + }; + static const int INIT_VECTOR_SIZE = 200; + + ModelCenterNet(const std::string& modelFileName, + float confidenceThreshold, + const std::vector<std::string>& labels = std::vector<std::string>(), + const std::string& layout = ""); + std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override; + std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override; + +protected: + void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override; +}; diff --git a/python/openvino/runtime/common/models/include/models/detection_model_faceboxes.h b/python/openvino/runtime/common/models/include/models/detection_model_faceboxes.h new file mode 100644 index 0000000..8ec2b21 --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/detection_model_faceboxes.h @@ -0,0 +1,55 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once +#include <stddef.h> + +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include <utils/nms.hpp> + +#include "models/detection_model.h" + +namespace ov { +class Model; +} // namespace ov +struct InferenceResult; +struct ResultBase; + +class ModelFaceBoxes : public DetectionModel { +public: + static const int INIT_VECTOR_SIZE = 200; + + ModelFaceBoxes(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + float boxIOUThreshold, + const std::string& layout = ""); + std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override; + +protected: + size_t maxProposalsCount; + const float boxIOUThreshold; + const std::vector<float> variance; + const std::vector<int> steps; + const std::vector<std::vector<int>> minSizes; + std::vector<Anchor> anchors; + void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override; + void priorBoxes(const std::vector<std::pair<size_t, size_t>>& featureMaps); +}; diff --git a/python/openvino/runtime/common/models/include/models/detection_model_retinaface.h b/python/openvino/runtime/common/models/include/models/detection_model_retinaface.h new file mode 100644 index 0000000..ac2c235 --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/detection_model_retinaface.h @@ -0,0 +1,74 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
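`ModelCenterNet::BBox` above uses the inclusive-pixel convention, hence the `+ 1.0f` in both accessors: a box covering columns 10 through 19 is ten pixels wide. A minimal check:

```cpp
#include <cassert>

// Same accessors as ModelCenterNet::BBox: width/height count inclusive pixels.
struct BBox {
    float left, top, right, bottom;
    float getWidth() const { return (right - left) + 1.0f; }
    float getHeight() const { return (bottom - top) + 1.0f; }
};

int main() {
    const BBox b{10.f, 5.f, 19.f, 14.f};
    assert(b.getWidth() == 10.f);   // columns 10..19
    assert(b.getHeight() == 10.f);  // rows 5..14
    return 0;
}
```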
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once +#include <map> +#include <memory> +#include <string> +#include <vector> + +#include <utils/nms.hpp> + +#include "models/detection_model.h" + +namespace ov { +class Model; +} // namespace ov +struct InferenceResult; +struct ResultBase; + +class ModelRetinaFace : public DetectionModel { +public: + static const int LANDMARKS_NUM = 5; + static const int INIT_VECTOR_SIZE = 200; + /// Loads model and performs required initialization + /// @param model_name name of model to load + /// @param confidenceThreshold - threshold to eliminate low-confidence detections. + /// Any detected object with confidence lower than this threshold will be ignored. + /// @param useAutoResize - if true, image will be resized by openvino. + /// @param boxIOUThreshold - threshold for NMS boxes filtering, varies in [0.0, 1.0] range. + /// @param layout - model input layout + ModelRetinaFace(const std::string& model_name, + float confidenceThreshold, + bool useAutoResize, + float boxIOUThreshold, + const std::string& layout = ""); + std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override; + +protected: + struct AnchorCfgLine { + int stride; + std::vector<int> scales; + int baseSize; + std::vector<int> ratios; + }; + + bool shouldDetectMasks; + bool shouldDetectLandmarks; + const float boxIOUThreshold; + const float maskThreshold; + float landmarkStd; + + enum OutputType { OUT_BOXES, OUT_SCORES, OUT_LANDMARKS, OUT_MASKSCORES, OUT_MAX }; + + std::vector<std::string> separateOutputsNames[OUT_MAX]; + const std::vector<AnchorCfgLine> anchorCfg; + std::map<int, std::vector<Anchor>> anchorsFpn; + std::vector<std::vector<Anchor>> anchors; + + void generateAnchorsFpn(); + void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override; +}; diff --git a/python/openvino/runtime/common/models/include/models/detection_model_retinaface_pt.h b/python/openvino/runtime/common/models/include/models/detection_model_retinaface_pt.h new file mode 100644 index 0000000..68cc907 --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/detection_model_retinaface_pt.h @@ -0,0 +1,81 @@ +/* +// Copyright (C) 2021-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
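`ModelRetinaFace` above builds its anchor pyramid from `AnchorCfgLine` entries via `generateAnchorsFpn`, whose definition is not part of this header. The sketch below follows the classic RetinaFace recipe (each ratio reshapes the base square, each scale then enlarges it) with a hypothetical local `Anchor`, so treat it as an approximation of what the wrapper computes rather than its verbatim code:

```cpp
#include <cmath>
#include <vector>

struct Anchor { float left, top, right, bottom; };  // hypothetical stand-in

// Classic RetinaFace base-anchor enumeration for one AnchorCfgLine
// (baseSize/ratios/scales as in the header above).
std::vector<Anchor> baseAnchors(int baseSize, const std::vector<int>& ratios,
                                const std::vector<int>& scales) {
    std::vector<Anchor> anchors;
    const float cx = (baseSize - 1) / 2.0f;
    const float cy = (baseSize - 1) / 2.0f;
    for (int ratio : ratios) {
        const float area = static_cast<float>(baseSize) * baseSize;
        const float w0 = std::round(std::sqrt(area / ratio));
        const float h0 = std::round(w0 * ratio);
        for (int scale : scales) {
            const float w = w0 * scale;
            const float h = h0 * scale;
            anchors.push_back({cx - 0.5f * (w - 1), cy - 0.5f * (h - 1),
                               cx + 0.5f * (w - 1), cy + 0.5f * (h - 1)});
        }
    }
    return anchors;
}
```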
+*/ + +#pragma once +#include <stddef.h> + +#include <memory> +#include <string> +#include <vector> + +#include <opencv2/core/types.hpp> +#include <utils/nms.hpp> + +#include "models/detection_model.h" + +namespace ov { +class Model; +class Tensor; +} // namespace ov +struct InferenceResult; +struct ResultBase; + +class ModelRetinaFacePT : public DetectionModel { +public: + struct Box { + float cX; + float cY; + float width; + float height; + }; + + /// Loads model and performs required initialization + /// @param model_name name of model to load + /// @param confidenceThreshold - threshold to eliminate low-confidence detections. + /// Any detected object with confidence lower than this threshold will be ignored. + /// @param useAutoResize - if true, image will be resized by openvino. + /// @param boxIOUThreshold - threshold for NMS boxes filtering, varies in [0.0, 1.0] range. + /// @param layout - model input layout + ModelRetinaFacePT(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + float boxIOUThreshold, + const std::string& layout = ""); + std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override; + +protected: + size_t landmarksNum; + const float boxIOUThreshold; + float variance[2] = {0.1f, 0.2f}; + + enum OutputType { OUT_BOXES, OUT_SCORES, OUT_LANDMARKS, OUT_MAX }; + + std::vector<ModelRetinaFacePT::Box> priors; + + std::vector<size_t> filterByScore(const ov::Tensor& scoresTensor, const float confidenceThreshold); + std::vector<float> getFilteredScores(const ov::Tensor& scoresTensor, const std::vector<size_t>& indicies); + std::vector<cv::Point2f> getFilteredLandmarks(const ov::Tensor& landmarksTensor, + const std::vector<size_t>& indicies, + int imgWidth, + int imgHeight); + std::vector<ModelRetinaFacePT::Box> generatePriorData(); + std::vector<Anchor> getFilteredProposals(const ov::Tensor& boxesTensor, + const std::vector<size_t>& indicies, + int imgWidth, + int imgHeight); + + void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override; +}; diff --git a/python/openvino/runtime/common/models/include/models/detection_model_ssd.h b/python/openvino/runtime/common/models/include/models/detection_model_ssd.h new file mode 100644 index 0000000..646d7b0 --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/detection_model_ssd.h @@ -0,0 +1,63 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once +#include <stddef.h> + +#include <memory> +#include <string> +#include <vector> + +#include "models/detection_model.h" + +namespace ov { +class InferRequest; +class Model; +} // namespace ov +struct InferenceResult; +struct InputData; +struct InternalModelData; +struct ResultBase; + +class ModelSSD : public DetectionModel { +public: + /// Constructor + /// @param modelFileName name of model to load + /// @param confidenceThreshold - threshold to eliminate low-confidence detections. 
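`ModelRetinaFacePT` above keeps center-size `priors` and a two-element `variance`. The decode step itself lives in the .cpp, but the conventional SSD-style decode with exactly these fields looks like the following hedged sketch (hypothetical `decode` helper):

```cpp
#include <cmath>

struct Box { float cX, cY, width, height; };  // prior in center-size form

// Conventional SSD/RetinaFace decode of one regression (dx, dy, dw, dh)
// against a prior: variance[0] (0.1) scales center offsets, variance[1]
// (0.2) scales log-sizes. Sketch only, not the wrapper's verbatim code.
Box decode(const Box& prior, float dx, float dy, float dw, float dh,
           const float variance[2]) {
    Box out;
    out.cX = prior.cX + dx * variance[0] * prior.width;
    out.cY = prior.cY + dy * variance[0] * prior.height;
    out.width = prior.width * std::exp(dw * variance[1]);
    out.height = prior.height * std::exp(dh * variance[1]);
    return out;
}
```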
+ /// Any detected object with confidence lower than this threshold will be ignored. + /// @param useAutoResize - if true, image will be resized by openvino. + /// Otherwise, image will be preprocessed and resized using OpenCV routines. + /// @param labels - array of labels for every class. If this array is empty or contains less elements + /// than actual classes number, default "Label #N" will be shown for missing items. + /// @param layout - model input layout + ModelSSD(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + const std::vector<std::string>& labels = std::vector<std::string>(), + const std::string& layout = ""); + + std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override; + std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override; + +protected: + std::unique_ptr<ResultBase> postprocessSingleOutput(InferenceResult& infResult); + std::unique_ptr<ResultBase> postprocessMultipleOutputs(InferenceResult& infResult); + void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override; + void prepareSingleOutput(std::shared_ptr<ov::Model>& model); + void prepareMultipleOutputs(std::shared_ptr<ov::Model>& model); + size_t objectSize = 0; + size_t detectionsNumId = 0; +}; diff --git a/python/openvino/runtime/common/models/include/models/detection_model_yolo.h b/python/openvino/runtime/common/models/include/models/detection_model_yolo.h new file mode 100644 index 0000000..38b0b64 --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/detection_model_yolo.h @@ -0,0 +1,107 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once +#include <stddef.h> +#include <stdint.h> + +#include <map> +#include <memory> +#include <string> +#include <vector> + +#include <openvino/op/region_yolo.hpp> +#include <openvino/openvino.hpp> + +#include "models/detection_model.h" + +struct DetectedObject; +struct InferenceResult; +struct ResultBase; + +class ModelYolo : public DetectionModel { +protected: + class Region { + public: + int num = 0; + size_t classes = 0; + int coords = 0; + std::vector<float> anchors; + size_t outputWidth = 0; + size_t outputHeight = 0; + + Region(const std::shared_ptr<ov::op::v0::RegionYolo>& regionYolo); + Region(size_t classes, + int coords, + const std::vector<float>& anchors, + const std::vector<int64_t>& masks, + size_t outputWidth, + size_t outputHeight); + }; + +public: + enum YoloVersion { YOLO_V1V2, YOLO_V3, YOLO_V4, YOLO_V4_TINY, YOLOF }; + + /// Constructor. + /// @param modelFileName name of model to load + /// @param confidenceThreshold - threshold to eliminate low-confidence detections. + /// Any detected object with confidence lower than this threshold will be ignored. + /// @param useAutoResize - if true, image will be resized by openvino. + /// Otherwise, image will be preprocessed and resized using OpenCV routines. 
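`ModelSSD::postprocessSingleOutput` above targets the classic single-blob SSD layout, where each detection is a run of `objectSize` floats, conventionally `[image_id, label, confidence, x_min, y_min, x_max, y_max]`. A hedged sketch of scanning such a blob (the wrapper's real code additionally rescales coordinates to the source image):

```cpp
#include <cstddef>
#include <vector>

struct Detection { int label; float confidence, xmin, ymin, xmax, ymax; };

// Scan a raw SSD output of shape [1, 1, N, objectSize]; a negative
// image_id marks the end of valid detections.
std::vector<Detection> parseSSD(const float* data, std::size_t count,
                                std::size_t objectSize, float threshold) {
    std::vector<Detection> out;
    for (std::size_t i = 0; i < count; ++i) {
        const float* d = data + i * objectSize;
        if (d[0] < 0) break;             // end-of-detections marker
        if (d[2] < threshold) continue;  // confidence filter
        out.push_back({static_cast<int>(d[1]), d[2], d[3], d[4], d[5], d[6]});
    }
    return out;
}
```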
+ /// @param useAdvancedPostprocessing - if true, an advanced algorithm for filtering/postprocessing will be used + /// (with better processing of multiple crossing objects). Otherwise, classic algorithm will be used. + /// @param boxIOUThreshold - threshold to treat separate output regions as one object for filtering + /// during postprocessing (only one of them should stay). The default value is 0.5 + /// @param labels - array of labels for every class. If this array is empty or contains less elements + /// than actual classes number, default "Label #N" will be shown for missing items. + /// @param anchors - vector of anchors coordinates. Required for YOLOv4, for other versions it may be omitted. + /// @param masks - vector of masks values. Required for YOLOv4, for other versions it may be omitted. + /// @param layout - model input layout + ModelYolo(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + bool useAdvancedPostprocessing = true, + float boxIOUThreshold = 0.5, + const std::vector<std::string>& labels = std::vector<std::string>(), + const std::vector<float>& anchors = std::vector<float>(), + const std::vector<int64_t>& masks = std::vector<int64_t>(), + const std::string& layout = ""); + + std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override; + +protected: + void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override; + + void parseYOLOOutput(const std::string& output_name, + const ov::Tensor& tensor, + const unsigned long resized_im_h, + const unsigned long resized_im_w, + const unsigned long original_im_h, + const unsigned long original_im_w, + std::vector<DetectedObject>& objects); + + static int calculateEntryIndex(int entriesNum, int lcoords, size_t lclasses, int location, int entry); + static double intersectionOverUnion(const DetectedObject& o1, const DetectedObject& o2); + + std::map<std::string, Region> regions; + double boxIOUThreshold; + bool useAdvancedPostprocessing; + bool isObjConf = 1; + YoloVersion yoloVersion; + const std::vector<float> presetAnchors; + const std::vector<int64_t> presetMasks; + ov::Layout yoloRegionLayout = "NCHW"; +}; diff --git a/python/openvino/runtime/common/models/include/models/detection_model_yolov3_onnx.h b/python/openvino/runtime/common/models/include/models/detection_model_yolov3_onnx.h new file mode 100644 index 0000000..66c4f03 --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/detection_model_yolov3_onnx.h @@ -0,0 +1,50 @@ +/* +// Copyright (C) 2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once + +#include <string> +#include <vector> + +#include <openvino/openvino.hpp> + +#include "models/detection_model.h" + +class ModelYoloV3ONNX: public DetectionModel { +public: + /// Constructor. + /// @param modelFileName name of model to load + /// @param confidenceThreshold - threshold to eliminate low-confidence detections. 
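`calculateEntryIndex` above locates one field of one cell inside the flat YOLO region tensor, where each anchor stores `coords + 1 + classes` planes of `H*W` values. Its definition is in the .cpp; this sketch shows the conventional Open Model Zoo formula under that assumption:

```cpp
#include <cstddef>

// For anchor n and cell loc within an H*W map, field `entry` lives at
//   (n * (coords + 1 + classes) + entry) * cellsPerMap + loc
// where `location` enumerates cells across all anchors of one scale.
int calculateEntryIndex(int cellsPerMap, int coords, std::size_t classes,
                        int location, int entry) {
    const int n = location / cellsPerMap;    // which anchor
    const int loc = location % cellsPerMap;  // which cell in the map
    return (n * (coords + static_cast<int>(classes) + 1) + entry) * cellsPerMap + loc;
}
```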
+ /// Any detected object with confidence lower than this threshold will be ignored. + /// @param labels - array of labels for every class. If this array is empty or contains less elements + /// than actual classes number, default "Label #N" will be shown for missing items. + /// @param layout - model input layout + ModelYoloV3ONNX(const std::string& modelFileName, + float confidenceThreshold, + const std::vector<std::string>& labels = std::vector<std::string>(), + const std::string& layout = ""); + + std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override; + std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override; + +protected: + void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override; + + std::string boxesOutputName; + std::string scoresOutputName; + std::string indicesOutputName; + static const int numberOfClasses = 80; +}; diff --git a/python/openvino/runtime/common/models/include/models/detection_model_yolox.h b/python/openvino/runtime/common/models/include/models/detection_model_yolox.h new file mode 100644 index 0000000..d7e4ea3 --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/detection_model_yolox.h @@ -0,0 +1,54 @@ +/* +// Copyright (C) 2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once +#include <memory> +#include <string> +#include <vector> + +#include <openvino/openvino.hpp> + +#include "models/detection_model.h" + +class ModelYoloX: public DetectionModel { +public: + /// Constructor. + /// @param modelFileName name of model to load + /// @param confidenceThreshold - threshold to eliminate low-confidence detections. + /// Any detected object with confidence lower than this threshold will be ignored. + /// @param boxIOUThreshold - threshold to treat separate output regions as one object for filtering + /// during postprocessing (only one of them should stay). The default value is 0.5 + /// @param labels - array of labels for every class. If this array is empty or contains less elements + /// than actual classes number, default "Label #N" will be shown for missing items. 
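`ModelYoloV3ONNX` above records three output names: boxes, scores, and NMS indices, so postprocessing is mostly a gather. A hedged sketch under the usual ONNX YOLOv3 layout assumptions (boxes `[N, 4]` as y1,x1,y2,x2; scores `[numClasses, N]`; indices as `{batch, class, box}` triples, padded with -1):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

struct Det { int classId; float score, ymin, xmin, ymax, xmax; };

// Gather final detections through the NMS index triples; the wrapper's
// real postprocess also rescales boxes to the source image.
std::vector<Det> gatherDetections(const float* boxes, const float* scores,
                                  const int64_t* indices, std::size_t numIndices,
                                  std::size_t numBoxes) {
    std::vector<Det> result;
    for (std::size_t i = 0; i < numIndices; ++i) {
        const int64_t cls = indices[i * 3 + 1];
        const int64_t box = indices[i * 3 + 2];
        if (box < 0) continue;  // padded (empty) slot
        const float* b = boxes + box * 4;
        result.push_back({static_cast<int>(cls),
                          scores[cls * numBoxes + box], b[0], b[1], b[2], b[3]});
    }
    return result;
}
```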
+ /// @param layout - model input layout + ModelYoloX(const std::string& modelFileName, + float confidenceThreshold, + float boxIOUThreshold = 0.5, + const std::vector<std::string>& labels = std::vector<std::string>(), + const std::string& layout = ""); + + std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override; + std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override; + +protected: + void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override; + void setStridesGrids(); + + double boxIOUThreshold; + std::vector<std::pair<size_t, size_t>> grids; + std::vector<size_t> expandedStrides; + static const size_t numberOfClasses = 80; +}; diff --git a/python/openvino/runtime/common/models/include/models/hpe_model_associative_embedding.h b/python/openvino/runtime/common/models/include/models/hpe_model_associative_embedding.h new file mode 100644 index 0000000..66e217e --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/hpe_model_associative_embedding.h @@ -0,0 +1,89 @@ +/* +// Copyright (C) 2021-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once +#include <memory> +#include <string> +#include <vector> + +#include <opencv2/core.hpp> + +#include <utils/image_utils.h> + +#include "models/image_model.h" + +namespace ov { +class InferRequest; +class Model; +class Shape; +} // namespace ov +struct HumanPose; +struct InferenceResult; +struct InputData; +struct InternalModelData; +struct ResultBase; + +class HpeAssociativeEmbedding : public ImageModel { +public: + /// Constructor + /// @param modelFileName name of model to load + /// @param aspectRatio - the ratio of input width to its height. + /// @param targetSize - the length of a short image side used for model reshaping. + /// @param confidenceThreshold - threshold to eliminate low-confidence poses. + /// Any pose with confidence lower than this threshold will be ignored. 
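`ModelYoloX::setStridesGrids` above precomputes a grid coordinate and stride for every output row; decoding later adds the grid offset and multiplies by the stride (sizes go through `exp`). A sketch assuming the conventional YOLOX strides {8, 16, 32} and a square input (the member itself works from the model's actual input shape):

```cpp
#include <cstddef>
#include <initializer_list>
#include <utility>
#include <vector>

// Each output row r corresponds to grids[r] = (grid_x, grid_y) and
// expandedStrides[r]; decode is then x = (pred_x + grid_x) * stride, etc.
void setStridesGrids(std::size_t inputSize,
                     std::vector<std::pair<std::size_t, std::size_t>>& grids,
                     std::vector<std::size_t>& expandedStrides) {
    for (std::size_t stride : {8u, 16u, 32u}) {
        const std::size_t side = inputSize / stride;
        for (std::size_t y = 0; y < side; ++y) {
            for (std::size_t x = 0; x < side; ++x) {
                grids.emplace_back(x, y);
                expandedStrides.push_back(stride);
            }
        }
    }
}
```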
+ /// @param layout - model input layout + HpeAssociativeEmbedding(const std::string& modelFileName, + double aspectRatio, + int targetSize, + float confidenceThreshold, + const std::string& layout = "", + float delta = 0.0, + RESIZE_MODE resizeMode = RESIZE_KEEP_ASPECT); + + std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override; + + std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override; + +protected: + void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override; + + cv::Size inputLayerSize; + double aspectRatio; + int targetSize; + float confidenceThreshold; + float delta; + + std::string embeddingsTensorName; + std::string heatmapsTensorName; + std::string nmsHeatmapsTensorName; + + static const int numJoints = 17; + static const int stride = 32; + static const int maxNumPeople = 30; + static const cv::Vec3f meanPixel; + static const float detectionThreshold; + static const float tagThreshold; + + void changeInputSize(std::shared_ptr<ov::Model>& model); + + std::string findTensorByName(const std::string& tensorName, const std::vector<std::string>& outputsNames); + + std::vector<cv::Mat> split(float* data, const ov::Shape& shape); + + std::vector<HumanPose> extractPoses(std::vector<cv::Mat>& heatMaps, + const std::vector<cv::Mat>& aembdsMaps, + const std::vector<cv::Mat>& nmsHeatMaps) const; +}; diff --git a/python/openvino/runtime/common/models/include/models/hpe_model_openpose.h b/python/openvino/runtime/common/models/include/models/hpe_model_openpose.h new file mode 100644 index 0000000..d5e1ce7 --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/hpe_model_openpose.h @@ -0,0 +1,78 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once +#include <stddef.h> + +#include <memory> +#include <string> +#include <vector> + +#include <opencv2/core.hpp> + +#include "models/image_model.h" + +namespace ov { +class InferRequest; +class Model; +} // namespace ov +struct HumanPose; +struct InferenceResult; +struct InputData; +struct InternalModelData; +struct ResultBase; + +class HPEOpenPose : public ImageModel { +public: + /// Constructor + /// @param modelFileName name of model to load + /// @param aspectRatio - the ratio of input width to its height. + /// @param targetSize - the height used for model reshaping. + /// @param confidenceThreshold - threshold to eliminate low-confidence keypoints. 
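`HpeAssociativeEmbedding` above reshapes the network from `targetSize` (the short image side) and `aspectRatio`, and its `stride` of 32 implies padding to a stride multiple. The exact rounding lives in `changeInputSize`; this sketch shows one plausible version of the arithmetic, not the wrapper's verbatim code:

```cpp
#include <cmath>
#include <opencv2/core.hpp>

// Assumed arithmetic: targetSize fixes the short side, aspectRatio
// (width / height) fixes the other, both padded up to a stride multiple.
cv::Size inputSizeFor(int targetSize, double aspectRatio, int stride) {
    int height, width;
    if (aspectRatio >= 1.0) {  // landscape: height is the short side
        height = targetSize;
        width = static_cast<int>(std::round(targetSize * aspectRatio));
    } else {                   // portrait: width is the short side
        width = targetSize;
        height = static_cast<int>(std::round(targetSize / aspectRatio));
    }
    auto padUp = [stride](int v) { return (v + stride - 1) / stride * stride; };
    return cv::Size(padUp(width), padUp(height));
}
```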
+ /// @param layout - model input layout + HPEOpenPose(const std::string& modelFileName, + double aspectRatio, + int targetSize, + float confidenceThreshold, + const std::string& layout = ""); + + std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override; + + std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override; + + static const size_t keypointsNumber = 18; + +protected: + void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override; + + static const int minJointsNumber = 3; + static const int stride = 8; + static const int upsampleRatio = 4; + static const cv::Vec3f meanPixel; + static const float minPeaksDistance; + static const float midPointsScoreThreshold; + static const float foundMidPointsRatioThreshold; + static const float minSubsetScore; + cv::Size inputLayerSize; + double aspectRatio; + int targetSize; + float confidenceThreshold; + + std::vector<HumanPose> extractPoses(const std::vector<cv::Mat>& heatMaps, const std::vector<cv::Mat>& pafs) const; + void resizeFeatureMaps(std::vector<cv::Mat>& featureMaps) const; + + void changeInputSize(std::shared_ptr<ov::Model>& model); +}; diff --git a/python/openvino/runtime/common/models/include/models/image_model.h b/python/openvino/runtime/common/models/include/models/image_model.h new file mode 100644 index 0000000..b18daa1 --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/image_model.h @@ -0,0 +1,49 @@ +/* +// Copyright (C) 2021-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once +#include <stddef.h> + +#include <memory> +#include <string> + +#include "models/model_base.h" +#include "utils/image_utils.h" + +namespace ov { +class InferRequest; +} // namespace ov +struct InputData; +struct InternalModelData; + +class ImageModel : public ModelBase { +public: + /// Constructor + /// @param modelFileName name of model to load + /// @param useAutoResize - if true, image is resized by openvino + /// @param layout - model input layout + ImageModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout = ""); + + std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override; + +protected: + bool useAutoResize; + + size_t netInputHeight = 0; + size_t netInputWidth = 0; + cv::InterpolationFlags interpolationMode = cv::INTER_LINEAR; + RESIZE_MODE resizeMode = RESIZE_FILL; +}; diff --git a/python/openvino/runtime/common/models/include/models/input_data.h b/python/openvino/runtime/common/models/include/models/input_data.h new file mode 100644 index 0000000..bff9fa5 --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/input_data.h @@ -0,0 +1,41 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
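`ImageModel` above centralizes input resizing: `RESIZE_FILL` stretches the frame to the network size, while keep-aspect modes letterbox it. A minimal OpenCV sketch of a keep-aspect resize with zero padding (assumed semantics; the shared utils' helper may pad and place differently):

```cpp
#include <algorithm>
#include <opencv2/imgproc.hpp>

// Scale so the limiting side fits exactly, then pad the remainder with
// zeros (top-left placement for simplicity).
cv::Mat resizeKeepAspect(const cv::Mat& src, int dstW, int dstH) {
    const double scale = std::min(static_cast<double>(dstW) / src.cols,
                                  static_cast<double>(dstH) / src.rows);
    cv::Mat resized;
    cv::resize(src, resized, cv::Size(), scale, scale, cv::INTER_LINEAR);
    cv::Mat dst = cv::Mat::zeros(dstH, dstW, src.type());
    resized.copyTo(dst(cv::Rect(0, 0, resized.cols, resized.rows)));
    return dst;
}
```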
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <opencv2/opencv.hpp>
+
+struct InputData {
+    virtual ~InputData() {}
+
+    template <class T>
+    T& asRef() {
+        return dynamic_cast<T&>(*this);
+    }
+
+    template <class T>
+    const T& asRef() const {
+        return dynamic_cast<const T&>(*this);
+    }
+};
+
+struct ImageInputData : public InputData {
+    cv::Mat inputImage;
+
+    ImageInputData() {}
+    ImageInputData(const cv::Mat& img) {
+        inputImage = img;
+    }
+};
diff --git a/python/openvino/runtime/common/models/include/models/internal_model_data.h b/python/openvino/runtime/common/models/include/models/internal_model_data.h
new file mode 100644
index 0000000..61d7744
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/internal_model_data.h
@@ -0,0 +1,48 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+struct InternalModelData {
+    virtual ~InternalModelData() {}
+
+    template <class T>
+    T& asRef() {
+        return dynamic_cast<T&>(*this);
+    }
+
+    template <class T>
+    const T& asRef() const {
+        return dynamic_cast<const T&>(*this);
+    }
+};
+
+struct InternalImageModelData : public InternalModelData {
+    InternalImageModelData(int width, int height) : inputImgWidth(width), inputImgHeight(height) {}
+
+    int inputImgWidth;
+    int inputImgHeight;
+};
+
+struct InternalScaleData : public InternalImageModelData {
+    InternalScaleData(int width, int height, float scaleX, float scaleY)
+        : InternalImageModelData(width, height),
+          scaleX(scaleX),
+          scaleY(scaleY) {}
+
+    float scaleX;
+    float scaleY;
+};
diff --git a/python/openvino/runtime/common/models/include/models/jpeg_restoration_model.h b/python/openvino/runtime/common/models/include/models/jpeg_restoration_model.h
new file mode 100644
index 0000000..8b22ac2
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/jpeg_restoration_model.h
@@ -0,0 +1,55 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
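`InputData` above (like `InternalModelData` alongside it) exposes `asRef`, a checked downcast: `dynamic_cast` on a reference throws `std::bad_cast` when the runtime type does not match. A standalone demo of the pattern:

```cpp
#include <iostream>
#include <typeinfo>

// Minimal reproduction of the asRef() pattern used by the data carriers above.
struct Base {
    virtual ~Base() {}
    template <class T>
    const T& asRef() const { return dynamic_cast<const T&>(*this); }
};
struct ImageData : Base { int width = 640; };

int main() {
    ImageData img;
    const Base& b = img;
    std::cout << b.asRef<ImageData>().width << "\n";  // prints 640
    try {
        struct Other : Base {} o;
        o.asRef<ImageData>();  // wrong dynamic type
    } catch (const std::bad_cast&) {
        std::cout << "bad_cast on mismatched type\n";
    }
    return 0;
}
```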
+*/ + +#pragma once + +#include <memory> +#include <string> + +#include <opencv2/core/types.hpp> + +#include "models/image_model.h" + +namespace ov { +class InferRequest; +class Model; +} // namespace ov +struct InferenceResult; +struct InputData; +struct InternalModelData; +struct ResultBase; +class JPEGRestorationModel : public ImageModel { +public: + /// Constructor + /// @param modelFileName name of model to load + /// @param inputImgSize size of image to set model input shape + /// @param jpegCompression flag allows to perform compression before the inference + /// @param layout - model input layout + JPEGRestorationModel(const std::string& modelFileName, + const cv::Size& inputImgSize, + bool jpegCompression, + const std::string& layout = ""); + + std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override; + std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override; + +protected: + void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override; + void changeInputSize(std::shared_ptr<ov::Model>& model); + + static const size_t stride = 8; + bool jpegCompression = false; +}; diff --git a/python/openvino/runtime/common/models/include/models/model_base.h b/python/openvino/runtime/common/models/include/models/model_base.h new file mode 100644 index 0000000..c6d9cc1 --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/model_base.h @@ -0,0 +1,77 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
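The `jpegCompression` flag above suggests the frame can be run through JPEG compression before inference. One way to emulate that with OpenCV is an in-memory encode/decode round trip (illustrative quality value; not necessarily what the wrapper does):

```cpp
#include <vector>
#include <opencv2/imgcodecs.hpp>

// Round-trip a frame through an in-memory JPEG encode/decode to introduce
// compression artifacts before inference.
cv::Mat jpegRoundTrip(const cv::Mat& src, int quality = 40) {
    std::vector<unsigned char> buf;
    cv::imencode(".jpg", src, buf, {cv::IMWRITE_JPEG_QUALITY, quality});
    return cv::imdecode(buf, cv::IMREAD_COLOR);
}
```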
+*/ + +#pragma once +#include <map> +#include <memory> +#include <string> +#include <vector> + +#include <openvino/openvino.hpp> + +#include <utils/args_helper.hpp> +#include <utils/config_factory.h> +#include <utils/ocv_common.hpp> + +struct InferenceResult; +struct InputData; +struct InternalModelData; +struct ResultBase; + +class ModelBase { +public: + ModelBase(const std::string& modelFileName, const std::string& layout = "") + : modelFileName(modelFileName), + inputsLayouts(parseLayoutString(layout)) {} + + virtual ~ModelBase() {} + + virtual std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) = 0; + virtual ov::CompiledModel compileModel(const ModelConfig& config, ov::Core& core); + virtual void onLoadCompleted(const std::vector<ov::InferRequest>& requests) {} + virtual std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) = 0; + + const std::vector<std::string>& getOutputsNames() const { + return outputsNames; + } + const std::vector<std::string>& getInputsNames() const { + return inputsNames; + } + + std::string getModelFileName() { + return modelFileName; + } + + void setInputsPreprocessing(bool reverseInputChannels, + const std::string& meanValues, + const std::string& scaleValues) { + this->inputTransform = InputTransform(reverseInputChannels, meanValues, scaleValues); + } + +protected: + virtual void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) = 0; + + std::shared_ptr<ov::Model> prepareModel(ov::Core& core); + + InputTransform inputTransform = InputTransform(); + std::vector<std::string> inputsNames; + std::vector<std::string> outputsNames; + ov::CompiledModel compiledModel; + std::string modelFileName; + ModelConfig config = {}; + std::map<std::string, ov::Layout> inputsLayouts; + ov::Layout getInputLayout(const ov::Output<ov::Node>& input); +}; diff --git a/python/openvino/runtime/common/models/include/models/openpose_decoder.h b/python/openvino/runtime/common/models/include/models/openpose_decoder.h new file mode 100644 index 0000000..d40e56e --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/openpose_decoder.h @@ -0,0 +1,62 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
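`ModelBase` above fixes the wrapper lifecycle: `compileModel` prepares and compiles the network (calling the subclass's `prepareInputsOutputs` on the way), `preprocess` fills the request, and `postprocess` turns raw tensors into a typed result. A hedged sketch of that call order; the demos actually drive this through the pipelines library, so the plumbing here is illustrative only:

```cpp
#include <memory>

#include <openvino/openvino.hpp>

#include "models/input_data.h"
#include "models/model_base.h"
#include "models/results.h"

std::unique_ptr<ResultBase> runOnce(ModelBase& model, const ModelConfig& config,
                                    const InputData& frame) {
    ov::Core core;
    ov::CompiledModel compiled = model.compileModel(config, core);
    ov::InferRequest request = compiled.create_infer_request();

    InferenceResult result;
    result.internalModelData = model.preprocess(frame, request);  // fills input tensor(s)
    request.infer();                                              // synchronous inference
    for (const auto& name : model.getOutputsNames())
        result.outputsData.emplace(name, request.get_tensor(name));

    return model.postprocess(result);  // typed result, consumed via asRef<>()
}
```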
+*/ + +#pragma once +#include <stddef.h> + +#include <vector> + +#include <opencv2/core.hpp> + +struct HumanPose; + +struct Peak { + Peak(const int id = -1, const cv::Point2f& pos = cv::Point2f(), const float score = 0.0f); + + int id; + cv::Point2f pos; + float score; +}; + +struct HumanPoseByPeaksIndices { + explicit HumanPoseByPeaksIndices(const int keypointsNumber); + + std::vector<int> peaksIndices; + int nJoints; + float score; +}; + +struct TwoJointsConnection { + TwoJointsConnection(const int firstJointIdx, const int secondJointIdx, const float score); + + int firstJointIdx; + int secondJointIdx; + float score; +}; + +void findPeaks(const std::vector<cv::Mat>& heatMaps, + const float minPeaksDistance, + std::vector<std::vector<Peak>>& allPeaks, + int heatMapId, + float confidenceThreshold); + +std::vector<HumanPose> groupPeaksToPoses(const std::vector<std::vector<Peak>>& allPeaks, + const std::vector<cv::Mat>& pafs, + const size_t keypointsNumber, + const float midPointsScoreThreshold, + const float foundMidPointsRatioThreshold, + const int minJointsNumber, + const float minSubsetScore); diff --git a/python/openvino/runtime/common/models/include/models/results.h b/python/openvino/runtime/common/models/include/models/results.h new file mode 100644 index 0000000..6b3a89d --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/results.h @@ -0,0 +1,122 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#pragma once +#include <map> +#include <memory> +#include <string> +#include <vector> + +#include <opencv2/core.hpp> +#include <openvino/openvino.hpp> + +#include "internal_model_data.h" + +struct MetaData; +struct ResultBase { + ResultBase(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr) + : frameId(frameId), + metaData(metaData) {} + virtual ~ResultBase() {} + + int64_t frameId; + + std::shared_ptr<MetaData> metaData; + bool IsEmpty() { + return frameId < 0; + } + + template <class T> + T& asRef() { + return dynamic_cast<T&>(*this); + } + + template <class T> + const T& asRef() const { + return dynamic_cast<const T&>(*this); + } +}; + +struct InferenceResult : public ResultBase { + std::shared_ptr<InternalModelData> internalModelData; + std::map<std::string, ov::Tensor> outputsData; + + /// Returns the first output tensor + /// This function is a useful addition to direct access to outputs list as many models have only one output + /// @returns first output tensor + ov::Tensor getFirstOutputTensor() { + if (outputsData.empty()) { + throw std::out_of_range("Outputs map is empty."); + } + return outputsData.begin()->second; + } + + /// Returns true if object contains no valid data + /// @returns true if object contains no valid data + bool IsEmpty() { + return outputsData.empty(); + } +}; + +struct ClassificationResult : public ResultBase { + ClassificationResult(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr) + : ResultBase(frameId, metaData) {} + + struct Classification { + unsigned int id; + std::string label; + float score; + + Classification(unsigned int id, const std::string& label, float score) : id(id), label(label), score(score) {} + }; + + std::vector<Classification> topLabels; +}; + +struct DetectedObject : public cv::Rect2f { + unsigned int labelID; + std::string label; + float confidence; +}; + +struct DetectionResult : public ResultBase { + DetectionResult(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr) + : ResultBase(frameId, metaData) {} + std::vector<DetectedObject> objects; +}; + +struct RetinaFaceDetectionResult : public DetectionResult { + RetinaFaceDetectionResult(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr) + : DetectionResult(frameId, metaData) {} + std::vector<cv::Point2f> landmarks; +}; + +struct ImageResult : public ResultBase { + ImageResult(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr) + : ResultBase(frameId, metaData) {} + cv::Mat resultImage; +}; + +struct HumanPose { + std::vector<cv::Point2f> keypoints; + float score; +}; + +struct HumanPoseResult : public ResultBase { + HumanPoseResult(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr) + : ResultBase(frameId, metaData) {} + std::vector<HumanPose> poses; +}; diff --git a/python/openvino/runtime/common/models/include/models/segmentation_model.h b/python/openvino/runtime/common/models/include/models/segmentation_model.h new file mode 100644 index 0000000..9d4d2cb --- /dev/null +++ b/python/openvino/runtime/common/models/include/models/segmentation_model.h @@ -0,0 +1,50 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
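The result types above are consumed through the same `asRef` downcast as the input carriers. A usage sketch for a detection result:

```cpp
#include <iostream>

#include "models/results.h"

// Downcast the generic result and walk the detections; DetectedObject
// extends cv::Rect2f, so x/y/width/height come from the base class.
void printDetections(ResultBase& result) {
    const DetectionResult& det = result.asRef<DetectionResult>();
    for (const DetectedObject& obj : det.objects) {
        std::cout << obj.label << " (" << obj.confidence << ") at "
                  << obj.x << "," << obj.y << " "
                  << obj.width << "x" << obj.height << "\n";
    }
}
```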
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "models/image_model.h"
+
+namespace ov {
+class Model;
+}  // namespace ov
+struct InferenceResult;
+struct ResultBase;
+
+class SegmentationModel : public ImageModel {
+public:
+    /// Constructor
+    /// @param modelFileName name of model to load
+    /// @param useAutoResize - if true, image will be resized by openvino.
+    /// Otherwise, image will be preprocessed and resized using OpenCV routines.
+    /// @param layout - model input layout
+    SegmentationModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout = "");
+
+    static std::vector<std::string> loadLabels(const std::string& labelFilename);
+
+    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+protected:
+    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+
+    int outHeight = 0;
+    int outWidth = 0;
+    int outChannels = 0;
+};
diff --git a/python/openvino/runtime/common/models/include/models/style_transfer_model.h b/python/openvino/runtime/common/models/include/models/style_transfer_model.h
new file mode 100644
index 0000000..9bcc541
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/style_transfer_model.h
@@ -0,0 +1,43 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <memory>
+#include <string>
+
+#include "models/image_model.h"
+
+namespace ov {
+class InferRequest;
+class Model;
+}  // namespace ov
+struct InferenceResult;
+struct InputData;
+struct InternalModelData;
+struct ResultBase;
+
+class StyleTransferModel : public ImageModel {
+public:
+    /// Constructor
+    /// @param modelFileName name of model to load
+    /// @param layout - model input layout
+    StyleTransferModel(const std::string& modelFileName, const std::string& layout = "");
+
+    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+protected:
+    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+};
diff --git a/python/openvino/runtime/common/models/include/models/super_resolution_model.h b/python/openvino/runtime/common/models/include/models/super_resolution_model.h
new file mode 100644
index 0000000..773b5c3
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/super_resolution_model.h
@@ -0,0 +1,49 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
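`SegmentationModel` above records `outChannels`/`outHeight`/`outWidth`, which implies a CHW score map that postprocessing collapses to a per-pixel argmax. A hedged sketch of that reduction (the wrapper's real `postprocess` also resizes the map back to the source image):

```cpp
#include <opencv2/core.hpp>

// Collapse a CHW float output (channels x height x width) into a class map.
cv::Mat argmaxCHW(const float* data, int channels, int height, int width) {
    cv::Mat classMap(height, width, CV_8UC1);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            int best = 0;
            float bestScore = data[y * width + x];  // channel 0
            for (int c = 1; c < channels; ++c) {
                const float score = data[c * height * width + y * width + x];
                if (score > bestScore) {
                    bestScore = score;
                    best = c;
                }
            }
            classMap.at<uchar>(y, x) = static_cast<uchar>(best);
        }
    }
    return classMap;
}
```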
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <memory>
+#include <string>
+
+#include <opencv2/core/types.hpp>
+
+#include "models/image_model.h"
+
+namespace ov {
+class InferRequest;
+class Model;
+}  // namespace ov
+struct InferenceResult;
+struct InputData;
+struct InternalModelData;
+struct ResultBase;
+
+class SuperResolutionModel : public ImageModel {
+public:
+    /// Constructor
+    /// @param modelFileName name of model to load
+    /// @param inputImgSize size of image to set model input shape
+    /// @param layout - model input layout
+    SuperResolutionModel(const std::string& modelFileName,
+                         const cv::Size& inputImgSize,
+                         const std::string& layout = "");
+
+    std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
+    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+protected:
+    void changeInputSize(std::shared_ptr<ov::Model>& model, int coeff);
+    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+};
diff --git a/python/openvino/runtime/common/models/src/associative_embedding_decoder.cpp b/python/openvino/runtime/common/models/src/associative_embedding_decoder.cpp
new file mode 100644
index 0000000..b1e8285
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/associative_embedding_decoder.cpp
@@ -0,0 +1,201 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/ + +#include "models/associative_embedding_decoder.h" + +#include <algorithm> +#include <iterator> +#include <limits> +#include <numeric> +#include <vector> + +#include <utils/kuhn_munkres.hpp> + +void findPeaks(const std::vector<cv::Mat>& nmsHeatMaps, + const std::vector<cv::Mat>& aembdsMaps, + std::vector<std::vector<Peak>>& allPeaks, + size_t jointId, + size_t maxNumPeople, + float detectionThreshold) { + const cv::Mat& nmsHeatMap = nmsHeatMaps[jointId]; + const float* heatMapData = nmsHeatMap.ptr<float>(); + cv::Size outputSize = nmsHeatMap.size(); + + std::vector<int> indices(outputSize.area()); + std::iota(std::begin(indices), std::end(indices), 0); + std::partial_sort(std::begin(indices), + std::begin(indices) + maxNumPeople, + std::end(indices), + [heatMapData](int l, int r) { + return heatMapData[l] > heatMapData[r]; + }); + + for (size_t personId = 0; personId < maxNumPeople; personId++) { + int index = indices[personId]; + int x = index / outputSize.width; + int y = index % outputSize.width; + float tag = aembdsMaps[jointId].at<float>(x, y); + float score = heatMapData[index]; + allPeaks[jointId].reserve(maxNumPeople); + if (score > detectionThreshold) { + allPeaks[jointId].emplace_back(Peak{cv::Point2f(static_cast<float>(x), static_cast<float>(y)), score, tag}); + } + } +} + +std::vector<Pose> matchByTag(std::vector<std::vector<Peak>>& allPeaks, + size_t maxNumPeople, + size_t numJoints, + float tagThreshold) { + size_t jointOrder[]{0, 1, 2, 3, 4, 5, 6, 11, 12, 7, 8, 9, 10, 13, 14, 15, 16}; + std::vector<Pose> allPoses; + for (size_t jointId : jointOrder) { + std::vector<Peak>& jointPeaks = allPeaks[jointId]; + std::vector<float> tags; + for (auto& peak : jointPeaks) { + tags.push_back(peak.tag); + } + if (allPoses.empty()) { + for (size_t personId = 0; personId < jointPeaks.size(); personId++) { + Peak peak = jointPeaks[personId]; + Pose pose = Pose(numJoints); + pose.add(jointId, peak); + allPoses.push_back(pose); + } + continue; + } + if (jointPeaks.empty() || (allPoses.size() == maxNumPeople)) { + continue; + } + std::vector<float> posesTags; + std::vector<cv::Point2f> posesCenters; + for (auto& pose : allPoses) { + posesTags.push_back(pose.getPoseTag()); + posesCenters.push_back(pose.getPoseCenter()); + } + size_t numAdded = tags.size(); + size_t numGrouped = posesTags.size(); + cv::Mat tagsDiff(numAdded, numGrouped, CV_32F); + cv::Mat matchingCost(numAdded, numGrouped, CV_32F); + std::vector<float> dists(numAdded); + for (size_t j = 0; j < numGrouped; j++) { + float minDist = std::numeric_limits<float>::max(); + // Compute euclidean distance (in spatial space) between the pose center and all joints. + const cv::Point2f center = posesCenters.at(j); + for (size_t i = 0; i < numAdded; i++) { + cv::Point2f v = jointPeaks.at(i).keypoint - center; + float dist = std::sqrt(v.x * v.x + v.y * v.y); + dists[i] = dist; + minDist = std::min(dist, minDist); + } + // Compute semantic distance (in embedding space) between the pose tag and all joints + // and corresponding matching costs. 
+ auto poseTag = posesTags[j]; + for (size_t i = 0; i < numAdded; i++) { + float diff = static_cast<float>(cv::norm(tags[i] - poseTag)); + tagsDiff.at<float>(i, j) = diff; + if (diff < tagThreshold) { + diff *= dists[i] / (minDist + 1e-10f); + } + matchingCost.at<float>(i, j) = std::round(diff) * 100 - jointPeaks[i].score; + } + } + + if (numAdded > numGrouped) { + cv::copyMakeBorder(matchingCost, + matchingCost, + 0, + 0, + 0, + numAdded - numGrouped, + cv::BORDER_CONSTANT, + 10000000); + } + // Get pairs + auto res = KuhnMunkres().Solve(matchingCost); + for (size_t row = 0; row < res.size(); row++) { + size_t col = res[row]; + if (row < numAdded && col < numGrouped && tagsDiff.at<float>(row, col) < tagThreshold) { + allPoses[col].add(jointId, jointPeaks[row]); + } else { + Pose pose = Pose(numJoints); + pose.add(jointId, jointPeaks[row]); + allPoses.push_back(pose); + } + } + } + return allPoses; +} + +namespace { +cv::Point2f adjustLocation(const int x, const int y, const cv::Mat& heatMap) { + cv::Point2f delta(0.f, 0.f); + int width = heatMap.cols; + int height = heatMap.rows; + if ((1 < x) && (x < width - 1) && (1 < y) && (y < height - 1)) { + auto diffX = heatMap.at<float>(y, x + 1) - heatMap.at<float>(y, x - 1); + auto diffY = heatMap.at<float>(y + 1, x) - heatMap.at<float>(y - 1, x); + delta.x = diffX > 0 ? 0.25f : -0.25f; + delta.y = diffY > 0 ? 0.25f : -0.25f; + } + return delta; +} +} // namespace + +void adjustAndRefine(std::vector<Pose>& allPoses, + const std::vector<cv::Mat>& heatMaps, + const std::vector<cv::Mat>& aembdsMaps, + int poseId, + const float delta) { + Pose& pose = allPoses[poseId]; + float poseTag = pose.getPoseTag(); + for (size_t jointId = 0; jointId < pose.size(); jointId++) { + Peak& peak = pose.getPeak(jointId); + const cv::Mat& heatMap = heatMaps[jointId]; + const cv::Mat& aembds = aembdsMaps[jointId]; + + if (peak.score > 0) { + // Adjust + int x = static_cast<int>(peak.keypoint.x); + int y = static_cast<int>(peak.keypoint.y); + peak.keypoint += adjustLocation(x, y, heatMap); + if (delta) { + peak.keypoint.x += delta; + peak.keypoint.y += delta; + } + } else { + // Refine + // Get position with the closest tag value to the pose tag + cv::Mat diff = cv::abs(aembds - poseTag); + diff.convertTo(diff, CV_32S, 1.0, 0.0); + diff.convertTo(diff, CV_32F); + diff -= heatMap; + double min; + cv::Point2i minLoc; + cv::minMaxLoc(diff, &min, 0, &minLoc); + int x = minLoc.x; + int y = minLoc.y; + float val = heatMap.at<float>(y, x); + if (val > 0) { + peak.keypoint.x = static_cast<float>(x); + peak.keypoint.y = static_cast<float>(y); + peak.keypoint += adjustLocation(x, y, heatMap); + // Peak score is assigned directly, so it does not affect the pose score. + peak.score = val; + } + } + } +} diff --git a/python/openvino/runtime/common/models/src/classification_model.cpp b/python/openvino/runtime/common/models/src/classification_model.cpp new file mode 100644 index 0000000..90bc0d5 --- /dev/null +++ b/python/openvino/runtime/common/models/src/classification_model.cpp @@ -0,0 +1,196 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "models/classification_model.h" + +#include <algorithm> +#include <fstream> +#include <iterator> +#include <map> +#include <stdexcept> +#include <string> +#include <utility> +#include <vector> + +#include <openvino/op/softmax.hpp> +#include <openvino/op/topk.hpp> +#include <openvino/openvino.hpp> + +#include <utils/slog.hpp> + +#include "models/results.h" + +ClassificationModel::ClassificationModel(const std::string& modelFileName, + size_t nTop, + bool useAutoResize, + const std::vector<std::string>& labels, + const std::string& layout) + : ImageModel(modelFileName, useAutoResize, layout), + nTop(nTop), + labels(labels) {} + +std::unique_ptr<ResultBase> ClassificationModel::postprocess(InferenceResult& infResult) { + const ov::Tensor& indicesTensor = infResult.outputsData.find(outputsNames[0])->second; + const int* indicesPtr = indicesTensor.data<int>(); + const ov::Tensor& scoresTensor = infResult.outputsData.find(outputsNames[1])->second; + const float* scoresPtr = scoresTensor.data<float>(); + + ClassificationResult* result = new ClassificationResult(infResult.frameId, infResult.metaData); + auto retVal = std::unique_ptr<ResultBase>(result); + + result->topLabels.reserve(scoresTensor.get_size()); + for (size_t i = 0; i < scoresTensor.get_size(); ++i) { + int ind = indicesPtr[i]; + if (ind < 0 || ind >= static_cast<int>(labels.size())) { + throw std::runtime_error("Invalid index for the class label is found during postprocessing"); + } + result->topLabels.emplace_back(ind, labels[ind], scoresPtr[i]); + } + + return retVal; +} + +std::vector<std::string> ClassificationModel::loadLabels(const std::string& labelFilename) { + std::vector<std::string> labels; + + /* Read labels */ + std::ifstream inputFile(labelFilename); + if (!inputFile.is_open()) + throw std::runtime_error("Can't open the labels file: " + labelFilename); + std::string labelsLine; + while (std::getline(inputFile, labelsLine)) { + size_t labelBeginIdx = labelsLine.find(' '); + size_t labelEndIdx = labelsLine.find(','); // can be npos when class has only one label + if (labelBeginIdx == std::string::npos) { + throw std::runtime_error("The labels file has incorrect format."); + } + labels.push_back(labelsLine.substr(labelBeginIdx + 1, labelEndIdx - (labelBeginIdx + 1))); + } + if (labels.empty()) + throw std::logic_error("File is empty: " + labelFilename); + + return labels; +} + +void ClassificationModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output ------------------------------------------------- + // --------------------------- Prepare input ------------------------------------------------------ + if (model->inputs().size() != 1) { + throw std::logic_error("Classification model wrapper supports topologies with only 1 input"); + } + const auto& input = model->input(); + inputsNames.push_back(input.get_any_name()); + + const ov::Shape& inputShape = input.get_shape(); + const ov::Layout& inputLayout = getInputLayout(input); + + if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) { + throw 
std::logic_error("3-channel 4-dimensional model's input is expected"); + } + + const auto width = inputShape[ov::layout::width_idx(inputLayout)]; + const auto height = inputShape[ov::layout::height_idx(inputLayout)]; + if (height != width) { + throw std::logic_error("Model input has incorrect image shape. Must be NxN square." + " Got " + + std::to_string(height) + "x" + std::to_string(width) + "."); + } + + ov::preprocess::PrePostProcessor ppp(model); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); + + if (useAutoResize) { + ppp.input().tensor().set_spatial_dynamic_shape(); + + ppp.input() + .preprocess() + .convert_element_type(ov::element::f32) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + } + + ppp.input().model().set_layout(inputLayout); + + // --------------------------- Prepare output ----------------------------------------------------- + if (model->outputs().size() != 1) { + throw std::logic_error("Classification model wrapper supports topologies with only 1 output"); + } + + const ov::Shape& outputShape = model->output().get_shape(); + if (outputShape.size() != 2 && outputShape.size() != 4) { + throw std::logic_error("Classification model wrapper supports topologies only with" + " 2-dimensional or 4-dimensional output"); + } + + const ov::Layout outputLayout("NCHW"); + if (outputShape.size() == 4 && (outputShape[ov::layout::height_idx(outputLayout)] != 1 || + outputShape[ov::layout::width_idx(outputLayout)] != 1)) { + throw std::logic_error("Classification model wrapper supports topologies only" + " with 4-dimensional output which has last two dimensions of size 1"); + } + + size_t classesNum = outputShape[ov::layout::channels_idx(outputLayout)]; + if (nTop > classesNum) { + throw std::logic_error("The model provides " + std::to_string(classesNum) + " classes, but " + + std::to_string(nTop) + " labels are requested to be predicted"); + } + if (classesNum == labels.size() + 1) { + labels.insert(labels.begin(), "other"); + slog::warn << "Inserted 'other' label as first." 
<< slog::endl; + } else if (classesNum != labels.size()) { + throw std::logic_error("Model's number of classes and parsed labels must match (" + + std::to_string(outputShape[1]) + " and " + std::to_string(labels.size()) + ')'); + } + + ppp.output().tensor().set_element_type(ov::element::f32); + model = ppp.build(); + + // --------------------------- Adding softmax and topK output --------------------------- + auto nodes = model->get_ops(); + auto softmaxNodeIt = std::find_if(std::begin(nodes), std::end(nodes), [](const std::shared_ptr<ov::Node>& op) { + return std::string(op->get_type_name()) == "Softmax"; + }); + + std::shared_ptr<ov::Node> softmaxNode; + if (softmaxNodeIt == nodes.end()) { + auto logitsNode = model->get_output_op(0)->input(0).get_source_output().get_node(); + softmaxNode = std::make_shared<ov::op::v1::Softmax>(logitsNode->output(0), 1); + } else { + softmaxNode = *softmaxNodeIt; + } + const auto k = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{}, std::vector<size_t>{nTop}); + std::shared_ptr<ov::Node> topkNode = std::make_shared<ov::op::v3::TopK>(softmaxNode, + k, + 1, + ov::op::v3::TopK::Mode::MAX, + ov::op::v3::TopK::SortType::SORT_VALUES); + + auto indices = std::make_shared<ov::op::v0::Result>(topkNode->output(0)); + auto scores = std::make_shared<ov::op::v0::Result>(topkNode->output(1)); + ov::ResultVector res({scores, indices}); + model = std::make_shared<ov::Model>(res, model->get_parameters(), "classification"); + + // manually set output tensors name for created topK node + model->outputs()[0].set_names({"indices"}); + outputsNames.push_back("indices"); + model->outputs()[1].set_names({"scores"}); + outputsNames.push_back("scores"); + + // set output precisions + ppp = ov::preprocess::PrePostProcessor(model); + ppp.output("indices").tensor().set_element_type(ov::element::i32); + ppp.output("scores").tensor().set_element_type(ov::element::f32); + model = ppp.build(); +} diff --git a/python/openvino/runtime/common/models/src/deblurring_model.cpp b/python/openvino/runtime/common/models/src/deblurring_model.cpp new file mode 100644 index 0000000..261efb3 --- /dev/null +++ b/python/openvino/runtime/common/models/src/deblurring_model.cpp @@ -0,0 +1,158 @@ +/* +// Copyright (C) 2021-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/
+
+#include "models/deblurring_model.h"
+
+#include <algorithm>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <opencv2/imgproc.hpp>
+#include <openvino/openvino.hpp>
+
+#include <utils/ocv_common.hpp>
+#include <utils/slog.hpp>
+
+#include "models/input_data.h"
+#include "models/internal_model_data.h"
+#include "models/results.h"
+
+DeblurringModel::DeblurringModel(const std::string& modelFileName,
+ const cv::Size& inputImgSize,
+ const std::string& layout)
+ : ImageModel(modelFileName, false, layout) {
+ netInputHeight = inputImgSize.height;
+ netInputWidth = inputImgSize.width;
+}
+
+void DeblurringModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare input ------------------------------------------------------
+ if (model->inputs().size() != 1) {
+ throw std::logic_error("Deblurring model wrapper supports topologies with only 1 input");
+ }
+
+ inputsNames.push_back(model->input().get_any_name());
+
+ const ov::Shape& inputShape = model->input().get_shape();
+ const ov::Layout& inputLayout = getInputLayout(model->input());
+
+ if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
+ inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
+ throw std::logic_error("3-channel 4-dimensional model's input is expected");
+ }
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC");
+
+ ppp.input().model().set_layout(inputLayout);
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ if (model->outputs().size() != 1) {
+ throw std::logic_error("Deblurring model wrapper supports topologies with only 1 output");
+ }
+
+ outputsNames.push_back(model->output().get_any_name());
+
+ const ov::Shape& outputShape = model->output().get_shape();
+ const ov::Layout outputLayout("NCHW");
+ if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 ||
+ outputShape[ov::layout::channels_idx(outputLayout)] != 3) {
+ throw std::logic_error("3-channel 4-dimensional model's output is expected");
+ }
+
+ ppp.output().tensor().set_element_type(ov::element::f32);
+ model = ppp.build();
+
+ changeInputSize(model);
+}
+
+void DeblurringModel::changeInputSize(std::shared_ptr<ov::Model>& model) {
+ const ov::Layout& layout = ov::layout::get_layout(model->input());
+ ov::Shape inputShape = model->input().get_shape();
+
+ const auto batchId = ov::layout::batch_idx(layout);
+ const auto heightId = ov::layout::height_idx(layout);
+ const auto widthId = ov::layout::width_idx(layout);
+
+ if (inputShape[heightId] % stride || inputShape[widthId] % stride) {
+ throw std::logic_error("Model input shape HxW = " + std::to_string(inputShape[heightId]) + "x" +
+ std::to_string(inputShape[widthId]) + " must be divisible by stride " +
+ std::to_string(stride));
+ }
+
+ netInputHeight = static_cast<int>((netInputHeight + stride - 1) / stride) * stride;
+ netInputWidth = static_cast<int>((netInputWidth + stride - 1) / stride) * stride;
+
+ inputShape[batchId] = 1;
+ inputShape[heightId] = netInputHeight;
+ inputShape[widthId] = netInputWidth;
+
+ model->reshape(inputShape);
+}
+
+std::shared_ptr<InternalModelData> DeblurringModel::preprocess(const InputData& inputData, ov::InferRequest& request) {
+ auto& image = inputData.asRef<ImageInputData>().inputImage;
+ size_t h =
image.rows; + size_t w = image.cols; + cv::Mat resizedImage; + + if (netInputHeight - stride < h && h <= netInputHeight && netInputWidth - stride < w && w <= netInputWidth) { + int bottom = netInputHeight - h; + int right = netInputWidth - w; + cv::copyMakeBorder(image, resizedImage, 0, bottom, 0, right, cv::BORDER_CONSTANT, 0); + } else { + slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl; + cv::resize(image, resizedImage, cv::Size(netInputWidth, netInputHeight)); + } + request.set_input_tensor(wrapMat2Tensor(resizedImage)); + + return std::make_shared<InternalImageModelData>(image.cols, image.rows); +} + +std::unique_ptr<ResultBase> DeblurringModel::postprocess(InferenceResult& infResult) { + ImageResult* result = new ImageResult; + *static_cast<ResultBase*>(result) = static_cast<ResultBase&>(infResult); + + const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>(); + const auto outputData = infResult.getFirstOutputTensor().data<float>(); + + std::vector<cv::Mat> imgPlanes; + const ov::Shape& outputShape = infResult.getFirstOutputTensor().get_shape(); + const ov::Layout outputLayout("NCHW"); + size_t outHeight = static_cast<int>((outputShape[ov::layout::height_idx(outputLayout)])); + size_t outWidth = static_cast<int>((outputShape[ov::layout::width_idx(outputLayout)])); + size_t numOfPixels = outWidth * outHeight; + imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])), + cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])), + cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))}; + cv::Mat resultImg; + cv::merge(imgPlanes, resultImg); + + if (netInputHeight - stride < static_cast<size_t>(inputImgSize.inputImgHeight) && + static_cast<size_t>(inputImgSize.inputImgHeight) <= netInputHeight && + netInputWidth - stride < static_cast<size_t>(inputImgSize.inputImgWidth) && + static_cast<size_t>(inputImgSize.inputImgWidth) <= netInputWidth) { + result->resultImage = resultImg(cv::Rect(0, 0, inputImgSize.inputImgWidth, inputImgSize.inputImgHeight)); + } else { + cv::resize(resultImg, result->resultImage, cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight)); + } + + result->resultImage.convertTo(result->resultImage, CV_8UC3, 255); + + return std::unique_ptr<ResultBase>(result); +} diff --git a/python/openvino/runtime/common/models/src/detection_model.cpp b/python/openvino/runtime/common/models/src/detection_model.cpp new file mode 100644 index 0000000..83e2d22 --- /dev/null +++ b/python/openvino/runtime/common/models/src/detection_model.cpp @@ -0,0 +1,52 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "models/detection_model.h" + +#include <fstream> +#include <stdexcept> +#include <string> +#include <vector> + +#include "models/image_model.h" + +DetectionModel::DetectionModel(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + const std::vector<std::string>& labels, + const std::string& layout) + : ImageModel(modelFileName, useAutoResize, layout), + confidenceThreshold(confidenceThreshold), + labels(labels) {} + +std::vector<std::string> DetectionModel::loadLabels(const std::string& labelFilename) { + std::vector<std::string> labelsList; + + /* Read labels (if any) */ + if (!labelFilename.empty()) { + std::ifstream inputFile(labelFilename); + if (!inputFile.is_open()) + throw std::runtime_error("Can't open the labels file: " + labelFilename); + std::string label; + while (std::getline(inputFile, label)) { + labelsList.push_back(label); + } + if (labelsList.empty()) + throw std::logic_error("File is empty: " + labelFilename); + } + + return labelsList; +} diff --git a/python/openvino/runtime/common/models/src/detection_model_centernet.cpp b/python/openvino/runtime/common/models/src/detection_model_centernet.cpp new file mode 100644 index 0000000..eac42a7 --- /dev/null +++ b/python/openvino/runtime/common/models/src/detection_model_centernet.cpp @@ -0,0 +1,302 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "models/detection_model_centernet.h" + +#include <stddef.h> + +#include <algorithm> +#include <cmath> +#include <map> +#include <stdexcept> +#include <utility> + +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> +#include <openvino/openvino.hpp> + +#include <utils/common.hpp> +#include <utils/image_utils.h> +#include <utils/ocv_common.hpp> + +#include "models/input_data.h" +#include "models/internal_model_data.h" +#include "models/results.h" + +ModelCenterNet::ModelCenterNet(const std::string& modelFileName, + float confidenceThreshold, + const std::vector<std::string>& labels, + const std::string& layout) + : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) {} + +void ModelCenterNet::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output ------------------------------------------------- + // --------------------------- Prepare input ------------------------------------------------------ + if (model->inputs().size() != 1) { + throw std::logic_error("CenterNet model wrapper expects models that have only 1 input"); + } + + const ov::Shape& inputShape = model->input().get_shape(); + const ov::Layout& inputLayout = getInputLayout(model->input()); + + if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) { + throw std::logic_error("Expected 3-channel input"); + } + + ov::preprocess::PrePostProcessor ppp(model); + inputTransform.setPrecision(ppp, model->input().get_any_name()); + ppp.input().tensor().set_layout("NHWC"); + + ppp.input().model().set_layout(inputLayout); + + // --------------------------- Reading image input parameters ------------------------------------------- + inputsNames.push_back(model->input().get_any_name()); + netInputWidth = inputShape[ov::layout::width_idx(inputLayout)]; + netInputHeight = inputShape[ov::layout::height_idx(inputLayout)]; + + // --------------------------- Prepare output ----------------------------------------------------- + if (model->outputs().size() != 3) { + throw std::logic_error("CenterNet model wrapper expects models that have 3 outputs"); + } + + const ov::Layout outLayout{"NCHW"}; + for (const auto& output : model->outputs()) { + auto outTensorName = output.get_any_name(); + outputsNames.push_back(outTensorName); + ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outLayout); + } + std::sort(outputsNames.begin(), outputsNames.end()); + model = ppp.build(); +} + +cv::Point2f getDir(const cv::Point2f& srcPoint, float rotRadius) { + float sn = sinf(rotRadius); + float cs = cosf(rotRadius); + + cv::Point2f srcResult(0.0f, 0.0f); + srcResult.x = srcPoint.x * cs - srcPoint.y * sn; + srcResult.y = srcPoint.x * sn + srcPoint.y * cs; + + return srcResult; +} + +cv::Point2f get3rdPoint(const cv::Point2f& a, const cv::Point2f& b) { + cv::Point2f direct = a - b; + return b + cv::Point2f(-direct.y, direct.x); +} + +cv::Mat getAffineTransform(float centerX, + float centerY, + int srcW, + float rot, + size_t outputWidth, + size_t outputHeight, + bool inv = false) { + float rotRad = static_cast<float>(CV_PI) * rot / 180.0f; + auto srcDir = getDir({0.0f, -0.5f * srcW}, rotRad); + cv::Point2f dstDir(0.0f, -0.5f * outputWidth); + std::vector<cv::Point2f> src(3, {0.0f, 0.0f}); + std::vector<cv::Point2f> dst(3, {0.0f, 0.0f}); + + src[0] = {centerX, centerY}; + src[1] = srcDir + src[0]; + src[2] = get3rdPoint(src[0], src[1]); + + dst[0] = {outputWidth * 0.5f, outputHeight * 0.5f}; + dst[1] = dst[0] + dstDir; + dst[2] = 
get3rdPoint(dst[0], dst[1]); + + cv::Mat trans; + if (inv) { + trans = cv::getAffineTransform(dst, src); + } else { + trans = cv::getAffineTransform(src, dst); + } + + return trans; +} + +std::shared_ptr<InternalModelData> ModelCenterNet::preprocess(const InputData& inputData, ov::InferRequest& request) { + auto& img = inputData.asRef<ImageInputData>().inputImage; + const auto& resizedImg = resizeImageExt(img, netInputWidth, netInputHeight, RESIZE_KEEP_ASPECT_LETTERBOX); + + request.set_input_tensor(wrapMat2Tensor(inputTransform(resizedImg))); + return std::make_shared<InternalImageModelData>(img.cols, img.rows); +} + +std::vector<std::pair<size_t, float>> nms(float* scoresPtr, const ov::Shape& shape, float threshold, int kernel = 3) { + std::vector<std::pair<size_t, float>> scores; + scores.reserve(ModelCenterNet::INIT_VECTOR_SIZE); + auto chSize = shape[2] * shape[3]; + + for (size_t i = 0; i < shape[1] * shape[2] * shape[3]; ++i) { + scoresPtr[i] = expf(scoresPtr[i]) / (1 + expf(scoresPtr[i])); + } + + for (size_t ch = 0; ch < shape[1]; ++ch) { + for (size_t w = 0; w < shape[2]; ++w) { + for (size_t h = 0; h < shape[3]; ++h) { + float max = scoresPtr[chSize * ch + shape[2] * w + h]; + + // --------------------- filter on threshold-------------------------------------- + if (max < threshold) { + continue; + } + + // --------------------- store index and score------------------------------------ + scores.push_back({chSize * ch + shape[2] * w + h, max}); + + bool next = true; + // ---------------------- maxpool2d ----------------------------------------------- + for (int i = -kernel / 2; i < kernel / 2 + 1 && next; ++i) { + for (int j = -kernel / 2; j < kernel / 2 + 1; ++j) { + if (w + i >= 0 && w + i < shape[2] && h + j >= 0 && h + j < shape[3]) { + if (scoresPtr[chSize * ch + shape[2] * (w + i) + h + j] > max) { + scores.pop_back(); + next = false; + break; + } + } else { + if (max < 0) { + scores.pop_back(); + next = false; + break; + } + } + } + } + } + } + } + + return scores; +} + +static std::vector<std::pair<size_t, float>> filterScores(const ov::Tensor& scoresTensor, float threshold) { + auto shape = scoresTensor.get_shape(); + float* scoresPtr = scoresTensor.data<float>(); + + return nms(scoresPtr, shape, threshold); +} + +std::vector<std::pair<float, float>> filterReg(const ov::Tensor& regressionTensor, + const std::vector<std::pair<size_t, float>>& scores, + size_t chSize) { + const float* regPtr = regressionTensor.data<float>(); + std::vector<std::pair<float, float>> reg; + + for (auto s : scores) { + reg.push_back({regPtr[s.first % chSize], regPtr[chSize + s.first % chSize]}); + } + + return reg; +} + +std::vector<std::pair<float, float>> filterWH(const ov::Tensor& whTensor, + const std::vector<std::pair<size_t, float>>& scores, + size_t chSize) { + const float* whPtr = whTensor.data<float>(); + std::vector<std::pair<float, float>> wh; + + for (auto s : scores) { + wh.push_back({whPtr[s.first % chSize], whPtr[chSize + s.first % chSize]}); + } + + return wh; +} + +std::vector<ModelCenterNet::BBox> calcBoxes(const std::vector<std::pair<size_t, float>>& scores, + const std::vector<std::pair<float, float>>& reg, + const std::vector<std::pair<float, float>>& wh, + const ov::Shape& shape) { + std::vector<ModelCenterNet::BBox> boxes(scores.size()); + + for (size_t i = 0; i < boxes.size(); ++i) { + size_t chIdx = scores[i].first % (shape[2] * shape[3]); + auto xCenter = chIdx % shape[3]; + auto yCenter = chIdx / shape[3]; + + boxes[i].left = xCenter + reg[i].first - wh[i].first / 
2.0f; + boxes[i].top = yCenter + reg[i].second - wh[i].second / 2.0f; + boxes[i].right = xCenter + reg[i].first + wh[i].first / 2.0f; + boxes[i].bottom = yCenter + reg[i].second + wh[i].second / 2.0f; + } + + return boxes; +} + +void transform(std::vector<ModelCenterNet::BBox>& boxes, + const ov::Shape& shape, + int scale, + float centerX, + float centerY) { + cv::Mat1f trans = getAffineTransform(centerX, centerY, scale, 0, shape[2], shape[3], true); + + for (auto& b : boxes) { + ModelCenterNet::BBox newbb; + + newbb.left = trans.at<float>(0, 0) * b.left + trans.at<float>(0, 1) * b.top + trans.at<float>(0, 2); + newbb.top = trans.at<float>(1, 0) * b.left + trans.at<float>(1, 1) * b.top + trans.at<float>(1, 2); + newbb.right = trans.at<float>(0, 0) * b.right + trans.at<float>(0, 1) * b.bottom + trans.at<float>(0, 2); + newbb.bottom = trans.at<float>(1, 0) * b.right + trans.at<float>(1, 1) * b.bottom + trans.at<float>(1, 2); + + b = newbb; + } +} + +std::unique_ptr<ResultBase> ModelCenterNet::postprocess(InferenceResult& infResult) { + // --------------------------- Filter data and get valid indices --------------------------------- + const auto& heatmapTensor = infResult.outputsData[outputsNames[0]]; + const auto& heatmapTensorShape = heatmapTensor.get_shape(); + const auto chSize = heatmapTensorShape[2] * heatmapTensorShape[3]; + const auto scores = filterScores(heatmapTensor, confidenceThreshold); + + const auto& regressionTensor = infResult.outputsData[outputsNames[1]]; + const auto reg = filterReg(regressionTensor, scores, chSize); + + const auto& whTensor = infResult.outputsData[outputsNames[2]]; + const auto wh = filterWH(whTensor, scores, chSize); + + // --------------------------- Calculate bounding boxes & apply inverse affine transform ---------- + auto boxes = calcBoxes(scores, reg, wh, heatmapTensorShape); + + const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth; + const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight; + const auto scale = std::max(imgWidth, imgHeight); + const float centerX = imgWidth / 2.0f; + const float centerY = imgHeight / 2.0f; + + transform(boxes, heatmapTensorShape, scale, centerX, centerY); + + // --------------------------- Create detection result objects ------------------------------------ + DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); + + result->objects.reserve(scores.size()); + for (size_t i = 0; i < scores.size(); ++i) { + DetectedObject desc; + desc.confidence = scores[i].second; + desc.labelID = scores[i].first / chSize; + desc.label = getLabelName(desc.labelID); + desc.x = clamp(boxes[i].left, 0.f, static_cast<float>(imgWidth)); + desc.y = clamp(boxes[i].top, 0.f, static_cast<float>(imgHeight)); + desc.width = clamp(boxes[i].getWidth(), 0.f, static_cast<float>(imgWidth)); + desc.height = clamp(boxes[i].getHeight(), 0.f, static_cast<float>(imgHeight)); + + result->objects.push_back(desc); + } + + return std::unique_ptr<ResultBase>(result); +} diff --git a/python/openvino/runtime/common/models/src/detection_model_faceboxes.cpp b/python/openvino/runtime/common/models/src/detection_model_faceboxes.cpp new file mode 100644 index 0000000..bb349a6 --- /dev/null +++ b/python/openvino/runtime/common/models/src/detection_model_faceboxes.cpp @@ -0,0 +1,261 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "models/detection_model_faceboxes.h" + +#include <algorithm> +#include <cmath> +#include <map> +#include <stdexcept> + +#include <openvino/openvino.hpp> + +#include <utils/common.hpp> +#include <utils/nms.hpp> +#include <utils/ocv_common.hpp> + +#include "models/internal_model_data.h" +#include "models/results.h" + +ModelFaceBoxes::ModelFaceBoxes(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + float boxIOUThreshold, + const std::string& layout) + : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout), + maxProposalsCount(0), + boxIOUThreshold(boxIOUThreshold), + variance({0.1f, 0.2f}), + steps({32, 64, 128}), + minSizes({{32, 64, 128}, {256}, {512}}) {} + +void ModelFaceBoxes::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output ------------------------------------------------- + // --------------------------- Prepare input ------------------------------------------------------ + if (model->inputs().size() != 1) { + throw std::logic_error("FaceBoxes model wrapper expects models that have only 1 input"); + } + + const ov::Shape& inputShape = model->input().get_shape(); + const ov::Layout& inputLayout = getInputLayout(model->input()); + + if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) { + throw std::logic_error("Expected 3-channel input"); + } + + ov::preprocess::PrePostProcessor ppp(model); + inputTransform.setPrecision(ppp, model->input().get_any_name()); + ppp.input().tensor().set_layout({"NHWC"}); + + if (useAutoResize) { + ppp.input().tensor().set_spatial_dynamic_shape(); + + ppp.input() + .preprocess() + .convert_element_type(ov::element::f32) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + } + + ppp.input().model().set_layout(inputLayout); + + // --------------------------- Reading image input parameters ------------------------------------------- + inputsNames.push_back(model->input().get_any_name()); + netInputWidth = inputShape[ov::layout::width_idx(inputLayout)]; + netInputHeight = inputShape[ov::layout::height_idx(inputLayout)]; + + // --------------------------- Prepare output ----------------------------------------------------- + if (model->outputs().size() != 2) { + throw std::logic_error("FaceBoxes model wrapper expects models that have 2 outputs"); + } + + const ov::Layout outputLayout{"CHW"}; + maxProposalsCount = model->outputs().front().get_shape()[ov::layout::height_idx(outputLayout)]; + for (const auto& output : model->outputs()) { + const auto outTensorName = output.get_any_name(); + outputsNames.push_back(outTensorName); + ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout); + } + std::sort(outputsNames.begin(), outputsNames.end()); + model = ppp.build(); + + // --------------------------- Calculating anchors ---------------------------------------------------- + std::vector<std::pair<size_t, size_t>> featureMaps; + for (auto s : steps) { + featureMaps.push_back({netInputHeight / s, netInputWidth / s}); + } 
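+ // priorBoxes() densifies anchors over these per-level grids: a cell on the first
+ // level (step 32) yields 16 + 4 + 1 = 21 shifted anchors for min sizes 32/64/128,
+ // while cells on the deeper levels (steps 64 and 128) yield one centered anchor each.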
+ + priorBoxes(featureMaps); +} + +void calculateAnchors(std::vector<Anchor>& anchors, + const std::vector<float>& vx, + const std::vector<float>& vy, + const int minSize, + const int step) { + float skx = static_cast<float>(minSize); + float sky = static_cast<float>(minSize); + + std::vector<float> dense_cx, dense_cy; + + for (auto x : vx) { + dense_cx.push_back(x * step); + } + + for (auto y : vy) { + dense_cy.push_back(y * step); + } + + for (auto cy : dense_cy) { + for (auto cx : dense_cx) { + anchors.push_back( + {cx - 0.5f * skx, cy - 0.5f * sky, cx + 0.5f * skx, cy + 0.5f * sky}); // left top right bottom + } + } +} + +void calculateAnchorsZeroLevel(std::vector<Anchor>& anchors, + const int fx, + const int fy, + const std::vector<int>& minSizes, + const int step) { + for (auto s : minSizes) { + std::vector<float> vx, vy; + if (s == 32) { + vx.push_back(static_cast<float>(fx)); + vx.push_back(fx + 0.25f); + vx.push_back(fx + 0.5f); + vx.push_back(fx + 0.75f); + + vy.push_back(static_cast<float>(fy)); + vy.push_back(fy + 0.25f); + vy.push_back(fy + 0.5f); + vy.push_back(fy + 0.75f); + } else if (s == 64) { + vx.push_back(static_cast<float>(fx)); + vx.push_back(fx + 0.5f); + + vy.push_back(static_cast<float>(fy)); + vy.push_back(fy + 0.5f); + } else { + vx.push_back(fx + 0.5f); + vy.push_back(fy + 0.5f); + } + calculateAnchors(anchors, vx, vy, s, step); + } +} + +void ModelFaceBoxes::priorBoxes(const std::vector<std::pair<size_t, size_t>>& featureMaps) { + anchors.reserve(maxProposalsCount); + + for (size_t k = 0; k < featureMaps.size(); ++k) { + std::vector<float> a; + for (size_t i = 0; i < featureMaps[k].first; ++i) { + for (size_t j = 0; j < featureMaps[k].second; ++j) { + if (k == 0) { + calculateAnchorsZeroLevel(anchors, j, i, minSizes[k], steps[k]); + } else { + calculateAnchors(anchors, {j + 0.5f}, {i + 0.5f}, minSizes[k][0], steps[k]); + } + } + } + } +} + +std::pair<std::vector<size_t>, std::vector<float>> filterScores(const ov::Tensor& scoresTensor, + const float confidenceThreshold) { + auto shape = scoresTensor.get_shape(); + const float* scoresPtr = scoresTensor.data<float>(); + + std::vector<size_t> indices; + std::vector<float> scores; + scores.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE); + indices.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE); + for (size_t i = 1; i < shape[1] * shape[2]; i = i + 2) { + if (scoresPtr[i] > confidenceThreshold) { + indices.push_back(i / 2); + scores.push_back(scoresPtr[i]); + } + } + + return {indices, scores}; +} + +std::vector<Anchor> filterBoxes(const ov::Tensor& boxesTensor, + const std::vector<Anchor>& anchors, + const std::vector<size_t>& validIndices, + const std::vector<float>& variance) { + auto shape = boxesTensor.get_shape(); + const float* boxesPtr = boxesTensor.data<float>(); + + std::vector<Anchor> boxes; + boxes.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE); + for (auto i : validIndices) { + auto objStart = shape[2] * i; + + auto dx = boxesPtr[objStart]; + auto dy = boxesPtr[objStart + 1]; + auto dw = boxesPtr[objStart + 2]; + auto dh = boxesPtr[objStart + 3]; + + auto predCtrX = dx * variance[0] * anchors[i].getWidth() + anchors[i].getXCenter(); + auto predCtrY = dy * variance[0] * anchors[i].getHeight() + anchors[i].getYCenter(); + auto predW = exp(dw * variance[1]) * anchors[i].getWidth(); + auto predH = exp(dh * variance[1]) * anchors[i].getHeight(); + + boxes.push_back({static_cast<float>(predCtrX - 0.5f * predW), + static_cast<float>(predCtrY - 0.5f * predH), + static_cast<float>(predCtrX + 0.5f * predW), + 
static_cast<float>(predCtrY + 0.5f * predH)}); + } + + return boxes; +} + +std::unique_ptr<ResultBase> ModelFaceBoxes::postprocess(InferenceResult& infResult) { + // Filter scores and get valid indices for bounding boxes + const auto scoresTensor = infResult.outputsData[outputsNames[1]]; + const auto scores = filterScores(scoresTensor, confidenceThreshold); + + // Filter bounding boxes on indices + auto boxesTensor = infResult.outputsData[outputsNames[0]]; + std::vector<Anchor> boxes = filterBoxes(boxesTensor, anchors, scores.first, variance); + + // Apply Non-maximum Suppression + const std::vector<int> keep = nms(boxes, scores.second, boxIOUThreshold); + + // Create detection result objects + DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); + const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth; + const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight; + const float scaleX = static_cast<float>(netInputWidth) / imgWidth; + const float scaleY = static_cast<float>(netInputHeight) / imgHeight; + + result->objects.reserve(keep.size()); + for (auto i : keep) { + DetectedObject desc; + desc.confidence = scores.second[i]; + desc.x = clamp(boxes[i].left / scaleX, 0.f, static_cast<float>(imgWidth)); + desc.y = clamp(boxes[i].top / scaleY, 0.f, static_cast<float>(imgHeight)); + desc.width = clamp(boxes[i].getWidth() / scaleX, 0.f, static_cast<float>(imgWidth)); + desc.height = clamp(boxes[i].getHeight() / scaleY, 0.f, static_cast<float>(imgHeight)); + desc.labelID = 0; + desc.label = labels[0]; + + result->objects.push_back(desc); + } + + return std::unique_ptr<ResultBase>(result); +} diff --git a/python/openvino/runtime/common/models/src/detection_model_retinaface.cpp b/python/openvino/runtime/common/models/src/detection_model_retinaface.cpp new file mode 100644 index 0000000..8835725 --- /dev/null +++ b/python/openvino/runtime/common/models/src/detection_model_retinaface.cpp @@ -0,0 +1,394 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "models/detection_model_retinaface.h" + +#include <stddef.h> + +#include <algorithm> +#include <cmath> +#include <stdexcept> + +#include <opencv2/core.hpp> +#include <openvino/openvino.hpp> + +#include <utils/common.hpp> +#include <utils/nms.hpp> + +#include "models/internal_model_data.h" +#include "models/results.h" + +ModelRetinaFace::ModelRetinaFace(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + float boxIOUThreshold, + const std::string& layout) + : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout), // Default label is "Face" + shouldDetectMasks(false), + shouldDetectLandmarks(false), + boxIOUThreshold(boxIOUThreshold), + maskThreshold(0.8f), + landmarkStd(1.0f), + anchorCfg({{32, {32, 16}, 16, {1}}, {16, {8, 4}, 16, {1}}, {8, {2, 1}, 16, {1}}}) { + generateAnchorsFpn(); +} + +void ModelRetinaFace::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output ------------------------------------------------- + // --------------------------- Prepare input ------------------------------------------------------ + if (model->inputs().size() != 1) { + throw std::logic_error("RetinaFace model wrapper expects models that have only 1 input"); + } + const ov::Shape& inputShape = model->input().get_shape(); + const ov::Layout& inputLayout = getInputLayout(model->input()); + + if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) { + throw std::logic_error("Expected 3-channel input"); + } + + ov::preprocess::PrePostProcessor ppp(model); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); + + if (useAutoResize) { + ppp.input().tensor().set_spatial_dynamic_shape(); + + ppp.input() + .preprocess() + .convert_element_type(ov::element::f32) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + } + + ppp.input().model().set_layout(inputLayout); + + // --------------------------- Reading image input parameters ------------------------------------------- + inputsNames.push_back(model->input().get_any_name()); + netInputWidth = inputShape[ov::layout::width_idx(inputLayout)]; + netInputHeight = inputShape[ov::layout::height_idx(inputLayout)]; + + // --------------------------- Prepare output ----------------------------------------------------- + + const ov::OutputVector& outputs = model->outputs(); + if (outputs.size() != 6 && outputs.size() != 9 && outputs.size() != 12) { + throw std::logic_error("RetinaFace model wrapper expects models that have 6, 9 or 12 outputs"); + } + + const ov::Layout outputLayout{"NCHW"}; + std::vector<size_t> outputsSizes[OUT_MAX]; + for (const auto& output : model->outputs()) { + auto outTensorName = output.get_any_name(); + outputsNames.push_back(outTensorName); + ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout); + + OutputType type = OUT_MAX; + if (outTensorName.find("box") != std::string::npos) { + type = OUT_BOXES; + } else if (outTensorName.find("cls") != std::string::npos) { + type = OUT_SCORES; + } else if (outTensorName.find("landmark") != std::string::npos) { + type = OUT_LANDMARKS; + shouldDetectLandmarks = true; + } else if (outTensorName.find("type") != std::string::npos) { + type = OUT_MASKSCORES; + labels.clear(); + labels.push_back("No Mask"); + labels.push_back("Mask"); + shouldDetectMasks = true; + landmarkStd = 0.2f; + } else { + continue; + } + + size_t num = output.get_shape()[ov::layout::height_idx(outputLayout)]; + size_t i = 0; 
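+ // Insertion-sort each list of output names by ascending feature-map size, so that
+ // level index idx lines up with anchorCfg[idx] (stride 32, the coarsest map, first).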
+ for (; i < outputsSizes[type].size(); ++i) { + if (num < outputsSizes[type][i]) { + break; + } + } + separateOutputsNames[type].insert(separateOutputsNames[type].begin() + i, outTensorName); + outputsSizes[type].insert(outputsSizes[type].begin() + i, num); + } + model = ppp.build(); + + for (size_t idx = 0; idx < outputsSizes[OUT_BOXES].size(); ++idx) { + size_t width = outputsSizes[OUT_BOXES][idx]; + size_t height = outputsSizes[OUT_BOXES][idx]; + auto s = anchorCfg[idx].stride; + auto anchorNum = anchorsFpn[s].size(); + + anchors.push_back(std::vector<Anchor>(height * width * anchorNum)); + for (size_t iw = 0; iw < width; ++iw) { + size_t sw = iw * s; + for (size_t ih = 0; ih < height; ++ih) { + size_t sh = ih * s; + for (size_t k = 0; k < anchorNum; ++k) { + Anchor& anc = anchors[idx][(ih * width + iw) * anchorNum + k]; + anc.left = anchorsFpn[s][k].left + sw; + anc.top = anchorsFpn[s][k].top + sh; + anc.right = anchorsFpn[s][k].right + sw; + anc.bottom = anchorsFpn[s][k].bottom + sh; + } + } + } + } +} + +std::vector<Anchor> ratioEnum(const Anchor& anchor, const std::vector<int>& ratios) { + std::vector<Anchor> retVal; + const auto w = anchor.getWidth(); + const auto h = anchor.getHeight(); + const auto xCtr = anchor.getXCenter(); + const auto yCtr = anchor.getYCenter(); + + for (const auto ratio : ratios) { + const auto size = w * h; + const auto sizeRatio = static_cast<float>(size) / ratio; + const auto ws = sqrt(sizeRatio); + const auto hs = ws * ratio; + retVal.push_back({static_cast<float>(xCtr - 0.5f * (ws - 1.0f)), + static_cast<float>(yCtr - 0.5f * (hs - 1.0f)), + static_cast<float>(xCtr + 0.5f * (ws - 1.0f)), + static_cast<float>(yCtr + 0.5f * (hs - 1.0f))}); + } + return retVal; +} + +std::vector<Anchor> scaleEnum(const Anchor& anchor, const std::vector<int>& scales) { + std::vector<Anchor> retVal; + const auto w = anchor.getWidth(); + const auto h = anchor.getHeight(); + const auto xCtr = anchor.getXCenter(); + const auto yCtr = anchor.getYCenter(); + + for (auto scale : scales) { + const auto ws = w * scale; + const auto hs = h * scale; + retVal.push_back({static_cast<float>(xCtr - 0.5f * (ws - 1.0f)), + static_cast<float>(yCtr - 0.5f * (hs - 1.0f)), + static_cast<float>(xCtr + 0.5f * (ws - 1.0f)), + static_cast<float>(yCtr + 0.5f * (hs - 1.0f))}); + } + return retVal; +} + +std::vector<Anchor> generateAnchors(const int baseSize, + const std::vector<int>& ratios, + const std::vector<int>& scales) { + Anchor baseAnchor{0.0f, 0.0f, baseSize - 1.0f, baseSize - 1.0f}; + auto ratioAnchors = ratioEnum(baseAnchor, ratios); + std::vector<Anchor> retVal; + + for (const auto& ra : ratioAnchors) { + auto addon = scaleEnum(ra, scales); + retVal.insert(retVal.end(), addon.begin(), addon.end()); + } + return retVal; +} + +void ModelRetinaFace::generateAnchorsFpn() { + auto cfg = anchorCfg; + std::sort(cfg.begin(), cfg.end(), [](const AnchorCfgLine& x, const AnchorCfgLine& y) { + return x.stride > y.stride; + }); + + for (const auto& cfgLine : cfg) { + anchorsFpn.emplace(cfgLine.stride, generateAnchors(cfgLine.baseSize, cfgLine.ratios, cfgLine.scales)); + } +} + +std::vector<size_t> thresholding(const ov::Tensor& scoresTensor, const int anchorNum, const float confidenceThreshold) { + std::vector<size_t> indices; + indices.reserve(ModelRetinaFace::INIT_VECTOR_SIZE); + auto shape = scoresTensor.get_shape(); + size_t restAnchors = shape[1] - anchorNum; + const float* scoresPtr = scoresTensor.data<float>(); + + for (size_t x = anchorNum; x < shape[1]; ++x) { + for (size_t y = 0; y < 
shape[2]; ++y) { + for (size_t z = 0; z < shape[3]; ++z) { + auto idx = (x * shape[2] + y) * shape[3] + z; + auto score = scoresPtr[idx]; + if (score >= confidenceThreshold) { + indices.push_back((y * shape[3] + z) * restAnchors + (x - anchorNum)); + } + } + } + } + + return indices; +} + +void filterScores(std::vector<float>& scores, + const std::vector<size_t>& indices, + const ov::Tensor& scoresTensor, + const int anchorNum) { + const auto& shape = scoresTensor.get_shape(); + const float* scoresPtr = scoresTensor.data<float>(); + const auto start = shape[2] * shape[3] * anchorNum; + + for (auto i : indices) { + auto offset = (i % anchorNum) * shape[2] * shape[3] + i / anchorNum; + scores.push_back(scoresPtr[start + offset]); + } +} + +void filterBoxes(std::vector<Anchor>& boxes, + const std::vector<size_t>& indices, + const ov::Tensor& boxesTensor, + int anchorNum, + const std::vector<Anchor>& anchors) { + const auto& shape = boxesTensor.get_shape(); + const float* boxesPtr = boxesTensor.data<float>(); + const auto boxPredLen = shape[1] / anchorNum; + const auto blockWidth = shape[2] * shape[3]; + + for (auto i : indices) { + auto offset = blockWidth * boxPredLen * (i % anchorNum) + (i / anchorNum); + + const auto dx = boxesPtr[offset]; + const auto dy = boxesPtr[offset + blockWidth]; + const auto dw = boxesPtr[offset + blockWidth * 2]; + const auto dh = boxesPtr[offset + blockWidth * 3]; + + const auto predCtrX = dx * anchors[i].getWidth() + anchors[i].getXCenter(); + const auto predCtrY = dy * anchors[i].getHeight() + anchors[i].getYCenter(); + const auto predW = exp(dw) * anchors[i].getWidth(); + const auto predH = exp(dh) * anchors[i].getHeight(); + + boxes.push_back({static_cast<float>(predCtrX - 0.5f * (predW - 1.0f)), + static_cast<float>(predCtrY - 0.5f * (predH - 1.0f)), + static_cast<float>(predCtrX + 0.5f * (predW - 1.0f)), + static_cast<float>(predCtrY + 0.5f * (predH - 1.0f))}); + } +} + +void filterLandmarks(std::vector<cv::Point2f>& landmarks, + const std::vector<size_t>& indices, + const ov::Tensor& landmarksTensor, + int anchorNum, + const std::vector<Anchor>& anchors, + const float landmarkStd) { + const auto& shape = landmarksTensor.get_shape(); + const float* landmarksPtr = landmarksTensor.data<float>(); + const auto landmarkPredLen = shape[1] / anchorNum; + const auto blockWidth = shape[2] * shape[3]; + + for (auto i : indices) { + for (int j = 0; j < ModelRetinaFace::LANDMARKS_NUM; ++j) { + auto offset = (i % anchorNum) * landmarkPredLen * shape[2] * shape[3] + i / anchorNum; + auto deltaX = landmarksPtr[offset + j * 2 * blockWidth] * landmarkStd; + auto deltaY = landmarksPtr[offset + (j * 2 + 1) * blockWidth] * landmarkStd; + landmarks.push_back({deltaX * anchors[i].getWidth() + anchors[i].getXCenter(), + deltaY * anchors[i].getHeight() + anchors[i].getYCenter()}); + } + } +} + +void filterMasksScores(std::vector<float>& masks, + const std::vector<size_t>& indices, + const ov::Tensor& maskScoresTensor, + const int anchorNum) { + auto shape = maskScoresTensor.get_shape(); + const float* maskScoresPtr = maskScoresTensor.data<float>(); + auto start = shape[2] * shape[3] * anchorNum * 2; + + for (auto i : indices) { + auto offset = (i % anchorNum) * shape[2] * shape[3] + i / anchorNum; + masks.push_back(maskScoresPtr[start + offset]); + } +} + +std::unique_ptr<ResultBase> ModelRetinaFace::postprocess(InferenceResult& infResult) { + std::vector<float> scores; + scores.reserve(INIT_VECTOR_SIZE); + std::vector<Anchor> boxes; + boxes.reserve(INIT_VECTOR_SIZE); + 
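+ // Candidates from every FPN level are appended to these flat vectors, so index i
+ // refers to one detection: boxes[i] and scores[i], with its landmarks stored in the
+ // LANDMARKS_NUM consecutive slots starting at i * LANDMARKS_NUM.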
std::vector<cv::Point2f> landmarks; + std::vector<float> masks; + + if (shouldDetectLandmarks) { + landmarks.reserve(INIT_VECTOR_SIZE); + } + if (shouldDetectMasks) { + masks.reserve(INIT_VECTOR_SIZE); + } + + // --------------------------- Gather & Filter output from all levels + // ---------------------------------------------------------- + for (size_t idx = 0; idx < anchorCfg.size(); ++idx) { + const auto boxRaw = infResult.outputsData[separateOutputsNames[OUT_BOXES][idx]]; + const auto scoresRaw = infResult.outputsData[separateOutputsNames[OUT_SCORES][idx]]; + auto s = anchorCfg[idx].stride; + auto anchorNum = anchorsFpn[s].size(); + + auto validIndices = thresholding(scoresRaw, anchorNum, confidenceThreshold); + filterScores(scores, validIndices, scoresRaw, anchorNum); + filterBoxes(boxes, validIndices, boxRaw, anchorNum, anchors[idx]); + if (shouldDetectLandmarks) { + const auto landmarksRaw = infResult.outputsData[separateOutputsNames[OUT_LANDMARKS][idx]]; + filterLandmarks(landmarks, validIndices, landmarksRaw, anchorNum, anchors[idx], landmarkStd); + } + if (shouldDetectMasks) { + const auto masksRaw = infResult.outputsData[separateOutputsNames[OUT_MASKSCORES][idx]]; + filterMasksScores(masks, validIndices, masksRaw, anchorNum); + } + } + // --------------------------- Apply Non-maximum Suppression + // ---------------------------------------------------------- !shouldDetectLandmarks determines nms behavior, if + // true - boundaries are included in areas calculation + const auto keep = nms(boxes, scores, boxIOUThreshold, !shouldDetectLandmarks); + + // --------------------------- Create detection result objects + // -------------------------------------------------------- + RetinaFaceDetectionResult* result = new RetinaFaceDetectionResult(infResult.frameId, infResult.metaData); + + const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth; + const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight; + const auto scaleX = static_cast<float>(netInputWidth) / imgWidth; + const auto scaleY = static_cast<float>(netInputHeight) / imgHeight; + + result->objects.reserve(keep.size()); + result->landmarks.reserve(keep.size() * ModelRetinaFace::LANDMARKS_NUM); + for (auto i : keep) { + DetectedObject desc; + desc.confidence = scores[i]; + //--- Scaling coordinates + boxes[i].left /= scaleX; + boxes[i].top /= scaleY; + boxes[i].right /= scaleX; + boxes[i].bottom /= scaleY; + + desc.x = clamp(boxes[i].left, 0.f, static_cast<float>(imgWidth)); + desc.y = clamp(boxes[i].top, 0.f, static_cast<float>(imgHeight)); + desc.width = clamp(boxes[i].getWidth(), 0.f, static_cast<float>(imgWidth)); + desc.height = clamp(boxes[i].getHeight(), 0.f, static_cast<float>(imgHeight)); + //--- Default label 0 - Face. If detecting masks then labels would be 0 - No Mask, 1 - Mask + desc.labelID = shouldDetectMasks ? 
(masks[i] > maskThreshold) : 0; + desc.label = labels[desc.labelID]; + result->objects.push_back(desc); + + //--- Scaling landmarks coordinates + for (size_t l = 0; l < ModelRetinaFace::LANDMARKS_NUM && shouldDetectLandmarks; ++l) { + landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].x = + clamp(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].x / scaleX, 0.f, static_cast<float>(imgWidth)); + landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].y = + clamp(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].y / scaleY, 0.f, static_cast<float>(imgHeight)); + result->landmarks.push_back(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l]); + } + } + + return std::unique_ptr<ResultBase>(result); +} diff --git a/python/openvino/runtime/common/models/src/detection_model_retinaface_pt.cpp b/python/openvino/runtime/common/models/src/detection_model_retinaface_pt.cpp new file mode 100644 index 0000000..8322c3c --- /dev/null +++ b/python/openvino/runtime/common/models/src/detection_model_retinaface_pt.cpp @@ -0,0 +1,277 @@ +/* +// Copyright (C) 2021-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "models/detection_model_retinaface_pt.h" + +#include <stdint.h> + +#include <algorithm> +#include <cmath> +#include <map> +#include <stdexcept> +#include <string> +#include <vector> + +#include <openvino/openvino.hpp> + +#include <utils/common.hpp> +#include <utils/nms.hpp> +#include <utils/ocv_common.hpp> + +#include "models/internal_model_data.h" +#include "models/results.h" + +ModelRetinaFacePT::ModelRetinaFacePT(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + float boxIOUThreshold, + const std::string& layout) + : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout), // Default label is "Face" + landmarksNum(0), + boxIOUThreshold(boxIOUThreshold) {} + +void ModelRetinaFacePT::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output ------------------------------------------------- + // --------------------------- Prepare input ------------------------------------------------------ + if (model->inputs().size() != 1) { + throw std::logic_error("RetinaFacePT model wrapper expects models that have only 1 input"); + } + + const ov::Shape& inputShape = model->input().get_shape(); + const ov::Layout& inputLayout = getInputLayout(model->input()); + + if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) { + throw std::logic_error("Expected 3-channel input"); + } + + ov::preprocess::PrePostProcessor ppp(model); + inputTransform.setPrecision(ppp, model->input().get_any_name()); + ppp.input().tensor().set_layout({"NHWC"}); + + if (useAutoResize) { + ppp.input().tensor().set_spatial_dynamic_shape(); + + ppp.input() + .preprocess() + .convert_element_type(ov::element::f32) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + } + + ppp.input().model().set_layout(inputLayout); + + // --------------------------- Reading image input 
parameters -------------------------------------------
+ inputsNames.push_back(model->input().get_any_name());
+ netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
+ netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ if (model->outputs().size() != 3) {
+ throw std::logic_error("RetinaFace model wrapper expects models that have 3 outputs");
+ }
+
+ landmarksNum = 0;
+
+ outputsNames.resize(2);
+ std::vector<uint32_t> outputsSizes[OUT_MAX];
+ const ov::Layout chw("CHW");
+ const ov::Layout nchw("NCHW");
+ for (auto& output : model->outputs()) {
+ auto outTensorName = output.get_any_name();
+ outputsNames.push_back(outTensorName);
+ ppp.output(outTensorName)
+ .tensor()
+ .set_element_type(ov::element::f32)
+ .set_layout(output.get_shape().size() == 4 ? nchw : chw);
+
+ if (outTensorName.find("bbox") != std::string::npos) {
+ outputsNames[OUT_BOXES] = outTensorName;
+ } else if (outTensorName.find("cls") != std::string::npos) {
+ outputsNames[OUT_SCORES] = outTensorName;
+ } else if (outTensorName.find("landmark") != std::string::npos) {
+ // The landmarks output is optional. If it is present, resize the names array so that
+ // the landmarks name occupies the last slot; the other output names are already
+ // filled in above or will be filled in on a later iteration.
+ outputsNames.resize(std::max(outputsNames.size(), (size_t)OUT_LANDMARKS + 1));
+ outputsNames[OUT_LANDMARKS] = outTensorName;
+ landmarksNum =
+ output.get_shape()[ov::layout::width_idx(chw)] / 2; // Each landmark consists of 2 variables (x and y)
+ } else {
+ continue;
+ }
+ }
+
+ if (outputsNames[OUT_BOXES] == "" || outputsNames[OUT_SCORES] == "") {
+ throw std::logic_error("Bbox or cls layers are not found");
+ }
+
+ model = ppp.build();
+ priors = generatePriorData();
+}
+
+std::vector<size_t> ModelRetinaFacePT::filterByScore(const ov::Tensor& scoresTensor, const float confidenceThreshold) {
+ std::vector<size_t> indicies;
+ const auto& shape = scoresTensor.get_shape();
+ const float* scoresPtr = scoresTensor.data<float>();
+
+ for (size_t x = 0; x < shape[1]; ++x) {
+ const auto idx = (x * shape[2] + 1);
+ const auto score = scoresPtr[idx];
+ if (score >= confidenceThreshold) {
+ indicies.push_back(x);
+ }
+ }
+
+ return indicies;
+}
+
+std::vector<float> ModelRetinaFacePT::getFilteredScores(const ov::Tensor& scoresTensor,
+ const std::vector<size_t>& indicies) {
+ const auto& shape = scoresTensor.get_shape();
+ const float* scoresPtr = scoresTensor.data<float>();
+
+ std::vector<float> scores;
+ scores.reserve(indicies.size());
+
+ for (auto i : indicies) {
+ scores.push_back(scoresPtr[i * shape[2] + 1]);
+ }
+ return scores;
+}
+
+std::vector<cv::Point2f> ModelRetinaFacePT::getFilteredLandmarks(const ov::Tensor& landmarksTensor,
+ const std::vector<size_t>& indicies,
+ int imgWidth,
+ int imgHeight) {
+ const auto& shape = landmarksTensor.get_shape();
+ const float* landmarksPtr = landmarksTensor.data<float>();
+
+ std::vector<cv::Point2f> landmarks(landmarksNum * indicies.size());
+
+ for (size_t i = 0; i < indicies.size(); i++) {
+ const size_t idx = indicies[i];
+ const auto& prior = priors[idx];
+ for (size_t j = 0; j < landmarksNum; j++) {
+ landmarks[i * landmarksNum + j].x =
+ clamp(prior.cX + landmarksPtr[idx * shape[2] + j * 2] * variance[0] * prior.width, 0.f, 1.f) * imgWidth;
+ landmarks[i * landmarksNum + j].y =
+ clamp(prior.cY + landmarksPtr[idx * shape[2] + j * 2 + 1] * variance[0] *
prior.height, 0.f, 1.f) * + imgHeight; + } + } + return landmarks; +} + +std::vector<ModelRetinaFacePT::Box> ModelRetinaFacePT::generatePriorData() { + const float globalMinSizes[][2] = {{16, 32}, {64, 128}, {256, 512}}; + const float steps[] = {8., 16., 32.}; + std::vector<ModelRetinaFacePT::Box> anchors; + for (size_t stepNum = 0; stepNum < arraySize(steps); stepNum++) { + const int featureW = static_cast<int>(std::round(netInputWidth / steps[stepNum])); + const int featureH = static_cast<int>(std::round(netInputHeight / steps[stepNum])); + + const auto& minSizes = globalMinSizes[stepNum]; + for (int i = 0; i < featureH; i++) { + for (int j = 0; j < featureW; j++) { + for (auto minSize : minSizes) { + const float sKX = minSize / netInputWidth; + const float sKY = minSize / netInputHeight; + const float denseCY = (i + 0.5f) * steps[stepNum] / netInputHeight; + const float denseCX = (j + 0.5f) * steps[stepNum] / netInputWidth; + anchors.push_back(ModelRetinaFacePT::Box{denseCX, denseCY, sKX, sKY}); + } + } + } + } + return anchors; +} + +std::vector<Anchor> ModelRetinaFacePT::getFilteredProposals(const ov::Tensor& boxesTensor, + const std::vector<size_t>& indicies, + int imgWidth, + int imgHeight) { + std::vector<Anchor> rects; + rects.reserve(indicies.size()); + + const auto& shape = boxesTensor.get_shape(); + const float* boxesPtr = boxesTensor.data<float>(); + + if (shape[1] != priors.size()) { + throw std::logic_error("rawBoxes size is not equal to priors size"); + } + + for (auto i : indicies) { + const auto pRawBox = reinterpret_cast<const Box*>(boxesPtr + i * shape[2]); + const auto& prior = priors[i]; + const float cX = priors[i].cX + pRawBox->cX * variance[0] * prior.width; + const float cY = priors[i].cY + pRawBox->cY * variance[0] * prior.height; + const float width = prior.width * exp(pRawBox->width * variance[1]); + const float height = prior.height * exp(pRawBox->height * variance[1]); + rects.push_back(Anchor{clamp(cX - width / 2, 0.f, 1.f) * imgWidth, + clamp(cY - height / 2, 0.f, 1.f) * imgHeight, + clamp(cX + width / 2, 0.f, 1.f) * imgWidth, + clamp(cY + height / 2, 0.f, 1.f) * imgHeight}); + } + + return rects; +} + +std::unique_ptr<ResultBase> ModelRetinaFacePT::postprocess(InferenceResult& infResult) { + // (raw_output, scale_x, scale_y, face_prob_threshold, image_size): + const auto boxesTensor = infResult.outputsData[outputsNames[OUT_BOXES]]; + const auto scoresTensor = infResult.outputsData[outputsNames[OUT_SCORES]]; + + const auto& validIndicies = filterByScore(scoresTensor, confidenceThreshold); + const auto& scores = getFilteredScores(scoresTensor, validIndicies); + + const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>(); + const auto& landmarks = landmarksNum ? 
getFilteredLandmarks(infResult.outputsData[outputsNames[OUT_LANDMARKS]], + validIndicies, + internalData.inputImgWidth, + internalData.inputImgHeight) + : std::vector<cv::Point2f>(); + + const auto& proposals = + getFilteredProposals(boxesTensor, validIndicies, internalData.inputImgWidth, internalData.inputImgHeight); + + const auto& keptIndicies = nms(proposals, scores, boxIOUThreshold, !landmarksNum); + + // --------------------------- Create detection result objects + // -------------------------------------------------------- + RetinaFaceDetectionResult* result = new RetinaFaceDetectionResult(infResult.frameId, infResult.metaData); + + result->objects.reserve(keptIndicies.size()); + result->landmarks.reserve(keptIndicies.size() * landmarksNum); + for (auto i : keptIndicies) { + DetectedObject desc; + desc.confidence = scores[i]; + + //--- Scaling coordinates + desc.x = proposals[i].left; + desc.y = proposals[i].top; + desc.width = proposals[i].getWidth(); + desc.height = proposals[i].getHeight(); + + desc.labelID = 0; + desc.label = labels[desc.labelID]; + result->objects.push_back(desc); + + //--- Filtering landmarks coordinates + for (uint32_t l = 0; l < landmarksNum; ++l) { + result->landmarks.emplace_back(landmarks[i * landmarksNum + l].x, landmarks[i * landmarksNum + l].y); + } + } + + return std::unique_ptr<ResultBase>(result); +} diff --git a/python/openvino/runtime/common/models/src/detection_model_ssd.cpp b/python/openvino/runtime/common/models/src/detection_model_ssd.cpp new file mode 100644 index 0000000..ef741ee --- /dev/null +++ b/python/openvino/runtime/common/models/src/detection_model_ssd.cpp @@ -0,0 +1,281 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "models/detection_model_ssd.h" + +#include <algorithm> +#include <map> +#include <stdexcept> +#include <string> +#include <unordered_set> +#include <vector> + +#include <openvino/openvino.hpp> + +#include <utils/common.hpp> +#include <utils/ocv_common.hpp> + +#include "models/internal_model_data.h" +#include "models/results.h" + +struct InputData; + +ModelSSD::ModelSSD(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + const std::vector<std::string>& labels, + const std::string& layout) + : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout) {} + +std::shared_ptr<InternalModelData> ModelSSD::preprocess(const InputData& inputData, ov::InferRequest& request) { + if (inputsNames.size() > 1) { + const auto& imageInfoTensor = request.get_tensor(inputsNames[1]); + const auto info = imageInfoTensor.data<float>(); + info[0] = static_cast<float>(netInputHeight); + info[1] = static_cast<float>(netInputWidth); + info[2] = 1; + request.set_tensor(inputsNames[1], imageInfoTensor); + } + + return DetectionModel::preprocess(inputData, request); +} + +std::unique_ptr<ResultBase> ModelSSD::postprocess(InferenceResult& infResult) { + return outputsNames.size() > 1 ? 
postprocessMultipleOutputs(infResult) : postprocessSingleOutput(infResult); +} + +std::unique_ptr<ResultBase> ModelSSD::postprocessSingleOutput(InferenceResult& infResult) { + const ov::Tensor& detectionsTensor = infResult.getFirstOutputTensor(); + size_t detectionsNum = detectionsTensor.get_shape()[detectionsNumId]; + const float* detections = detectionsTensor.data<float>(); + + DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); + auto retVal = std::unique_ptr<ResultBase>(result); + + const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>(); + + for (size_t i = 0; i < detectionsNum; i++) { + float image_id = detections[i * objectSize + 0]; + if (image_id < 0) { + break; + } + + float confidence = detections[i * objectSize + 2]; + + /** Filtering out objects with confidence < confidence_threshold probability **/ + if (confidence > confidenceThreshold) { + DetectedObject desc; + + desc.confidence = confidence; + desc.labelID = static_cast<int>(detections[i * objectSize + 1]); + desc.label = getLabelName(desc.labelID); + + desc.x = clamp(detections[i * objectSize + 3] * internalData.inputImgWidth, + 0.f, + static_cast<float>(internalData.inputImgWidth)); + desc.y = clamp(detections[i * objectSize + 4] * internalData.inputImgHeight, + 0.f, + static_cast<float>(internalData.inputImgHeight)); + desc.width = clamp(detections[i * objectSize + 5] * internalData.inputImgWidth, + 0.f, + static_cast<float>(internalData.inputImgWidth)) - + desc.x; + desc.height = clamp(detections[i * objectSize + 6] * internalData.inputImgHeight, + 0.f, + static_cast<float>(internalData.inputImgHeight)) - + desc.y; + + result->objects.push_back(desc); + } + } + + return retVal; +} + +std::unique_ptr<ResultBase> ModelSSD::postprocessMultipleOutputs(InferenceResult& infResult) { + const float* boxes = infResult.outputsData[outputsNames[0]].data<float>(); + size_t detectionsNum = infResult.outputsData[outputsNames[0]].get_shape()[detectionsNumId]; + const float* labels = infResult.outputsData[outputsNames[1]].data<float>(); + const float* scores = outputsNames.size() > 2 ? infResult.outputsData[outputsNames[2]].data<float>() : nullptr; + + DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); + auto retVal = std::unique_ptr<ResultBase>(result); + + const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>(); + + // In models where scores are stored in a separate output, coordinates are normalized to [0,1]. + // In other multiple-output models, coordinates are normalized to [0,netInputWidth] and [0,netInputHeight]. + float widthScale = static_cast<float>(internalData.inputImgWidth) / (scores ? 1 : netInputWidth); + float heightScale = static_cast<float>(internalData.inputImgHeight) / (scores ? 1 : netInputHeight); + + for (size_t i = 0; i < detectionsNum; i++) { + float confidence = scores ? 
scores[i] : boxes[i * objectSize + 4]; + + /** Filtering out objects with confidence < confidence_threshold probability **/ + if (confidence > confidenceThreshold) { + DetectedObject desc; + + desc.confidence = confidence; + desc.labelID = static_cast<int>(labels[i]); + desc.label = getLabelName(desc.labelID); + + desc.x = clamp(boxes[i * objectSize] * widthScale, 0.f, static_cast<float>(internalData.inputImgWidth)); + desc.y = + clamp(boxes[i * objectSize + 1] * heightScale, 0.f, static_cast<float>(internalData.inputImgHeight)); + desc.width = + clamp(boxes[i * objectSize + 2] * widthScale, 0.f, static_cast<float>(internalData.inputImgWidth)) - + desc.x; + desc.height = + clamp(boxes[i * objectSize + 3] * heightScale, 0.f, static_cast<float>(internalData.inputImgHeight)) - + desc.y; + + result->objects.push_back(desc); + } + } + + return retVal; +} + +void ModelSSD::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output ------------------------------------------------- + // --------------------------- Prepare input ------------------------------------------------------ + ov::preprocess::PrePostProcessor ppp(model); + for (const auto& input : model->inputs()) { + auto inputTensorName = input.get_any_name(); + const ov::Shape& shape = input.get_shape(); + ov::Layout inputLayout = getInputLayout(input); + + if (shape.size() == 4) { // 1st input contains images + if (inputsNames.empty()) { + inputsNames.push_back(inputTensorName); + } else { + inputsNames[0] = inputTensorName; + } + + inputTransform.setPrecision(ppp, inputTensorName); + ppp.input(inputTensorName).tensor().set_layout({"NHWC"}); + + if (useAutoResize) { + ppp.input(inputTensorName).tensor().set_spatial_dynamic_shape(); + + ppp.input(inputTensorName) + .preprocess() + .convert_element_type(ov::element::f32) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + } + + ppp.input(inputTensorName).model().set_layout(inputLayout); + + netInputWidth = shape[ov::layout::width_idx(inputLayout)]; + netInputHeight = shape[ov::layout::height_idx(inputLayout)]; + } else if (shape.size() == 2) { // 2nd input contains image info + inputsNames.resize(2); + inputsNames[1] = inputTensorName; + ppp.input(inputTensorName).tensor().set_element_type(ov::element::f32); + } else { + throw std::logic_error("Unsupported " + std::to_string(input.get_shape().size()) + + "D " + "input layer '" + + input.get_any_name() + + "'. 
" + "Only 2D and 4D input layers are supported"); + } + } + model = ppp.build(); + + // --------------------------- Prepare output ----------------------------------------------------- + if (model->outputs().size() == 1) { + prepareSingleOutput(model); + } else { + prepareMultipleOutputs(model); + } +} + +void ModelSSD::prepareSingleOutput(std::shared_ptr<ov::Model>& model) { + const auto& output = model->output(); + outputsNames.push_back(output.get_any_name()); + + const ov::Shape& shape = output.get_shape(); + const ov::Layout& layout("NCHW"); + if (shape.size() != 4) { + throw std::logic_error("SSD single output must have 4 dimensions, but had " + std::to_string(shape.size())); + } + detectionsNumId = ov::layout::height_idx(layout); + objectSize = shape[ov::layout::width_idx(layout)]; + if (objectSize != 7) { + throw std::logic_error("SSD single output must have 7 as a last dimension, but had " + + std::to_string(objectSize)); + } + ov::preprocess::PrePostProcessor ppp(model); + ppp.output().tensor().set_element_type(ov::element::f32).set_layout(layout); + model = ppp.build(); +} + +void ModelSSD::prepareMultipleOutputs(std::shared_ptr<ov::Model>& model) { + const ov::OutputVector& outputs = model->outputs(); + for (auto& output : outputs) { + const auto& tensorNames = output.get_names(); + for (const auto& name : tensorNames) { + if (name.find("boxes") != std::string::npos) { + outputsNames.push_back(name); + break; + } else if (name.find("labels") != std::string::npos) { + outputsNames.push_back(name); + break; + } else if (name.find("scores") != std::string::npos) { + outputsNames.push_back(name); + break; + } + } + } + if (outputsNames.size() != 2 && outputsNames.size() != 3) { + throw std::logic_error("SSD model wrapper must have 2 or 3 outputs, but had " + + std::to_string(outputsNames.size())); + } + std::sort(outputsNames.begin(), outputsNames.end()); + + ov::preprocess::PrePostProcessor ppp(model); + const auto& boxesShape = model->output(outputsNames[0]).get_partial_shape().get_max_shape(); + + ov::Layout boxesLayout; + if (boxesShape.size() == 2) { + boxesLayout = "NC"; + detectionsNumId = ov::layout::batch_idx(boxesLayout); + objectSize = boxesShape[ov::layout::channels_idx(boxesLayout)]; + + if (objectSize != 5) { + throw std::logic_error("Incorrect 'boxes' output shape, [n][5] shape is required"); + } + } else if (boxesShape.size() == 3) { + boxesLayout = "CHW"; + detectionsNumId = ov::layout::height_idx(boxesLayout); + objectSize = boxesShape[ov::layout::width_idx(boxesLayout)]; + + if (objectSize != 4) { + throw std::logic_error("Incorrect 'boxes' output shape, [b][n][4] shape is required"); + } + } else { + throw std::logic_error("Incorrect number of 'boxes' output dimensions, expected 2 or 3, but had " + + std::to_string(boxesShape.size())); + } + + ppp.output(outputsNames[0]).tensor().set_layout(boxesLayout); + + for (const auto& outName : outputsNames) { + ppp.output(outName).tensor().set_element_type(ov::element::f32); + } + model = ppp.build(); +} diff --git a/python/openvino/runtime/common/models/src/detection_model_yolo.cpp b/python/openvino/runtime/common/models/src/detection_model_yolo.cpp new file mode 100644 index 0000000..2c4fb1d --- /dev/null +++ b/python/openvino/runtime/common/models/src/detection_model_yolo.cpp @@ -0,0 +1,481 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "models/detection_model_yolo.h" + +#include <algorithm> +#include <cmath> +#include <cstdint> +#include <stdexcept> +#include <string> +#include <utility> +#include <vector> + +#include <openvino/openvino.hpp> + +#include <utils/common.hpp> +#include <utils/slog.hpp> + +#include "models/internal_model_data.h" +#include "models/results.h" + +std::vector<float> defaultAnchors[] = { + // YOLOv1v2 + {0.57273f, 0.677385f, 1.87446f, 2.06253f, 3.33843f, 5.47434f, 7.88282f, 3.52778f, 9.77052f, 9.16828f}, + // YOLOv3 + {10.0f, + 13.0f, + 16.0f, + 30.0f, + 33.0f, + 23.0f, + 30.0f, + 61.0f, + 62.0f, + 45.0f, + 59.0f, + 119.0f, + 116.0f, + 90.0f, + 156.0f, + 198.0f, + 373.0f, + 326.0f}, + // YOLOv4 + {12.0f, + 16.0f, + 19.0f, + 36.0f, + 40.0f, + 28.0f, + 36.0f, + 75.0f, + 76.0f, + 55.0f, + 72.0f, + 146.0f, + 142.0f, + 110.0f, + 192.0f, + 243.0f, + 459.0f, + 401.0f}, + // YOLOv4_Tiny + {10.0f, 14.0f, 23.0f, 27.0f, 37.0f, 58.0f, 81.0f, 82.0f, 135.0f, 169.0f, 344.0f, 319.0f}, + // YOLOF + {16.0f, 16.0f, 32.0f, 32.0f, 64.0f, 64.0f, 128.0f, 128.0f, 256.0f, 256.0f, 512.0f, 512.0f}}; + +const std::vector<int64_t> defaultMasks[] = { + // YOLOv1v2 + {}, + // YOLOv3 + {}, + // YOLOv4 + {0, 1, 2, 3, 4, 5, 6, 7, 8}, + // YOLOv4_Tiny + {1, 2, 3, 3, 4, 5}, + // YOLOF + {0, 1, 2, 3, 4, 5}}; + +static inline float sigmoid(float x) { + return 1.f / (1.f + exp(-x)); +} + +static inline float linear(float x) { + return x; +} + +ModelYolo::ModelYolo(const std::string& modelFileName, + float confidenceThreshold, + bool useAutoResize, + bool useAdvancedPostprocessing, + float boxIOUThreshold, + const std::vector<std::string>& labels, + const std::vector<float>& anchors, + const std::vector<int64_t>& masks, + const std::string& layout) + : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout), + boxIOUThreshold(boxIOUThreshold), + useAdvancedPostprocessing(useAdvancedPostprocessing), + yoloVersion(YOLO_V3), + presetAnchors(anchors), + presetMasks(masks) {} + +void ModelYolo::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output ------------------------------------------------- + // --------------------------- Prepare input ------------------------------------------------------ + if (model->inputs().size() != 1) { + throw std::logic_error("YOLO model wrapper accepts models that have only 1 input"); + } + + const auto& input = model->input(); + const ov::Shape& inputShape = model->input().get_shape(); + ov::Layout inputLayout = getInputLayout(input); + + if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) { + throw std::logic_error("Expected 3-channel input"); + } + + ov::preprocess::PrePostProcessor ppp(model); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); + + if (useAutoResize) { + ppp.input().tensor().set_spatial_dynamic_shape(); + + ppp.input() + .preprocess() + .convert_element_type(ov::element::f32) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + } + + ppp.input().model().set_layout(inputLayout); + + //--- Reading image input parameters + 
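// Note: the width/height indices are resolved from the layout string, so this works for both + // NCHW and NHWC inputs; e.g. a hypothetical yolo-v4 input of shape {1, 3, 608, 608} with + // layout "NCHW" yields netInputWidth = 608 (dim 3) and netInputHeight = 608 (dim 2). +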
inputsNames.push_back(model->input().get_any_name()); + netInputWidth = inputShape[ov::layout::width_idx(inputLayout)]; + netInputHeight = inputShape[ov::layout::height_idx(inputLayout)]; + + // --------------------------- Prepare output ----------------------------------------------------- + const ov::OutputVector& outputs = model->outputs(); + std::map<std::string, ov::Shape> outShapes; + for (auto& out : outputs) { + ppp.output(out.get_any_name()).tensor().set_element_type(ov::element::f32); + if (out.get_shape().size() == 4) { + if (out.get_shape()[ov::layout::height_idx("NCHW")] != out.get_shape()[ov::layout::width_idx("NCHW")] && + out.get_shape()[ov::layout::height_idx("NHWC")] == out.get_shape()[ov::layout::width_idx("NHWC")]) { + ppp.output(out.get_any_name()).model().set_layout("NHWC"); + // outShapes are captured before ppp.build(), so yoloRegionLayout must describe the layout the model has before ppp.build() + yoloRegionLayout = "NHWC"; + } + // yolo-v1-tiny-tf output shape is [1, 21125], so the layout is set only for 4-dimensional tensors + ppp.output(out.get_any_name()).tensor().set_layout("NCHW"); + } + outputsNames.push_back(out.get_any_name()); + outShapes[out.get_any_name()] = out.get_shape(); + } + model = ppp.build(); + + yoloVersion = YOLO_V3; + bool isRegionFound = false; + for (const auto& op : model->get_ordered_ops()) { + if (std::string("RegionYolo") == op->get_type_name()) { + auto regionYolo = std::dynamic_pointer_cast<ov::op::v0::RegionYolo>(op); + + if (regionYolo) { + if (!regionYolo->get_mask().size()) { + yoloVersion = YOLO_V1V2; + } + + const auto& opName = op->get_friendly_name(); + for (const auto& out : outputs) { + if (out.get_node()->get_friendly_name() == opName || + out.get_node()->get_input_node_ptr(0)->get_friendly_name() == opName) { + isRegionFound = true; + regions.emplace(out.get_any_name(), Region(regionYolo)); + } + } + } + } + } + + if (!isRegionFound) { + switch (outputsNames.size()) { + case 1: + yoloVersion = YOLOF; + break; + case 2: + yoloVersion = YOLO_V4_TINY; + break; + case 3: + yoloVersion = YOLO_V4; + break; + } + + int num = yoloVersion == YOLOF ? 6 : 3; + isObjConf = yoloVersion == YOLOF ? 0 : 1; + int i = 0; + + auto chosenMasks = presetMasks.size() ? presetMasks : defaultMasks[yoloVersion]; + if (chosenMasks.size() != num * outputs.size()) { + throw std::runtime_error(std::string("Invalid size of masks array, got ") + + std::to_string(chosenMasks.size()) + ", should be " + + std::to_string(num * outputs.size())); + } + + std::sort(outputsNames.begin(), + outputsNames.end(), + [&outShapes, this](const std::string& x, const std::string& y) { + return outShapes[x][ov::layout::height_idx(yoloRegionLayout)] > + outShapes[y][ov::layout::height_idx(yoloRegionLayout)]; + }); + + for (const auto& name : outputsNames) { + const auto& shape = outShapes[name]; + if (shape[ov::layout::channels_idx(yoloRegionLayout)] % num != 0) { + throw std::logic_error(std::string("Output tensor ") + name + " has wrong channel dimension"); + } + regions.emplace( + name, + Region(shape[ov::layout::channels_idx(yoloRegionLayout)] / num - 4 - (isObjConf ? 1 : 0), + 4, + presetAnchors.size() ? 
presetAnchors : defaultAnchors[yoloVersion], + std::vector<int64_t>(chosenMasks.begin() + i * num, chosenMasks.begin() + (i + 1) * num), + shape[ov::layout::width_idx(yoloRegionLayout)], + shape[ov::layout::height_idx(yoloRegionLayout)])); + i++; + } + } else { + // Currently externally set anchors and masks are supported only for YoloV4 + if (presetAnchors.size() || presetMasks.size()) { + slog::warn << "Preset anchors and mask can be set for YoloV4 model only. " + "This model is not YoloV4, so these options will be ignored." + << slog::endl; + } + } +} + +std::unique_ptr<ResultBase> ModelYolo::postprocess(InferenceResult& infResult) { + DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); + std::vector<DetectedObject> objects; + + // Parsing outputs + const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>(); + + for (auto& output : infResult.outputsData) { + this->parseYOLOOutput(output.first, + output.second, + netInputHeight, + netInputWidth, + internalData.inputImgHeight, + internalData.inputImgWidth, + objects); + } + + if (useAdvancedPostprocessing) { + // Advanced postprocessing + // Checking IOU threshold conformance + // For every i-th object we're finding all objects it intersects with, and comparing confidence + // If i-th object has greater confidence than all others, we include it into result + for (const auto& obj1 : objects) { + bool isGoodResult = true; + for (const auto& obj2 : objects) { + if (obj1.labelID == obj2.labelID && obj1.confidence < obj2.confidence && + intersectionOverUnion(obj1, obj2) >= boxIOUThreshold) { // if obj1 is the same as obj2, condition + // expression will evaluate to false anyway + isGoodResult = false; + break; + } + } + if (isGoodResult) { + result->objects.push_back(obj1); + } + } + } else { + // Classic postprocessing + std::sort(objects.begin(), objects.end(), [](const DetectedObject& x, const DetectedObject& y) { + return x.confidence > y.confidence; + }); + for (size_t i = 0; i < objects.size(); ++i) { + if (objects[i].confidence == 0) + continue; + for (size_t j = i + 1; j < objects.size(); ++j) + if (intersectionOverUnion(objects[i], objects[j]) >= boxIOUThreshold) + objects[j].confidence = 0; + result->objects.push_back(objects[i]); + } + } + + return std::unique_ptr<ResultBase>(result); +} + +void ModelYolo::parseYOLOOutput(const std::string& output_name, + const ov::Tensor& tensor, + const unsigned long resized_im_h, + const unsigned long resized_im_w, + const unsigned long original_im_h, + const unsigned long original_im_w, + std::vector<DetectedObject>& objects) { + // --------------------------- Extracting layer parameters ------------------------------------- + auto it = regions.find(output_name); + if (it == regions.end()) { + throw std::runtime_error(std::string("Can't find output layer with name ") + output_name); + } + auto& region = it->second; + + int sideW = 0; + int sideH = 0; + unsigned long scaleH; + unsigned long scaleW; + switch (yoloVersion) { + case YOLO_V1V2: + sideH = region.outputHeight; + sideW = region.outputWidth; + scaleW = region.outputWidth; + scaleH = region.outputHeight; + break; + case YOLO_V3: + case YOLO_V4: + case YOLO_V4_TINY: + case YOLOF: + sideH = static_cast<int>(tensor.get_shape()[ov::layout::height_idx("NCHW")]); + sideW = static_cast<int>(tensor.get_shape()[ov::layout::width_idx("NCHW")]); + scaleW = resized_im_w; + scaleH = resized_im_h; + break; + } + + auto entriesNum = sideW * sideH; + const float* outData = 
tensor.data<float>(); + + auto postprocessRawData = + (yoloVersion == YOLO_V4 || yoloVersion == YOLO_V4_TINY || yoloVersion == YOLOF) ? sigmoid : linear; + + // --------------------------- Parsing YOLO Region output ------------------------------------- + for (int i = 0; i < entriesNum; ++i) { + int row = i / sideW; + int col = i % sideW; + for (int n = 0; n < region.num; ++n) { + //--- Getting region data + int obj_index = calculateEntryIndex(entriesNum, + region.coords, + region.classes + isObjConf, + n * entriesNum + i, + region.coords); + int box_index = + calculateEntryIndex(entriesNum, region.coords, region.classes + isObjConf, n * entriesNum + i, 0); + float scale = isObjConf ? postprocessRawData(outData[obj_index]) : 1; + + //--- Preliminary check for confidence threshold conformance + if (scale >= confidenceThreshold) { + //--- Calculating scaled region's coordinates + float x, y; + if (yoloVersion == YOLOF) { + x = (static_cast<float>(col) / sideW + + outData[box_index + 0 * entriesNum] * region.anchors[2 * n] / scaleW) * + original_im_w; + y = (static_cast<float>(row) / sideH + + outData[box_index + 1 * entriesNum] * region.anchors[2 * n + 1] / scaleH) * + original_im_h; + } else { + x = static_cast<float>((col + postprocessRawData(outData[box_index + 0 * entriesNum])) / sideW * + original_im_w); + y = static_cast<float>((row + postprocessRawData(outData[box_index + 1 * entriesNum])) / sideH * + original_im_h); + } + float height = static_cast<float>(std::exp(outData[box_index + 3 * entriesNum]) * + region.anchors[2 * n + 1] * original_im_h / scaleH); + float width = static_cast<float>(std::exp(outData[box_index + 2 * entriesNum]) * region.anchors[2 * n] * + original_im_w / scaleW); + + DetectedObject obj; + obj.x = clamp(x - width / 2, 0.f, static_cast<float>(original_im_w)); + obj.y = clamp(y - height / 2, 0.f, static_cast<float>(original_im_h)); + obj.width = clamp(width, 0.f, static_cast<float>(original_im_w - obj.x)); + obj.height = clamp(height, 0.f, static_cast<float>(original_im_h - obj.y)); + + for (size_t j = 0; j < region.classes; ++j) { + int class_index = calculateEntryIndex(entriesNum, + region.coords, + region.classes + isObjConf, + n * entriesNum + i, + region.coords + isObjConf + j); + float prob = scale * postprocessRawData(outData[class_index]); + + //--- Checking confidence threshold conformance and adding region to the list + if (prob >= confidenceThreshold) { + obj.confidence = prob; + obj.labelID = j; + obj.label = getLabelName(obj.labelID); + objects.push_back(obj); + } + } + } + } + } +} + +int ModelYolo::calculateEntryIndex(int totalCells, int lcoords, size_t lclasses, int location, int entry) { + int n = location / totalCells; + int loc = location % totalCells; + return (n * (lcoords + lclasses) + entry) * totalCells + loc; +} + +double ModelYolo::intersectionOverUnion(const DetectedObject& o1, const DetectedObject& o2) { + double overlappingWidth = fmin(o1.x + o1.width, o2.x + o2.width) - fmax(o1.x, o2.x); + double overlappingHeight = fmin(o1.y + o1.height, o2.y + o2.height) - fmax(o1.y, o2.y); + double intersectionArea = + (overlappingWidth < 0 || overlappingHeight < 0) ? 
0 : overlappingHeight * overlappingWidth; + double unionArea = o1.width * o1.height + o2.width * o2.height - intersectionArea; + return intersectionArea / unionArea; +} + +ModelYolo::Region::Region(const std::shared_ptr<ov::op::v0::RegionYolo>& regionYolo) { + coords = regionYolo->get_num_coords(); + classes = regionYolo->get_num_classes(); + auto mask = regionYolo->get_mask(); + num = mask.size(); + + auto shape = regionYolo->get_input_shape(0); + outputWidth = shape[3]; + outputHeight = shape[2]; + + if (num) { + // Parsing YoloV3 parameters + anchors.resize(num * 2); + + for (int i = 0; i < num; ++i) { + anchors[i * 2] = regionYolo->get_anchors()[mask[i] * 2]; + anchors[i * 2 + 1] = regionYolo->get_anchors()[mask[i] * 2 + 1]; + } + } else { + // Parsing YoloV2 parameters + num = regionYolo->get_num_regions(); + anchors = regionYolo->get_anchors(); + if (anchors.empty()) { + anchors = defaultAnchors[YOLO_V1V2]; + num = 5; + } + } +} + +ModelYolo::Region::Region(size_t classes, + int coords, + const std::vector<float>& anchors, + const std::vector<int64_t>& masks, + size_t outputWidth, + size_t outputHeight) + : classes(classes), + coords(coords), + outputWidth(outputWidth), + outputHeight(outputHeight) { + num = masks.size(); + + if (anchors.size() == 0 || anchors.size() % 2 != 0) { + throw std::runtime_error("Explicitly initialized region should have non-empty even-sized regions vector"); + } + + if (num) { + this->anchors.resize(num * 2); + + for (int i = 0; i < num; ++i) { + this->anchors[i * 2] = anchors[masks[i] * 2]; + this->anchors[i * 2 + 1] = anchors[masks[i] * 2 + 1]; + } + } else { + this->anchors = anchors; + num = anchors.size() / 2; + } +} diff --git a/python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp b/python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp new file mode 100644 index 0000000..132eb9e --- /dev/null +++ b/python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp @@ -0,0 +1,188 @@ +/* +// Copyright (C) 2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "models/detection_model_yolov3_onnx.h" + +#include <algorithm> +#include <cmath> +#include <cstdint> +#include <stdexcept> +#include <string> +#include <utility> +#include <vector> + +#include <openvino/openvino.hpp> + +#include <utils/common.hpp> +#include <utils/slog.hpp> + +#include "models/input_data.h" +#include "models/internal_model_data.h" +#include "models/results.h" +#include "utils/image_utils.h" + +ModelYoloV3ONNX::ModelYoloV3ONNX(const std::string& modelFileName, + float confidenceThreshold, + const std::vector<std::string>& labels, + const std::string& layout) + : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) { + interpolationMode = cv::INTER_CUBIC; + resizeMode = RESIZE_KEEP_ASPECT_LETTERBOX; + } + + +void ModelYoloV3ONNX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output ------------------------------------------------- + // --------------------------- Prepare inputs ------------------------------------------------------ + const ov::OutputVector& inputs = model->inputs(); + if (inputs.size() != 2) { + throw std::logic_error("YoloV3ONNX model wrapper expects models that have 2 inputs"); + } + + ov::preprocess::PrePostProcessor ppp(model); + inputsNames.resize(inputs.size()); + for (auto& input : inputs) { + const ov::Shape& currentShape = input.get_shape(); + std::string currentName = input.get_any_name(); + const ov::Layout& currentLayout = getInputLayout(input); + + if (currentShape.size() == 4) { + if (currentShape[ov::layout::channels_idx(currentLayout)] != 3) { + throw std::logic_error("Expected 4D image input with 3 channels"); + } + inputsNames[0] = currentName; + netInputWidth = currentShape[ov::layout::width_idx(currentLayout)]; + netInputHeight = currentShape[ov::layout::height_idx(currentLayout)]; + ppp.input(currentName).tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); + } else if (currentShape.size() == 2) { + if (currentShape[ov::layout::channels_idx(currentLayout)] != 2) { + throw std::logic_error("Expected 2D image info input with 2 channels"); + } + inputsNames[1] = currentName; + ppp.input(currentName).tensor().set_element_type(ov::element::i32); + } + ppp.input(currentName).model().set_layout(currentLayout); + } + + // --------------------------- Prepare outputs ----------------------------------------------------- + const ov::OutputVector& outputs = model->outputs(); + if (outputs.size() != 3) { + throw std::logic_error("YoloV3ONNX model wrapper expects models that have 3 outputs"); + } + + for (auto& output : outputs) { + const ov::Shape& currentShape = output.get_partial_shape().get_max_shape(); + std::string currentName = output.get_any_name(); + if (currentShape.back() == 3) { + indicesOutputName = currentName; + ppp.output(currentName).tensor().set_element_type(ov::element::i32); + } else if (currentShape[2] == 4) { + boxesOutputName = currentName; + ppp.output(currentName).tensor().set_element_type(ov::element::f32); + } else if (currentShape[1] == numberOfClasses) { + scoresOutputName = currentName; + ppp.output(currentName).tensor().set_element_type(ov::element::f32); + } else { + throw std::logic_error("Expected shapes [:,:,4], [:," + + std::to_string(numberOfClasses) + ",:] and [:,3] for outputs"); + } + outputsNames.push_back(currentName); + } + model = ppp.build(); +} + +std::shared_ptr<InternalModelData> ModelYoloV3ONNX::preprocess(const InputData& inputData, + ov::InferRequest& request) { + const auto& origImg = 
inputData.asRef<ImageInputData>().inputImage; + + cv::Mat info(cv::Size(1, 2), CV_32SC1); + info.at<int>(0, 0) = origImg.rows; + info.at<int>(0, 1) = origImg.cols; + auto allocator = std::make_shared<SharedTensorAllocator>(info); + ov::Tensor infoInput = ov::Tensor(ov::element::i32, ov::Shape({1, 2}), ov::Allocator(allocator)); + + request.set_tensor(inputsNames[1], infoInput); + + return ImageModel::preprocess(inputData, request); +} + +namespace { +float getScore(const ov::Tensor& scoresTensor, size_t classInd, size_t boxInd) { + const float* scoresPtr = scoresTensor.data<float>(); + const auto shape = scoresTensor.get_shape(); + int N = shape[2]; + + return scoresPtr[classInd * N + boxInd]; +} +} + +std::unique_ptr<ResultBase> ModelYoloV3ONNX::postprocess(InferenceResult& infResult) { + // Get info about input image + const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth; + const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight; + + // Get outputs tensors + const ov::Tensor& boxes = infResult.outputsData[boxesOutputName]; + const float* boxesPtr = boxes.data<float>(); + + const ov::Tensor& scores = infResult.outputsData[scoresOutputName]; + const ov::Tensor& indices = infResult.outputsData[indicesOutputName]; + + const int* indicesData = indices.data<int>(); + const auto indicesShape = indices.get_shape(); + const auto boxShape = boxes.get_shape(); + + // Generate detection results + DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); + size_t numberOfBoxes = indicesShape.size() == 3 ? indicesShape[1] : indicesShape[0]; + int indicesStride = indicesShape.size() == 3 ? indicesShape[2] : indicesShape[1]; + + for (size_t i = 0; i < numberOfBoxes; ++i) { + int batchInd = indicesData[i * indicesStride]; + int classInd = indicesData[i * indicesStride + 1]; + int boxInd = indicesData[i * indicesStride + 2]; + + if (batchInd == -1) { + break; + } + + float score = getScore(scores, classInd, boxInd); + + if (score > confidenceThreshold) { + DetectedObject obj; + size_t startPos = boxShape[2] * boxInd; + + auto x = boxesPtr[startPos + 1]; + auto y = boxesPtr[startPos]; + auto width = boxesPtr[startPos + 3] - x; + auto height = boxesPtr[startPos + 2] - y; + + // Create new detected box + obj.x = clamp(x, 0.f, static_cast<float>(imgWidth)); + obj.y = clamp(y, 0.f, static_cast<float>(imgHeight)); + obj.height = clamp(height, 0.f, static_cast<float>(imgHeight)); + obj.width = clamp(width, 0.f, static_cast<float>(imgWidth)); + obj.confidence = score; + obj.labelID = classInd; + obj.label = getLabelName(classInd); + + result->objects.push_back(obj); + + } + } + + return std::unique_ptr<ResultBase>(result); +} diff --git a/python/openvino/runtime/common/models/src/detection_model_yolox.cpp b/python/openvino/runtime/common/models/src/detection_model_yolox.cpp new file mode 100644 index 0000000..1e434ff --- /dev/null +++ b/python/openvino/runtime/common/models/src/detection_model_yolox.cpp @@ -0,0 +1,194 @@ +/* +// Copyright (C) 2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "models/detection_model_yolox.h" + +#include <algorithm> +#include <cmath> +#include <cstdint> +#include <stdexcept> +#include <string> +#include <utility> +#include <vector> + +#include <openvino/openvino.hpp> + +#include <utils/common.hpp> +#include <utils/slog.hpp> + +#include "models/input_data.h" +#include "models/internal_model_data.h" +#include "models/results.h" +#include "utils/image_utils.h" +#include "utils/nms.hpp" + +ModelYoloX::ModelYoloX(const std::string& modelFileName, + float confidenceThreshold, + float boxIOUThreshold, + const std::vector<std::string>& labels, + const std::string& layout) + : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout), + boxIOUThreshold(boxIOUThreshold) { + resizeMode = RESIZE_KEEP_ASPECT; +} + +void ModelYoloX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output ------------------------------------------------- + // --------------------------- Prepare input ------------------------------------------------------ + const ov::OutputVector& inputs = model->inputs(); + if (inputs.size() != 1) { + throw std::logic_error("YOLOX model wrapper accepts models that have only 1 input"); + } + + //--- Check image input + const auto& input = model->input(); + const ov::Shape& inputShape = model->input().get_shape(); + ov::Layout inputLayout = getInputLayout(input); + + if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) { + throw std::logic_error("Expected 4D image input with 3 channels"); + } + + ov::preprocess::PrePostProcessor ppp(model); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); + + ppp.input().model().set_layout(inputLayout); + + //--- Reading image input parameters + inputsNames.push_back(input.get_any_name()); + netInputWidth = inputShape[ov::layout::width_idx(inputLayout)]; + netInputHeight = inputShape[ov::layout::height_idx(inputLayout)]; + setStridesGrids(); + + // --------------------------- Prepare output ----------------------------------------------------- + if (model->outputs().size() != 1) { + throw std::logic_error("YoloX model wrapper expects models that have only 1 output"); + } + const auto& output = model->output(); + outputsNames.push_back(output.get_any_name()); + const ov::Shape& shape = output.get_shape(); + + if (shape.size() != 3) { + throw std::logic_error("YOLOX single output must have 3 dimensions, but had " + std::to_string(shape.size())); + } + ppp.output().tensor().set_element_type(ov::element::f32); + + model = ppp.build(); +} + +void ModelYoloX::setStridesGrids() { + std::vector<size_t> strides = {8, 16, 32}; + std::vector<size_t> hsizes(3); + std::vector<size_t> wsizes(3); + + for (size_t i = 0; i < strides.size(); ++i) { + hsizes[i] = netInputHeight / strides[i]; + wsizes[i] = netInputWidth / strides[i]; + } + + for (size_t size_index = 0; size_index < hsizes.size(); ++size_index) { + for (size_t h_index = 0; h_index < hsizes[size_index]; ++h_index) { + for (size_t w_index = 0; w_index < wsizes[size_index]; ++w_index) { + 
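// One grid cell is recorded per spatial location at each stride (8, 16, 32). postprocess() + // later decodes the raw outputs in the usual anchor-free YOLOX way: cx = (tx + grid.x) * stride, + // cy = (ty + grid.y) * stride, w = exp(tw) * stride, h = exp(th) * stride. +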
grids.emplace_back(w_index, h_index); + expandedStrides.push_back(strides[size_index]); + } + } + } +} + +std::shared_ptr<InternalModelData> ModelYoloX::preprocess(const InputData& inputData, + ov::InferRequest& request) { + const auto& origImg = inputData.asRef<ImageInputData>().inputImage; + double scale = std::min(static_cast<double>(netInputWidth) / origImg.cols, + static_cast<double>(netInputHeight) / origImg.rows); + + cv::Mat resizedImage = resizeImageExt(origImg, netInputWidth, netInputHeight, resizeMode, + interpolationMode, nullptr, cv::Scalar(114, 114, 114)); + + request.set_input_tensor(wrapMat2Tensor(resizedImage)); + return std::make_shared<InternalScaleData>(origImg.cols, origImg.rows, scale, scale); +} + +std::unique_ptr<ResultBase> ModelYoloX::postprocess(InferenceResult& infResult) { + // Get metadata about input image shape and scale + const auto& scale = infResult.internalModelData->asRef<InternalScaleData>(); + + // Get output tensor + const ov::Tensor& output = infResult.outputsData[outputsNames[0]]; + const auto& outputShape = output.get_shape(); + float* outputPtr = output.data<float>(); + + // Generate detection results + DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); + + // Update coordinates according to strides + for (size_t box_index = 0; box_index < expandedStrides.size(); ++box_index) { + size_t startPos = outputShape[2] * box_index; + outputPtr[startPos] = (outputPtr[startPos] + grids[box_index].first) * expandedStrides[box_index]; + outputPtr[startPos + 1] = (outputPtr[startPos + 1] + grids[box_index].second) * expandedStrides[box_index]; + outputPtr[startPos + 2] = std::exp(outputPtr[startPos + 2]) * expandedStrides[box_index]; + outputPtr[startPos + 3] = std::exp(outputPtr[startPos + 3]) * expandedStrides[box_index]; + } + + // Filter predictions + std::vector<Anchor> validBoxes; + std::vector<float> scores; + std::vector<size_t> classes; + for (size_t box_index = 0; box_index < expandedStrides.size(); ++box_index) { + size_t startPos = outputShape[2] * box_index; + float score = outputPtr[startPos + 4]; + if (score < confidenceThreshold) + continue; + float maxClassScore = -1; + size_t mainClass = 0; + for (size_t class_index = 0; class_index < numberOfClasses; ++class_index) { + if (outputPtr[startPos + 5 + class_index] > maxClassScore) { + maxClassScore = outputPtr[startPos + 5 + class_index]; + mainClass = class_index; + } + } + + // Filter by score + score *= maxClassScore; + if (score < confidenceThreshold) + continue; + + // Add successful boxes + scores.push_back(score); + classes.push_back(mainClass); + Anchor trueBox = {outputPtr[startPos + 0] - outputPtr[startPos + 2] / 2, outputPtr[startPos + 1] - outputPtr[startPos + 3] / 2, + outputPtr[startPos + 0] + outputPtr[startPos + 2] / 2, outputPtr[startPos + 1] + outputPtr[startPos + 3] / 2}; + validBoxes.push_back(Anchor({trueBox.left / scale.scaleX, trueBox.top / scale.scaleY, + trueBox.right / scale.scaleX, trueBox.bottom / scale.scaleY})); + } + + // NMS for valid boxes + std::vector<int> keep = nms(validBoxes, scores, boxIOUThreshold, true); + for (auto& index: keep) { + // Create new detected box + DetectedObject obj; + obj.x = clamp(validBoxes[index].left, 0.f, static_cast<float>(scale.inputImgWidth)); + obj.y = clamp(validBoxes[index].top, 0.f, static_cast<float>(scale.inputImgHeight)); + obj.height = clamp(validBoxes[index].bottom - validBoxes[index].top, 0.f, static_cast<float>(scale.inputImgHeight)); + obj.width = clamp(validBoxes[index].right - 
validBoxes[index].left, 0.f, static_cast<float>(scale.inputImgWidth)); + obj.confidence = scores[index]; + obj.labelID = classes[index]; + obj.label = getLabelName(classes[index]); + result->objects.push_back(obj); + } + + return std::unique_ptr<ResultBase>(result); +} diff --git a/python/openvino/runtime/common/models/src/hpe_model_associative_embedding.cpp b/python/openvino/runtime/common/models/src/hpe_model_associative_embedding.cpp new file mode 100644 index 0000000..33a3604 --- /dev/null +++ b/python/openvino/runtime/common/models/src/hpe_model_associative_embedding.cpp @@ -0,0 +1,264 @@ +/* +// Copyright (C) 2021-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "models/hpe_model_associative_embedding.h" + +#include <stddef.h> + +#include <algorithm> +#include <cmath> +#include <map> +#include <stdexcept> +#include <string> +#include <unordered_set> +#include <utility> +#include <vector> + +#include <openvino/openvino.hpp> + +#include <utils/image_utils.h> +#include <utils/ocv_common.hpp> +#include <utils/slog.hpp> + +#include "models/associative_embedding_decoder.h" +#include "models/input_data.h" +#include "models/internal_model_data.h" +#include "models/results.h" + +const cv::Vec3f HpeAssociativeEmbedding::meanPixel = cv::Vec3f::all(128); +const float HpeAssociativeEmbedding::detectionThreshold = 0.1f; +const float HpeAssociativeEmbedding::tagThreshold = 1.0f; + +HpeAssociativeEmbedding::HpeAssociativeEmbedding(const std::string& modelFileName, + double aspectRatio, + int targetSize, + float confidenceThreshold, + const std::string& layout, + float delta, + RESIZE_MODE resizeMode) + : ImageModel(modelFileName, false, layout), + aspectRatio(aspectRatio), + targetSize(targetSize), + confidenceThreshold(confidenceThreshold), + delta(delta) { + this->resizeMode = resizeMode; + interpolationMode = cv::INTER_CUBIC; + } + +void HpeAssociativeEmbedding::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output ------------------------------------------------- + // --------------------------- Prepare input Tensors ------------------------------------------------------ + if (model->inputs().size() != 1) { + throw std::logic_error("HPE AE model wrapper supports topologies with only 1 input."); + } + inputsNames.push_back(model->input().get_any_name()); + + const ov::Shape& inputShape = model->input().get_shape(); + const ov::Layout& inputLayout = getInputLayout(model->input()); + + if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 || + inputShape[ov::layout::channels_idx(inputLayout)] != 3) { + throw std::logic_error("3-channel 4-dimensional model's input is expected"); + } + + ov::preprocess::PrePostProcessor ppp(model); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); + + ppp.input().model().set_layout(inputLayout); + + // --------------------------- Prepare output Tensors ----------------------------------------------------- + 
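// The decoder below looks outputs up by name prefix: "embeddings" (per-joint associative-embedding + // tag maps), "heatmaps" (per-joint confidence maps) and, optionally, "nms_heatmaps" (heatmaps with + // non-maximum suppression already applied); when "nms_heatmaps" is absent, plain "heatmaps" are reused. +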
const ov::OutputVector& outputs = model->outputs(); + if (outputs.size() != 2 && outputs.size() != 3) { + throw std::logic_error("HPE AE model wrapper supports topologies with only 2 or 3 outputs"); + } + + for (const auto& output : model->outputs()) { + const auto& outTensorName = output.get_any_name(); + ppp.output(outTensorName).tensor().set_element_type(ov::element::f32); + + for (const auto& name : output.get_names()) { + outputsNames.push_back(name); + } + + const ov::Shape& outputShape = output.get_shape(); + if (outputShape.size() != 4 && outputShape.size() != 5) { + throw std::logic_error("output tensors are expected to be 4-dimensional or 5-dimensional"); + } + if (outputShape[ov::layout::batch_idx("NC...")] != 1 || outputShape[ov::layout::channels_idx("NC...")] != 17) { + throw std::logic_error("output tensors are expected to have batch size 1 and 17 channels"); + } + } + model = ppp.build(); + + embeddingsTensorName = findTensorByName("embeddings", outputsNames); + heatmapsTensorName = findTensorByName("heatmaps", outputsNames); + try { + nmsHeatmapsTensorName = findTensorByName("nms_heatmaps", outputsNames); + } catch (const std::runtime_error&) { nmsHeatmapsTensorName = heatmapsTensorName; } + + changeInputSize(model); +} + +void HpeAssociativeEmbedding::changeInputSize(std::shared_ptr<ov::Model>& model) { + ov::Shape inputShape = model->input().get_shape(); + const ov::Layout& layout = ov::layout::get_layout(model->input()); + const auto batchId = ov::layout::batch_idx(layout); + const auto heightId = ov::layout::height_idx(layout); + const auto widthId = ov::layout::width_idx(layout); + + if (!targetSize) { + targetSize = static_cast<int>(std::min(inputShape[heightId], inputShape[widthId])); + } + int inputHeight = aspectRatio >= 1.0 ? targetSize : static_cast<int>(std::round(targetSize / aspectRatio)); + int inputWidth = aspectRatio >= 1.0 ? 
static_cast<int>(std::round(targetSize * aspectRatio)) : targetSize; + int height = static_cast<int>((inputHeight + stride - 1) / stride) * stride; + int width = static_cast<int>((inputWidth + stride - 1) / stride) * stride; + inputShape[batchId] = 1; + inputShape[heightId] = height; + inputShape[widthId] = width; + inputLayerSize = cv::Size(width, height); + + model->reshape(inputShape); +} + +std::shared_ptr<InternalModelData> HpeAssociativeEmbedding::preprocess(const InputData& inputData, + ov::InferRequest& request) { + auto& image = inputData.asRef<ImageInputData>().inputImage; + cv::Rect roi; + auto paddedImage = resizeImageExt(image, inputLayerSize.width, inputLayerSize.height, resizeMode, interpolationMode, &roi); + if (inputLayerSize.height - stride >= roi.height || inputLayerSize.width - stride >= roi.width) { + slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl; + } + request.set_input_tensor(wrapMat2Tensor(paddedImage)); + + return std::make_shared<InternalScaleData>(paddedImage.cols, + paddedImage.rows, + image.size().width / static_cast<float>(roi.width), + image.size().height / static_cast<float>(roi.height)); +} + +std::unique_ptr<ResultBase> HpeAssociativeEmbedding::postprocess(InferenceResult& infResult) { + HumanPoseResult* result = new HumanPoseResult(infResult.frameId, infResult.metaData); + + const auto& aembds = infResult.outputsData[embeddingsTensorName]; + const ov::Shape& aembdsShape = aembds.get_shape(); + float* const aembdsMapped = aembds.data<float>(); + std::vector<cv::Mat> aembdsMaps = split(aembdsMapped, aembdsShape); + + const auto& heats = infResult.outputsData[heatmapsTensorName]; + const ov::Shape& heatMapsShape = heats.get_shape(); + float* const heatMapsMapped = heats.data<float>(); + std::vector<cv::Mat> heatMaps = split(heatMapsMapped, heatMapsShape); + + std::vector<cv::Mat> nmsHeatMaps = heatMaps; + if (nmsHeatmapsTensorName != heatmapsTensorName) { + const auto& nmsHeats = infResult.outputsData[nmsHeatmapsTensorName]; + const ov::Shape& nmsHeatMapsShape = nmsHeats.get_shape(); + float* const nmsHeatMapsMapped = nmsHeats.data<float>(); + nmsHeatMaps = split(nmsHeatMapsMapped, nmsHeatMapsShape); + } + std::vector<HumanPose> poses = extractPoses(heatMaps, aembdsMaps, nmsHeatMaps); + + // Rescale poses to the original image + const auto& scale = infResult.internalModelData->asRef<InternalScaleData>(); + const float outputScale = inputLayerSize.width / static_cast<float>(heatMapsShape[3]); + float shiftX = 0.0, shiftY = 0.0; + float scaleX = 1.0, scaleY = 1.0; + + if (resizeMode == RESIZE_KEEP_ASPECT_LETTERBOX) { + scaleX = scaleY = std::min(scale.scaleX, scale.scaleY); + if (aspectRatio >= 1.0) + shiftX = static_cast<float>((targetSize * scaleX * aspectRatio - scale.inputImgWidth * scaleX) / 2); + else + shiftY = static_cast<float>((targetSize * scaleY / aspectRatio - scale.inputImgHeight * scaleY) / 2); + scaleX = scaleY *= outputScale; + } else { + scaleX = scale.scaleX * outputScale; + scaleY = scale.scaleY * outputScale; + } + + for (auto& pose : poses) { + for (auto& keypoint : pose.keypoints) { + if (keypoint != cv::Point2f(-1, -1)) { + keypoint.x = keypoint.x * scaleX + shiftX; + keypoint.y = keypoint.y * scaleY + shiftY; + } + } + result->poses.push_back(pose); + } + + return std::unique_ptr<ResultBase>(result); +} + +std::string HpeAssociativeEmbedding::findTensorByName(const std::string& tensorName, + const std::vector<std::string>& outputsNames) { + std::vector<std::string> suitableLayers; + for 
(auto& outputName : outputsNames) { + if (outputName.rfind(tensorName, 0) == 0) { + suitableLayers.push_back(outputName); + } + } + if (suitableLayers.empty()) { + throw std::runtime_error("Suitable tensor for " + tensorName + " output is not found"); + } else if (suitableLayers.size() > 1) { + throw std::runtime_error("More than 1 tensor matched to " + tensorName + " output"); + } + return suitableLayers[0]; +} + +std::vector<cv::Mat> HpeAssociativeEmbedding::split(float* data, const ov::Shape& shape) { + std::vector<cv::Mat> flattenData(shape[1]); + for (size_t i = 0; i < flattenData.size(); i++) { + flattenData[i] = cv::Mat(shape[2], shape[3], CV_32FC1, data + i * shape[2] * shape[3]); + } + return flattenData; +} + +std::vector<HumanPose> HpeAssociativeEmbedding::extractPoses(std::vector<cv::Mat>& heatMaps, + const std::vector<cv::Mat>& aembdsMaps, + const std::vector<cv::Mat>& nmsHeatMaps) const { + std::vector<std::vector<Peak>> allPeaks(numJoints); + for (int i = 0; i < numJoints; i++) { + findPeaks(nmsHeatMaps, aembdsMaps, allPeaks, i, maxNumPeople, detectionThreshold); + } + std::vector<Pose> allPoses = matchByTag(allPeaks, maxNumPeople, numJoints, tagThreshold); + // swap for all poses + for (auto& pose : allPoses) { + for (size_t j = 0; j < numJoints; j++) { + Peak& peak = pose.getPeak(j); + std::swap(peak.keypoint.x, peak.keypoint.y); + } + } + std::vector<HumanPose> poses; + for (size_t i = 0; i < allPoses.size(); i++) { + Pose& pose = allPoses[i]; + // Filtering poses with low mean scores + if (pose.getMeanScore() <= confidenceThreshold) { + continue; + } + for (size_t j = 0; j < heatMaps.size(); j++) { + heatMaps[j] = cv::abs(heatMaps[j]); + } + adjustAndRefine(allPoses, heatMaps, aembdsMaps, i, delta); + std::vector<cv::Point2f> keypoints; + for (size_t j = 0; j < numJoints; j++) { + Peak& peak = pose.getPeak(j); + keypoints.push_back(peak.keypoint); + } + poses.push_back({keypoints, pose.getMeanScore()}); + } + return poses; +} diff --git a/python/openvino/runtime/common/models/src/hpe_model_openpose.cpp b/python/openvino/runtime/common/models/src/hpe_model_openpose.cpp new file mode 100644 index 0000000..d8b4cb6 --- /dev/null +++ b/python/openvino/runtime/common/models/src/hpe_model_openpose.cpp @@ -0,0 +1,256 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "models/hpe_model_openpose.h" + +#include <algorithm> +#include <cmath> +#include <map> +#include <stdexcept> +#include <string> +#include <utility> +#include <vector> + +#include <opencv2/imgproc.hpp> +#include <openvino/openvino.hpp> + +#include <utils/image_utils.h> +#include <utils/ocv_common.hpp> +#include <utils/slog.hpp> + +#include "models/input_data.h" +#include "models/internal_model_data.h" +#include "models/openpose_decoder.h" +#include "models/results.h" + +const cv::Vec3f HPEOpenPose::meanPixel = cv::Vec3f::all(128); +const float HPEOpenPose::minPeaksDistance = 3.0f; +const float HPEOpenPose::midPointsScoreThreshold = 0.05f; +const float HPEOpenPose::foundMidPointsRatioThreshold = 0.8f; +const float HPEOpenPose::minSubsetScore = 0.2f; + +HPEOpenPose::HPEOpenPose(const std::string& modelFileName, + double aspectRatio, + int targetSize, + float confidenceThreshold, + const std::string& layout) + : ImageModel(modelFileName, false, layout), + aspectRatio(aspectRatio), + targetSize(targetSize), + confidenceThreshold(confidenceThreshold) { + resizeMode = RESIZE_KEEP_ASPECT; + interpolationMode = cv::INTER_CUBIC; + } + +void HPEOpenPose::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output ------------------------------------------------- + // --------------------------- Prepare input ------------------------------------------------------ + if (model->inputs().size() != 1) { + throw std::logic_error("HPE OpenPose model wrapper supports topologies with only 1 input"); + } + inputsNames.push_back(model->input().get_any_name()); + const ov::Shape& inputShape = model->input().get_shape(); + const ov::Layout& inputLayout = getInputLayout(model->input()); + + if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 || + inputShape[ov::layout::channels_idx(inputLayout)] != 3) + throw std::logic_error("3-channel 4-dimensional model's input is expected"); + + ov::preprocess::PrePostProcessor ppp(model); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); + + ppp.input().model().set_layout(inputLayout); + + // --------------------------- Prepare output ----------------------------------------------------- + const ov::OutputVector& outputs = model->outputs(); + if (outputs.size() != 2) { + throw std::runtime_error("HPE OpenPose supports topologies with only 2 outputs"); + } + + const ov::Layout outputLayout("NCHW"); + for (const auto& output : model->outputs()) { + const auto& outTensorName = output.get_any_name(); + ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout); + outputsNames.push_back(outTensorName); + } + model = ppp.build(); + + const size_t batchId = ov::layout::batch_idx(outputLayout); + const size_t channelsId = ov::layout::channels_idx(outputLayout); + const size_t widthId = ov::layout::width_idx(outputLayout); + const size_t heightId = ov::layout::height_idx(outputLayout); + + ov::Shape heatmapsOutputShape = model->outputs().front().get_shape(); + ov::Shape pafsOutputShape = model->outputs().back().get_shape(); + if (heatmapsOutputShape[channelsId] > pafsOutputShape[channelsId]) { + std::swap(heatmapsOutputShape, pafsOutputShape); + std::swap(outputsNames[0], outputsNames[1]); + } + + if (heatmapsOutputShape.size() != 4 || heatmapsOutputShape[batchId] != 1 || + heatmapsOutputShape[ov::layout::channels_idx(outputLayout)] != keypointsNumber + 1) { + throw std::logic_error("1x" + std::to_string(keypointsNumber 
+ 1) + + "xHFMxWFM dimension of model's heatmap is expected"); + } + if (pafsOutputShape.size() != 4 || pafsOutputShape[batchId] != 1 || + pafsOutputShape[channelsId] != 2 * (keypointsNumber + 1)) { + throw std::logic_error("1x" + std::to_string(2 * (keypointsNumber + 1)) + + "xHFMxWFM dimension of model's output is expected"); + } + if (pafsOutputShape[heightId] != heatmapsOutputShape[heightId] || + pafsOutputShape[widthId] != heatmapsOutputShape[widthId]) { + throw std::logic_error("output and heatmap are expected to have matching last two dimensions"); + } + + changeInputSize(model); +} + +void HPEOpenPose::changeInputSize(std::shared_ptr<ov::Model>& model) { + ov::Shape inputShape = model->input().get_shape(); + const ov::Layout& layout = ov::layout::get_layout(model->inputs().front()); + const auto batchId = ov::layout::batch_idx(layout); + const auto heightId = ov::layout::height_idx(layout); + const auto widthId = ov::layout::width_idx(layout); + + if (!targetSize) { + targetSize = inputShape[heightId]; + } + int height = static_cast<int>((targetSize + stride - 1) / stride) * stride; + int inputWidth = static_cast<int>(std::round(targetSize * aspectRatio)); + int width = static_cast<int>((inputWidth + stride - 1) / stride) * stride; + inputShape[batchId] = 1; + inputShape[heightId] = height; + inputShape[widthId] = width; + inputLayerSize = cv::Size(width, height); + model->reshape(inputShape); +} + +std::shared_ptr<InternalModelData> HPEOpenPose::preprocess(const InputData& inputData, ov::InferRequest& request) { + auto& image = inputData.asRef<ImageInputData>().inputImage; + cv::Rect roi; + auto paddedImage = + resizeImageExt(image, inputLayerSize.width, inputLayerSize.height, resizeMode, interpolationMode, &roi); + if (inputLayerSize.width < roi.width) + throw std::runtime_error("The image aspect ratio doesn't fit current model shape"); + + if (inputLayerSize.width - stride >= roi.width) { + slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl; + } + + request.set_input_tensor(wrapMat2Tensor(paddedImage)); + return std::make_shared<InternalScaleData>(paddedImage.cols, + paddedImage.rows, + image.cols / static_cast<float>(roi.width), + image.rows / static_cast<float>(roi.height)); +} + +std::unique_ptr<ResultBase> HPEOpenPose::postprocess(InferenceResult& infResult) { + HumanPoseResult* result = new HumanPoseResult(infResult.frameId, infResult.metaData); + + const auto& heatMapsMapped = infResult.outputsData[outputsNames[0]]; + const auto& outputMapped = infResult.outputsData[outputsNames[1]]; + + const ov::Shape& outputShape = outputMapped.get_shape(); + const ov::Shape& heatMapShape = heatMapsMapped.get_shape(); + + float* const predictions = outputMapped.data<float>(); + float* const heats = heatMapsMapped.data<float>(); + + std::vector<cv::Mat> heatMaps(keypointsNumber); + for (size_t i = 0; i < heatMaps.size(); i++) { + heatMaps[i] = + cv::Mat(heatMapShape[2], heatMapShape[3], CV_32FC1, heats + i * heatMapShape[2] * heatMapShape[3]); + } + resizeFeatureMaps(heatMaps); + + std::vector<cv::Mat> pafs(outputShape[1]); + for (size_t i = 0; i < pafs.size(); i++) { + pafs[i] = + cv::Mat(heatMapShape[2], heatMapShape[3], CV_32FC1, predictions + i * heatMapShape[2] * heatMapShape[3]); + } + resizeFeatureMaps(pafs); + + std::vector<HumanPose> poses = extractPoses(heatMaps, pafs); + + const auto& scale = infResult.internalModelData->asRef<InternalScaleData>(); + float scaleX = stride / upsampleRatio * scale.scaleX; + float scaleY = stride / 
upsampleRatio * scale.scaleY; + for (auto& pose : poses) { + for (auto& keypoint : pose.keypoints) { + if (keypoint != cv::Point2f(-1, -1)) { + keypoint.x *= scaleX; + keypoint.y *= scaleY; + } + } + } + for (size_t i = 0; i < poses.size(); ++i) { + result->poses.push_back(poses[i]); + } + + return std::unique_ptr<ResultBase>(result); +} + +void HPEOpenPose::resizeFeatureMaps(std::vector<cv::Mat>& featureMaps) const { + for (auto& featureMap : featureMaps) { + cv::resize(featureMap, featureMap, cv::Size(), upsampleRatio, upsampleRatio, cv::INTER_CUBIC); + } +} + +class FindPeaksBody : public cv::ParallelLoopBody { +public: + FindPeaksBody(const std::vector<cv::Mat>& heatMaps, + float minPeaksDistance, + std::vector<std::vector<Peak>>& peaksFromHeatMap, + float confidenceThreshold) + : heatMaps(heatMaps), + minPeaksDistance(minPeaksDistance), + peaksFromHeatMap(peaksFromHeatMap), + confidenceThreshold(confidenceThreshold) {} + + void operator()(const cv::Range& range) const override { + for (int i = range.start; i < range.end; i++) { + findPeaks(heatMaps, minPeaksDistance, peaksFromHeatMap, i, confidenceThreshold); + } + } + +private: + const std::vector<cv::Mat>& heatMaps; + float minPeaksDistance; + std::vector<std::vector<Peak>>& peaksFromHeatMap; + float confidenceThreshold; +}; + +std::vector<HumanPose> HPEOpenPose::extractPoses(const std::vector<cv::Mat>& heatMaps, + const std::vector<cv::Mat>& pafs) const { + std::vector<std::vector<Peak>> peaksFromHeatMap(heatMaps.size()); + FindPeaksBody findPeaksBody(heatMaps, minPeaksDistance, peaksFromHeatMap, confidenceThreshold); + cv::parallel_for_(cv::Range(0, static_cast<int>(heatMaps.size())), findPeaksBody); + int peaksBefore = 0; + for (size_t heatmapId = 1; heatmapId < heatMaps.size(); heatmapId++) { + peaksBefore += static_cast<int>(peaksFromHeatMap[heatmapId - 1].size()); + for (auto& peak : peaksFromHeatMap[heatmapId]) { + peak.id += peaksBefore; + } + } + std::vector<HumanPose> poses = groupPeaksToPoses(peaksFromHeatMap, + pafs, + keypointsNumber, + midPointsScoreThreshold, + foundMidPointsRatioThreshold, + minJointsNumber, + minSubsetScore); + return poses; +} diff --git a/python/openvino/runtime/common/models/src/image_model.cpp b/python/openvino/runtime/common/models/src/image_model.cpp new file mode 100644 index 0000000..511faf3 --- /dev/null +++ b/python/openvino/runtime/common/models/src/image_model.cpp @@ -0,0 +1,57 @@ +/* +// Copyright (C) 2021-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "models/image_model.h" + +#include <stdexcept> +#include <vector> + +#include <opencv2/core.hpp> +#include <openvino/openvino.hpp> + +#include <utils/image_utils.h> +#include <utils/ocv_common.hpp> + +#include "models/input_data.h" +#include "models/internal_model_data.h" + +ImageModel::ImageModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout) + : ModelBase(modelFileName, layout), + useAutoResize(useAutoResize) {} + +std::shared_ptr<InternalModelData> ImageModel::preprocess(const InputData& inputData, ov::InferRequest& request) { + const auto& origImg = inputData.asRef<ImageInputData>().inputImage; + auto img = inputTransform(origImg); + + if (!useAutoResize) { + // /* Resize and copy data from the image to the input tensor */ + const ov::Tensor& frameTensor = request.get_tensor(inputsNames[0]); // first input should be image + const ov::Shape& tensorShape = frameTensor.get_shape(); + const ov::Layout layout("NHWC"); + const size_t width = tensorShape[ov::layout::width_idx(layout)]; + const size_t height = tensorShape[ov::layout::height_idx(layout)]; + const size_t channels = tensorShape[ov::layout::channels_idx(layout)]; + if (static_cast<size_t>(img.channels()) != channels) { + throw std::runtime_error("The number of channels for model input and image must match"); + } + if (channels != 1 && channels != 3) { + throw std::runtime_error("Unsupported number of channels"); + } + img = resizeImageExt(img, width, height, resizeMode, interpolationMode); + } + request.set_tensor(inputsNames[0], wrapMat2Tensor(img)); + return std::make_shared<InternalImageModelData>(origImg.cols, origImg.rows); +} diff --git a/python/openvino/runtime/common/models/src/jpeg_restoration_model.cpp b/python/openvino/runtime/common/models/src/jpeg_restoration_model.cpp new file mode 100644 index 0000000..8eb3ae1 --- /dev/null +++ b/python/openvino/runtime/common/models/src/jpeg_restoration_model.cpp @@ -0,0 +1,167 @@ +/* +// Copyright (C) 2021-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include <stddef.h> + +#include <algorithm> +#include <memory> +#include <stdexcept> +#include <string> +#include <vector> + +#include <opencv2/core.hpp> +#include <opencv2/imgcodecs.hpp> +#include <opencv2/imgproc.hpp> +#include <openvino/openvino.hpp> + +#include <utils/ocv_common.hpp> +#include <utils/slog.hpp> + +#include "models/image_model.h" +#include "models/input_data.h" +#include "models/internal_model_data.h" +#include "models/jpeg_restoration_model.h" +#include "models/results.h" + +JPEGRestorationModel::JPEGRestorationModel(const std::string& modelFileName, + const cv::Size& inputImgSize, + bool _jpegCompression, + const std::string& layout) + : ImageModel(modelFileName, false, layout) { + netInputHeight = inputImgSize.height; + netInputWidth = inputImgSize.width; + jpegCompression = _jpegCompression; +} + +void JPEGRestorationModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output ------------------------------------------------- + // --------------------------- Prepare input ------------------------------------------------------ + if (model->inputs().size() != 1) { + throw std::logic_error("The JPEG Restoration model wrapper supports topologies with only 1 input"); + } + inputsNames.push_back(model->input().get_any_name()); + + const ov::Shape& inputShape = model->input().get_shape(); + const ov::Layout& inputLayout = getInputLayout(model->input()); + + if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 || + inputShape[ov::layout::channels_idx(inputLayout)] != 3) { + throw std::logic_error("3-channel 4-dimensional model's input is expected"); + } + + ov::preprocess::PrePostProcessor ppp(model); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC"); + + ppp.input().model().set_layout(inputLayout); + + // --------------------------- Prepare output ----------------------------------------------------- + const ov::OutputVector& outputs = model->outputs(); + if (outputs.size() != 1) { + throw std::logic_error("The JPEG Restoration model wrapper supports topologies with only 1 output"); + } + const ov::Shape& outputShape = model->output().get_shape(); + const ov::Layout outputLayout{"NCHW"}; + if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 || + outputShape[ov::layout::channels_idx(outputLayout)] != 3) { + throw std::logic_error("3-channel 4-dimensional model's output is expected"); + } + + outputsNames.push_back(model->output().get_any_name()); + ppp.output().tensor().set_element_type(ov::element::f32); + model = ppp.build(); + + changeInputSize(model); +} + +void JPEGRestorationModel::changeInputSize(std::shared_ptr<ov::Model>& model) { + ov::Shape inputShape = model->input().get_shape(); + const ov::Layout& layout = ov::layout::get_layout(model->input()); + + const auto batchId = ov::layout::batch_idx(layout); + const auto heightId = ov::layout::height_idx(layout); + const auto widthId = ov::layout::width_idx(layout); + + if (inputShape[heightId] % stride || inputShape[widthId] % stride) { + throw std::logic_error("The shape of the model input must be divisible by stride"); + } + + netInputHeight = static_cast<int>((netInputHeight + stride - 1) / stride) * stride; + netInputWidth = static_cast<int>((netInputWidth + stride - 1) / stride) * stride; + + inputShape[batchId] = 1; + inputShape[heightId] = netInputHeight; + inputShape[widthId] = netInputWidth; + + model->reshape(inputShape); +} + 
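+// A worked example of the ceil-to-multiple arithmetic above (the stride value
+// here is assumed purely for illustration): with stride == 8, a requested
+// 500x600 input becomes 504x600, since (500 + 7) / 8 * 8 == 504 and
+// (600 + 7) / 8 * 8 == 600.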
+std::shared_ptr<InternalModelData> JPEGRestorationModel::preprocess(const InputData& inputData, + ov::InferRequest& request) { + cv::Mat image = inputData.asRef<ImageInputData>().inputImage; + const size_t h = image.rows; + const size_t w = image.cols; + cv::Mat resizedImage; + if (jpegCompression) { + std::vector<uchar> encimg; + std::vector<int> params{cv::IMWRITE_JPEG_QUALITY, 40}; + cv::imencode(".jpg", image, encimg, params); + image = cv::imdecode(cv::Mat(encimg), 3); + } + + if (netInputHeight - stride < h && h <= netInputHeight && netInputWidth - stride < w && w <= netInputWidth) { + int bottom = netInputHeight - h; + int right = netInputWidth - w; + cv::copyMakeBorder(image, resizedImage, 0, bottom, 0, right, cv::BORDER_CONSTANT, 0); + } else { + slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl; + cv::resize(image, resizedImage, cv::Size(netInputWidth, netInputHeight)); + } + request.set_input_tensor(wrapMat2Tensor(resizedImage)); + + return std::make_shared<InternalImageModelData>(image.cols, image.rows); +} + +std::unique_ptr<ResultBase> JPEGRestorationModel::postprocess(InferenceResult& infResult) { + ImageResult* result = new ImageResult; + *static_cast<ResultBase*>(result) = static_cast<ResultBase&>(infResult); + + const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>(); + const auto outputData = infResult.getFirstOutputTensor().data<float>(); + + std::vector<cv::Mat> imgPlanes; + const ov::Shape& outputShape = infResult.getFirstOutputTensor().get_shape(); + const size_t outHeight = static_cast<int>(outputShape[2]); + const size_t outWidth = static_cast<int>(outputShape[3]); + const size_t numOfPixels = outWidth * outHeight; + imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])), + cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])), + cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))}; + cv::Mat resultImg; + cv::merge(imgPlanes, resultImg); + + if (netInputHeight - stride < static_cast<size_t>(inputImgSize.inputImgHeight) && + static_cast<size_t>(inputImgSize.inputImgHeight) <= netInputHeight && + netInputWidth - stride < static_cast<size_t>(inputImgSize.inputImgWidth) && + static_cast<size_t>(inputImgSize.inputImgWidth) <= netInputWidth) { + result->resultImage = resultImg(cv::Rect(0, 0, inputImgSize.inputImgWidth, inputImgSize.inputImgHeight)); + } else { + cv::resize(resultImg, result->resultImage, cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight)); + } + + result->resultImage.convertTo(result->resultImage, CV_8UC3, 255); + + return std::unique_ptr<ResultBase>(result); +} diff --git a/python/openvino/runtime/common/models/src/model_base.cpp b/python/openvino/runtime/common/models/src/model_base.cpp new file mode 100644 index 0000000..c2ebd1b --- /dev/null +++ b/python/openvino/runtime/common/models/src/model_base.cpp @@ -0,0 +1,67 @@ +/* +// Copyright (C) 2021-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "models/model_base.h" + +#include <utility> + +#include <openvino/openvino.hpp> + +#include <utils/common.hpp> +#include <utils/config_factory.h> +#include <utils/ocv_common.hpp> +#include <utils/slog.hpp> + +std::shared_ptr<ov::Model> ModelBase::prepareModel(ov::Core& core) { + // --------------------------- Read IR Generated by ModelOptimizer (.xml and .bin files) ------------ + /** Read model **/ + slog::info << "Reading model " << modelFileName << slog::endl; + std::shared_ptr<ov::Model> model = core.read_model(modelFileName); + logBasicModelInfo(model); + // -------------------------- Reading all outputs names and customizing I/O tensors (in inherited classes) + prepareInputsOutputs(model); + + /** Set batch size to 1 **/ + ov::set_batch(model, 1); + + return model; +} + +ov::CompiledModel ModelBase::compileModel(const ModelConfig& config, ov::Core& core) { + this->config = config; + auto model = prepareModel(core); + compiledModel = core.compile_model(model, config.deviceName, config.compiledModelConfig); + logCompiledModelInfo(compiledModel, modelFileName, config.deviceName); + return compiledModel; +} + +ov::Layout ModelBase::getInputLayout(const ov::Output<ov::Node>& input) { + const ov::Shape& inputShape = input.get_shape(); + ov::Layout layout = ov::layout::get_layout(input); + if (layout.empty()) { + if (inputsLayouts.empty()) { + layout = getLayoutFromShape(inputShape); + slog::warn << "Automatically detected layout '" << layout.to_string() << "' for input '" + << input.get_any_name() << "' will be used." << slog::endl; + } else if (inputsLayouts.size() == 1) { + layout = inputsLayouts.begin()->second; + } else { + layout = inputsLayouts[input.get_any_name()]; + } + } + + return layout; +} diff --git a/python/openvino/runtime/common/models/src/openpose_decoder.cpp b/python/openvino/runtime/common/models/src/openpose_decoder.cpp new file mode 100644 index 0000000..6d51607 --- /dev/null +++ b/python/openvino/runtime/common/models/src/openpose_decoder.cpp @@ -0,0 +1,345 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "models/openpose_decoder.h" + +#include <algorithm> +#include <cmath> +#include <memory> +#include <utility> +#include <vector> + +#include <utils/common.hpp> + +#include "models/results.h" + +Peak::Peak(const int id, const cv::Point2f& pos, const float score) : id(id), pos(pos), score(score) {} + +HumanPoseByPeaksIndices::HumanPoseByPeaksIndices(const int keypointsNumber) + : peaksIndices(std::vector<int>(keypointsNumber, -1)), + nJoints(0), + score(0.0f) {} + +TwoJointsConnection::TwoJointsConnection(const int firstJointIdx, const int secondJointIdx, const float score) + : firstJointIdx(firstJointIdx), + secondJointIdx(secondJointIdx), + score(score) {} + +void findPeaks(const std::vector<cv::Mat>& heatMaps, + const float minPeaksDistance, + std::vector<std::vector<Peak>>& allPeaks, + int heatMapId, + float confidenceThreshold) { + std::vector<cv::Point> peaks; + const cv::Mat& heatMap = heatMaps[heatMapId]; + const float* heatMapData = heatMap.ptr<float>(); + size_t heatMapStep = heatMap.step1(); + for (int y = -1; y < heatMap.rows + 1; y++) { + for (int x = -1; x < heatMap.cols + 1; x++) { + float val = 0; + if (x >= 0 && y >= 0 && x < heatMap.cols && y < heatMap.rows) { + val = heatMapData[y * heatMapStep + x]; + val = val >= confidenceThreshold ? val : 0; + } + + float left_val = 0; + if (y >= 0 && x < (heatMap.cols - 1) && y < heatMap.rows) { + left_val = heatMapData[y * heatMapStep + x + 1]; + left_val = left_val >= confidenceThreshold ? left_val : 0; + } + + float right_val = 0; + if (x > 0 && y >= 0 && y < heatMap.rows) { + right_val = heatMapData[y * heatMapStep + x - 1]; + right_val = right_val >= confidenceThreshold ? right_val : 0; + } + + float top_val = 0; + if (x >= 0 && x < heatMap.cols && y < (heatMap.rows - 1)) { + top_val = heatMapData[(y + 1) * heatMapStep + x]; + top_val = top_val >= confidenceThreshold ? top_val : 0; + } + + float bottom_val = 0; + if (x >= 0 && y > 0 && x < heatMap.cols) { + bottom_val = heatMapData[(y - 1) * heatMapStep + x]; + bottom_val = bottom_val >= confidenceThreshold ? 
bottom_val : 0; + } + + if ((val > left_val) && (val > right_val) && (val > top_val) && (val > bottom_val)) { + peaks.push_back(cv::Point(x, y)); + } + } + } + std::sort(peaks.begin(), peaks.end(), [](const cv::Point& a, const cv::Point& b) { + return a.x < b.x; + }); + std::vector<bool> isActualPeak(peaks.size(), true); + int peakCounter = 0; + std::vector<Peak>& peaksWithScoreAndID = allPeaks[heatMapId]; + for (size_t i = 0; i < peaks.size(); i++) { + if (isActualPeak[i]) { + for (size_t j = i + 1; j < peaks.size(); j++) { + if (sqrt((peaks[i].x - peaks[j].x) * (peaks[i].x - peaks[j].x) + + (peaks[i].y - peaks[j].y) * (peaks[i].y - peaks[j].y)) < minPeaksDistance) { + isActualPeak[j] = false; + } + } + peaksWithScoreAndID.push_back(Peak(peakCounter++, peaks[i], heatMap.at<float>(peaks[i]))); + } + } +} + +std::vector<HumanPose> groupPeaksToPoses(const std::vector<std::vector<Peak>>& allPeaks, + const std::vector<cv::Mat>& pafs, + const size_t keypointsNumber, + const float midPointsScoreThreshold, + const float foundMidPointsRatioThreshold, + const int minJointsNumber, + const float minSubsetScore) { + static const std::pair<int, int> limbIdsHeatmap[] = {{2, 3}, + {2, 6}, + {3, 4}, + {4, 5}, + {6, 7}, + {7, 8}, + {2, 9}, + {9, 10}, + {10, 11}, + {2, 12}, + {12, 13}, + {13, 14}, + {2, 1}, + {1, 15}, + {15, 17}, + {1, 16}, + {16, 18}, + {3, 17}, + {6, 18}}; + static const std::pair<int, int> limbIdsPaf[] = {{31, 32}, + {39, 40}, + {33, 34}, + {35, 36}, + {41, 42}, + {43, 44}, + {19, 20}, + {21, 22}, + {23, 24}, + {25, 26}, + {27, 28}, + {29, 30}, + {47, 48}, + {49, 50}, + {53, 54}, + {51, 52}, + {55, 56}, + {37, 38}, + {45, 46}}; + + std::vector<Peak> candidates; + for (const auto& peaks : allPeaks) { + candidates.insert(candidates.end(), peaks.begin(), peaks.end()); + } + std::vector<HumanPoseByPeaksIndices> subset(0, HumanPoseByPeaksIndices(keypointsNumber)); + for (size_t k = 0; k < arraySize(limbIdsPaf); k++) { + std::vector<TwoJointsConnection> connections; + const int mapIdxOffset = keypointsNumber + 1; + std::pair<cv::Mat, cv::Mat> scoreMid = {pafs[limbIdsPaf[k].first - mapIdxOffset], + pafs[limbIdsPaf[k].second - mapIdxOffset]}; + const int idxJointA = limbIdsHeatmap[k].first - 1; + const int idxJointB = limbIdsHeatmap[k].second - 1; + const std::vector<Peak>& candA = allPeaks[idxJointA]; + const std::vector<Peak>& candB = allPeaks[idxJointB]; + const size_t nJointsA = candA.size(); + const size_t nJointsB = candB.size(); + if (nJointsA == 0 && nJointsB == 0) { + continue; + } else if (nJointsA == 0) { + for (size_t i = 0; i < nJointsB; i++) { + int num = 0; + for (size_t j = 0; j < subset.size(); j++) { + if (subset[j].peaksIndices[idxJointB] == candB[i].id) { + num++; + continue; + } + } + if (num == 0) { + HumanPoseByPeaksIndices personKeypoints(keypointsNumber); + personKeypoints.peaksIndices[idxJointB] = candB[i].id; + personKeypoints.nJoints = 1; + personKeypoints.score = candB[i].score; + subset.push_back(personKeypoints); + } + } + continue; + } else if (nJointsB == 0) { + for (size_t i = 0; i < nJointsA; i++) { + int num = 0; + for (size_t j = 0; j < subset.size(); j++) { + if (subset[j].peaksIndices[idxJointA] == candA[i].id) { + num++; + continue; + } + } + if (num == 0) { + HumanPoseByPeaksIndices personKeypoints(keypointsNumber); + personKeypoints.peaksIndices[idxJointA] = candA[i].id; + personKeypoints.nJoints = 1; + personKeypoints.score = candA[i].score; + subset.push_back(personKeypoints); + } + } + continue; + } + + std::vector<TwoJointsConnection> 
tempJointConnections;
+        for (size_t i = 0; i < nJointsA; i++) {
+            for (size_t j = 0; j < nJointsB; j++) {
+                cv::Point2f pt = candA[i].pos * 0.5 + candB[j].pos * 0.5;
+                cv::Point mid = cv::Point(cvRound(pt.x), cvRound(pt.y));
+                cv::Point2f vec = candB[j].pos - candA[i].pos;
+                double norm_vec = cv::norm(vec);
+                if (norm_vec == 0) {
+                    continue;
+                }
+                vec /= norm_vec;
+                float score = vec.x * scoreMid.first.at<float>(mid) + vec.y * scoreMid.second.at<float>(mid);
+                int height_n = pafs[0].rows / 2;
+                float suc_ratio = 0.0f;
+                float mid_score = 0.0f;
+                const int mid_num = 10;
+                const float scoreThreshold = -100.0f;
+                if (score > scoreThreshold) {
+                    float p_sum = 0;
+                    int p_count = 0;
+                    cv::Size2f step((candB[j].pos.x - candA[i].pos.x) / (mid_num - 1),
+                                    (candB[j].pos.y - candA[i].pos.y) / (mid_num - 1));
+                    for (int n = 0; n < mid_num; n++) {
+                        cv::Point midPoint(cvRound(candA[i].pos.x + n * step.width),
+                                           cvRound(candA[i].pos.y + n * step.height));
+                        cv::Point2f pred(scoreMid.first.at<float>(midPoint), scoreMid.second.at<float>(midPoint));
+                        score = vec.x * pred.x + vec.y * pred.y;
+                        if (score > midPointsScoreThreshold) {
+                            p_sum += score;
+                            p_count++;
+                        }
+                    }
+                    // Floating-point division is required here: integer division
+                    // would make suc_ratio either 0 or 1 and defeat the ratio
+                    // threshold check below.
+                    suc_ratio = static_cast<float>(p_count) / mid_num;
+                    float ratio = p_count > 0 ? p_sum / p_count : 0.0f;
+                    mid_score = ratio + static_cast<float>(std::min(height_n / norm_vec - 1, 0.0));
+                }
+                if (mid_score > 0 && suc_ratio > foundMidPointsRatioThreshold) {
+                    tempJointConnections.push_back(TwoJointsConnection(i, j, mid_score));
+                }
+            }
+        }
+        if (!tempJointConnections.empty()) {
+            std::sort(tempJointConnections.begin(),
+                      tempJointConnections.end(),
+                      [](const TwoJointsConnection& a, const TwoJointsConnection& b) {
+                          return (a.score > b.score);
+                      });
+        }
+        size_t num_limbs = std::min(nJointsA, nJointsB);
+        size_t cnt = 0;
+        std::vector<int> occurA(nJointsA, 0);
+        std::vector<int> occurB(nJointsB, 0);
+        for (size_t row = 0; row < tempJointConnections.size(); row++) {
+            if (cnt == num_limbs) {
+                break;
+            }
+            const int& indexA = tempJointConnections[row].firstJointIdx;
+            const int& indexB = tempJointConnections[row].secondJointIdx;
+            const float& score = tempJointConnections[row].score;
+            if (occurA[indexA] == 0 && occurB[indexB] == 0) {
+                connections.push_back(TwoJointsConnection(candA[indexA].id, candB[indexB].id, score));
+                cnt++;
+                occurA[indexA] = 1;
+                occurB[indexB] = 1;
+            }
+        }
+        if (connections.empty()) {
+            continue;
+        }
+
+        bool extraJointConnections = (k == 17 || k == 18);
+        if (k == 0) {
+            subset = std::vector<HumanPoseByPeaksIndices>(connections.size(), HumanPoseByPeaksIndices(keypointsNumber));
+            for (size_t i = 0; i < connections.size(); i++) {
+                const int& indexA = connections[i].firstJointIdx;
+                const int& indexB = connections[i].secondJointIdx;
+                subset[i].peaksIndices[idxJointA] = indexA;
+                subset[i].peaksIndices[idxJointB] = indexB;
+                subset[i].nJoints = 2;
+                subset[i].score = candidates[indexA].score + candidates[indexB].score + connections[i].score;
+            }
+        } else if (extraJointConnections) {
+            for (size_t i = 0; i < connections.size(); i++) {
+                const int& indexA = connections[i].firstJointIdx;
+                const int& indexB = connections[i].secondJointIdx;
+                for (size_t j = 0; j < subset.size(); j++) {
+                    if (subset[j].peaksIndices[idxJointA] == indexA && subset[j].peaksIndices[idxJointB] == -1) {
+                        subset[j].peaksIndices[idxJointB] = indexB;
+                    } else if (subset[j].peaksIndices[idxJointB] == indexB && subset[j].peaksIndices[idxJointA] == -1) {
+                        subset[j].peaksIndices[idxJointA] = indexA;
+                    }
+                }
+            }
+            continue;
+        } else {
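+            // Remaining limb types: greedily attach each accepted connection to
+            // an existing partial pose that already contains joint A; connections
+            // that match no pose start a new two-joint pose below.
+            for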
(size_t i = 0; i < connections.size(); i++) { + const int& indexA = connections[i].firstJointIdx; + const int& indexB = connections[i].secondJointIdx; + bool num = false; + for (size_t j = 0; j < subset.size(); j++) { + if (subset[j].peaksIndices[idxJointA] == indexA) { + subset[j].peaksIndices[idxJointB] = indexB; + subset[j].nJoints++; + subset[j].score += candidates[indexB].score + connections[i].score; + num = true; + } + } + if (!num) { + HumanPoseByPeaksIndices hpWithScore(keypointsNumber); + hpWithScore.peaksIndices[idxJointA] = indexA; + hpWithScore.peaksIndices[idxJointB] = indexB; + hpWithScore.nJoints = 2; + hpWithScore.score = candidates[indexA].score + candidates[indexB].score + connections[i].score; + subset.push_back(hpWithScore); + } + } + } + } + std::vector<HumanPose> poses; + for (const auto& subsetI : subset) { + if (subsetI.nJoints < minJointsNumber || subsetI.score / subsetI.nJoints < minSubsetScore) { + continue; + } + int position = -1; + HumanPose pose{std::vector<cv::Point2f>(keypointsNumber, cv::Point2f(-1.0f, -1.0f)), + subsetI.score * std::max(0, subsetI.nJoints - 1)}; + for (const auto& peakIdx : subsetI.peaksIndices) { + position++; + if (peakIdx >= 0) { + pose.keypoints[position] = candidates[peakIdx].pos; + pose.keypoints[position].x += 0.5; + pose.keypoints[position].y += 0.5; + } + } + poses.push_back(pose); + } + return poses; +} diff --git a/python/openvino/runtime/common/models/src/segmentation_model.cpp b/python/openvino/runtime/common/models/src/segmentation_model.cpp new file mode 100644 index 0000000..82a153b --- /dev/null +++ b/python/openvino/runtime/common/models/src/segmentation_model.cpp @@ -0,0 +1,157 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "models/segmentation_model.h" + +#include <stddef.h> +#include <stdint.h> + +#include <fstream> +#include <stdexcept> +#include <string> +#include <vector> + +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> +#include <openvino/openvino.hpp> + +#include "models/internal_model_data.h" +#include "models/results.h" + +SegmentationModel::SegmentationModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout) + : ImageModel(modelFileName, useAutoResize, layout) {} + +std::vector<std::string> SegmentationModel::loadLabels(const std::string& labelFilename) { + std::vector<std::string> labelsList; + + /* Read labels (if any) */ + if (!labelFilename.empty()) { + std::ifstream inputFile(labelFilename); + if (!inputFile.is_open()) + throw std::runtime_error("Can't open the labels file: " + labelFilename); + std::string label; + while (std::getline(inputFile, label)) { + labelsList.push_back(label); + } + if (labelsList.empty()) + throw std::logic_error("File is empty: " + labelFilename); + } + + return labelsList; +} + +void SegmentationModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output --------------------------------------------- + // --------------------------- Prepare input ----------------------------------------------------- + if (model->inputs().size() != 1) { + throw std::logic_error("Segmentation model wrapper supports topologies with only 1 input"); + } + const auto& input = model->input(); + inputsNames.push_back(input.get_any_name()); + + const ov::Layout& inputLayout = getInputLayout(input); + const ov::Shape& inputShape = input.get_shape(); + if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) { + throw std::logic_error("3-channel 4-dimensional model's input is expected"); + } + + ov::preprocess::PrePostProcessor ppp(model); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); + + if (useAutoResize) { + ppp.input().tensor().set_spatial_dynamic_shape(); + + ppp.input() + .preprocess() + .convert_element_type(ov::element::f32) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + } + + ppp.input().model().set_layout(inputLayout); + model = ppp.build(); + // --------------------------- Prepare output ----------------------------------------------------- + if (model->outputs().size() != 1) { + throw std::logic_error("Segmentation model wrapper supports topologies with only 1 output"); + } + + const auto& output = model->output(); + outputsNames.push_back(output.get_any_name()); + + const ov::Shape& outputShape = output.get_shape(); + ov::Layout outputLayout(""); + switch (outputShape.size()) { + case 3: + outputLayout = "CHW"; + outChannels = 1; + outHeight = static_cast<int>(outputShape[ov::layout::height_idx(outputLayout)]); + outWidth = static_cast<int>(outputShape[ov::layout::width_idx(outputLayout)]); + break; + case 4: + outputLayout = "NCHW"; + outChannels = static_cast<int>(outputShape[ov::layout::channels_idx(outputLayout)]); + outHeight = static_cast<int>(outputShape[ov::layout::height_idx(outputLayout)]); + outWidth = static_cast<int>(outputShape[ov::layout::width_idx(outputLayout)]); + break; + default: + throw std::logic_error("Unexpected output tensor shape. 
Only 4D and 3D outputs are supported."); + } +} + +std::unique_ptr<ResultBase> SegmentationModel::postprocess(InferenceResult& infResult) { + ImageResult* result = new ImageResult(infResult.frameId, infResult.metaData); + const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>(); + const auto& outTensor = infResult.getFirstOutputTensor(); + + result->resultImage = cv::Mat(outHeight, outWidth, CV_8UC1); + + if (outChannels == 1 && outTensor.get_element_type() == ov::element::i32) { + cv::Mat predictions(outHeight, outWidth, CV_32SC1, outTensor.data<int32_t>()); + predictions.convertTo(result->resultImage, CV_8UC1); + } else if (outChannels == 1 && outTensor.get_element_type() == ov::element::i64) { + cv::Mat predictions(outHeight, outWidth, CV_32SC1); + const auto data = outTensor.data<int64_t>(); + for (size_t i = 0; i < predictions.total(); ++i) { + reinterpret_cast<int32_t*>(predictions.data)[i] = int32_t(data[i]); + } + predictions.convertTo(result->resultImage, CV_8UC1); + } else if (outTensor.get_element_type() == ov::element::f32) { + const float* data = outTensor.data<float>(); + for (int rowId = 0; rowId < outHeight; ++rowId) { + for (int colId = 0; colId < outWidth; ++colId) { + int classId = 0; + float maxProb = -1.0f; + for (int chId = 0; chId < outChannels; ++chId) { + float prob = data[chId * outHeight * outWidth + rowId * outWidth + colId]; + if (prob > maxProb) { + classId = chId; + maxProb = prob; + } + } // nChannels + + result->resultImage.at<uint8_t>(rowId, colId) = classId; + } // width + } // height + } + + cv::resize(result->resultImage, + result->resultImage, + cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight), + 0, + 0, + cv::INTER_NEAREST); + + return std::unique_ptr<ResultBase>(result); +} diff --git a/python/openvino/runtime/common/models/src/style_transfer_model.cpp b/python/openvino/runtime/common/models/src/style_transfer_model.cpp new file mode 100644 index 0000000..53e8561 --- /dev/null +++ b/python/openvino/runtime/common/models/src/style_transfer_model.cpp @@ -0,0 +1,107 @@ +/* +// Copyright (C) 2021-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "models/style_transfer_model.h" + +#include <stddef.h> + +#include <memory> +#include <stdexcept> +#include <string> +#include <vector> + +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> +#include <openvino/openvino.hpp> + +#include <utils/image_utils.h> +#include <utils/ocv_common.hpp> + +#include "models/input_data.h" +#include "models/internal_model_data.h" +#include "models/results.h" + +StyleTransferModel::StyleTransferModel(const std::string& modelFileName, const std::string& layout) + : ImageModel(modelFileName, false, layout) {} + +void StyleTransferModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output --------------------------------------------- + // --------------------------- Prepare input -------------------------------------------------- + if (model->inputs().size() != 1) { + throw std::logic_error("Style transfer model wrapper supports topologies with only 1 input"); + } + + inputsNames.push_back(model->input().get_any_name()); + + const ov::Shape& inputShape = model->input().get_shape(); + ov::Layout inputLayout = getInputLayout(model->input()); + + if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 || + inputShape[ov::layout::channels_idx(inputLayout)] != 3) { + throw std::logic_error("3-channel 4-dimensional model's input is expected"); + } + + netInputWidth = inputShape[ov::layout::width_idx(inputLayout)]; + netInputHeight = inputShape[ov::layout::height_idx(inputLayout)]; + + ov::preprocess::PrePostProcessor ppp(model); + ppp.input().preprocess().convert_element_type(ov::element::f32); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC"); + + ppp.input().model().set_layout(inputLayout); + + // --------------------------- Prepare output ----------------------------------------------------- + const ov::OutputVector& outputs = model->outputs(); + if (outputs.size() != 1) { + throw std::logic_error("Style transfer model wrapper supports topologies with only 1 output"); + } + outputsNames.push_back(model->output().get_any_name()); + + const ov::Shape& outputShape = model->output().get_shape(); + ov::Layout outputLayout{"NCHW"}; + if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 || + outputShape[ov::layout::channels_idx(outputLayout)] != 3) { + throw std::logic_error("3-channel 4-dimensional model's output is expected"); + } + + ppp.output().tensor().set_element_type(ov::element::f32); + model = ppp.build(); +} + +std::unique_ptr<ResultBase> StyleTransferModel::postprocess(InferenceResult& infResult) { + ImageResult* result = new ImageResult; + *static_cast<ResultBase*>(result) = static_cast<ResultBase&>(infResult); + + const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>(); + const auto outputData = infResult.getFirstOutputTensor().data<float>(); + + const ov::Shape& outputShape = infResult.getFirstOutputTensor().get_shape(); + size_t outHeight = static_cast<int>(outputShape[2]); + size_t outWidth = static_cast<int>(outputShape[3]); + size_t numOfPixels = outWidth * outHeight; + + std::vector<cv::Mat> imgPlanes; + imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2])), + cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])), + cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0]))}; + cv::Mat resultImg; + cv::merge(imgPlanes, resultImg); + cv::resize(resultImg, result->resultImage, 
cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight)); + + result->resultImage.convertTo(result->resultImage, CV_8UC3); + + return std::unique_ptr<ResultBase>(result); +} diff --git a/python/openvino/runtime/common/models/src/super_resolution_model.cpp b/python/openvino/runtime/common/models/src/super_resolution_model.cpp new file mode 100644 index 0000000..164991a --- /dev/null +++ b/python/openvino/runtime/common/models/src/super_resolution_model.cpp @@ -0,0 +1,207 @@ +/* +// Copyright (C) 2021-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "models/super_resolution_model.h" + +#include <stddef.h> + +#include <map> +#include <stdexcept> +#include <string> +#include <utility> +#include <vector> + +#include <opencv2/imgproc.hpp> +#include <openvino/openvino.hpp> + +#include <utils/image_utils.h> +#include <utils/ocv_common.hpp> +#include <utils/slog.hpp> + +#include "models/input_data.h" +#include "models/internal_model_data.h" +#include "models/results.h" + +SuperResolutionModel::SuperResolutionModel(const std::string& modelFileName, + const cv::Size& inputImgSize, + const std::string& layout) + : ImageModel(modelFileName, false, layout) { + netInputHeight = inputImgSize.height; + netInputWidth = inputImgSize.width; +} + +void SuperResolutionModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { + // --------------------------- Configure input & output --------------------------------------------- + // --------------------------- Prepare input -------------------------------------------------- + const ov::OutputVector& inputs = model->inputs(); + if (inputs.size() != 1 && inputs.size() != 2) { + throw std::logic_error("Super resolution model wrapper supports topologies with 1 or 2 inputs only"); + } + std::string lrInputTensorName = inputs.begin()->get_any_name(); + inputsNames.push_back(lrInputTensorName); + ov::Shape lrShape = inputs.begin()->get_shape(); + if (lrShape.size() != 4) { + throw std::logic_error("Number of dimensions for an input must be 4"); + } + // in case of 2 inputs they have the same layouts + ov::Layout inputLayout = getInputLayout(model->inputs().front()); + + auto channelsId = ov::layout::channels_idx(inputLayout); + auto heightId = ov::layout::height_idx(inputLayout); + auto widthId = ov::layout::width_idx(inputLayout); + + if (lrShape[channelsId] != 1 && lrShape[channelsId] != 3) { + throw std::logic_error("Input layer is expected to have 1 or 3 channels"); + } + + // A model like single-image-super-resolution-???? 
may take bicubic interpolation of the input image as the + // second input + std::string bicInputTensorName; + if (inputs.size() == 2) { + bicInputTensorName = (++inputs.begin())->get_any_name(); + inputsNames.push_back(bicInputTensorName); + ov::Shape bicShape = (++inputs.begin())->get_shape(); + if (bicShape.size() != 4) { + throw std::logic_error("Number of dimensions for both inputs must be 4"); + } + if (lrShape[widthId] >= bicShape[widthId] && lrShape[heightId] >= bicShape[heightId]) { + std::swap(bicShape, lrShape); + inputsNames[0].swap(inputsNames[1]); + } else if (!(lrShape[widthId] <= bicShape[widthId] && lrShape[heightId] <= bicShape[heightId])) { + throw std::logic_error("Each spatial dimension of one input must surpass or be equal to a spatial" + "dimension of another input"); + } + } + + ov::preprocess::PrePostProcessor ppp(model); + for (const auto& input : inputs) { + ppp.input(input.get_any_name()).tensor().set_element_type(ov::element::u8).set_layout("NHWC"); + + ppp.input(input.get_any_name()).model().set_layout(inputLayout); + } + + // --------------------------- Prepare output ----------------------------------------------------- + const ov::OutputVector& outputs = model->outputs(); + if (outputs.size() != 1) { + throw std::logic_error("Super resolution model wrapper supports topologies with only 1 output"); + } + + outputsNames.push_back(outputs.begin()->get_any_name()); + ppp.output().tensor().set_element_type(ov::element::f32); + model = ppp.build(); + + const ov::Shape& outShape = model->output().get_shape(); + + const ov::Layout outputLayout("NCHW"); + const auto outWidth = outShape[ov::layout::width_idx(outputLayout)]; + const auto inWidth = lrShape[ov::layout::width_idx(outputLayout)]; + changeInputSize(model, static_cast<int>(outWidth / inWidth)); +} + +void SuperResolutionModel::changeInputSize(std::shared_ptr<ov::Model>& model, int coeff) { + std::map<std::string, ov::PartialShape> shapes; + const ov::Layout& layout = ov::layout::get_layout(model->inputs().front()); + const auto batchId = ov::layout::batch_idx(layout); + const auto heightId = ov::layout::height_idx(layout); + const auto widthId = ov::layout::width_idx(layout); + + const ov::OutputVector& inputs = model->inputs(); + std::string lrInputTensorName = inputs.begin()->get_any_name(); + ov::Shape lrShape = inputs.begin()->get_shape(); + + if (inputs.size() == 2) { + std::string bicInputTensorName = (++inputs.begin())->get_any_name(); + ov::Shape bicShape = (++inputs.begin())->get_shape(); + if (lrShape[heightId] >= bicShape[heightId] && lrShape[widthId] >= bicShape[widthId]) { + std::swap(bicShape, lrShape); + std::swap(bicInputTensorName, lrInputTensorName); + } + bicShape[batchId] = 1; + bicShape[heightId] = coeff * netInputHeight; + bicShape[widthId] = coeff * netInputWidth; + shapes[bicInputTensorName] = ov::PartialShape(bicShape); + } + + lrShape[batchId] = 1; + lrShape[heightId] = netInputHeight; + lrShape[widthId] = netInputWidth; + shapes[lrInputTensorName] = ov::PartialShape(lrShape); + + model->reshape(shapes); +} + +std::shared_ptr<InternalModelData> SuperResolutionModel::preprocess(const InputData& inputData, + ov::InferRequest& request) { + auto imgData = inputData.asRef<ImageInputData>(); + auto& img = imgData.inputImage; + + const ov::Tensor lrInputTensor = request.get_tensor(inputsNames[0]); + const ov::Layout layout("NHWC"); + + if (img.channels() != static_cast<int>(lrInputTensor.get_shape()[ov::layout::channels_idx(layout)])) { + cv::cvtColor(img, img, cv::COLOR_BGR2GRAY); + } + 
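+    // The input tensor may expect 1 channel (the text-image-super-resolution
+    // case) while the supplied image is BGR, hence the conversion above; the
+    // image is then resized to the tensor's spatial size, with a warning when
+    // the sizes differ.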
+ if (static_cast<size_t>(img.cols) != netInputWidth || static_cast<size_t>(img.rows) != netInputHeight) { + slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl; + } + const size_t height = lrInputTensor.get_shape()[ov::layout::height_idx(layout)]; + const size_t width = lrInputTensor.get_shape()[ov::layout::width_idx(layout)]; + img = resizeImageExt(img, width, height); + request.set_tensor(inputsNames[0], wrapMat2Tensor(img)); + + if (inputsNames.size() == 2) { + const ov::Tensor bicInputTensor = request.get_tensor(inputsNames[1]); + const int h = static_cast<int>(bicInputTensor.get_shape()[ov::layout::height_idx(layout)]); + const int w = static_cast<int>(bicInputTensor.get_shape()[ov::layout::width_idx(layout)]); + cv::Mat resized; + cv::resize(img, resized, cv::Size(w, h), 0, 0, cv::INTER_CUBIC); + request.set_tensor(inputsNames[1], wrapMat2Tensor(resized)); + } + + return std::make_shared<InternalImageModelData>(img.cols, img.rows); +} + +std::unique_ptr<ResultBase> SuperResolutionModel::postprocess(InferenceResult& infResult) { + ImageResult* result = new ImageResult; + *static_cast<ResultBase*>(result) = static_cast<ResultBase&>(infResult); + const auto outputData = infResult.getFirstOutputTensor().data<float>(); + + std::vector<cv::Mat> imgPlanes; + const ov::Shape& outShape = infResult.getFirstOutputTensor().get_shape(); + const size_t outChannels = static_cast<int>(outShape[1]); + const size_t outHeight = static_cast<int>(outShape[2]); + const size_t outWidth = static_cast<int>(outShape[3]); + const size_t numOfPixels = outWidth * outHeight; + if (outChannels == 3) { + imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])), + cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])), + cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))}; + } else { + imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0]))}; + // Post-processing for text-image-super-resolution models + cv::threshold(imgPlanes[0], imgPlanes[0], 0.5f, 1.0f, cv::THRESH_BINARY); + } + + for (auto& img : imgPlanes) { + img.convertTo(img, CV_8UC1, 255); + } + cv::Mat resultImg; + cv::merge(imgPlanes, resultImg); + result->resultImage = resultImg; + + return std::unique_ptr<ResultBase>(result); +} diff --git a/python/openvino/runtime/common/monitors/CMakeLists.txt b/python/openvino/runtime/common/monitors/CMakeLists.txt new file mode 100644 index 0000000..1bfe0b9 --- /dev/null +++ b/python/openvino/runtime/common/monitors/CMakeLists.txt @@ -0,0 +1,38 @@ +# Copyright (C) 2018-2019 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +find_package(OpenCV REQUIRED COMPONENTS core imgproc) + +set(SOURCES + src/cpu_monitor.cpp + src/memory_monitor.cpp + src/presenter.cpp) + +set(HEADERS + include/monitors/cpu_monitor.h + include/monitors/memory_monitor.h + include/monitors/presenter.h) + +if(WIN32) + list(APPEND SOURCES src/query_wrapper.cpp) + list(APPEND HEADERS include/monitors/query_wrapper.h) +endif() +# Create named folders for the sources within the .vcproj +# Empty name lists them directly under the .vcproj +source_group("src" FILES ${SOURCES}) +source_group("include" FILES ${HEADERS}) + +add_library(monitors STATIC ${SOURCES} ${HEADERS}) +target_include_directories(monitors PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") +target_link_libraries(monitors PRIVATE opencv_core opencv_imgproc) +if(WIN32) + target_link_libraries(monitors PRIVATE pdh) + + 
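+    # pdh is the Windows Performance Data Helper library; cpu_monitor reads the
+    # per-core "% Processor Time" counters through it.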
target_compile_definitions(monitors PRIVATE + # Prevents Windows.h from adding unnecessary includes + WIN32_LEAN_AND_MEAN + # Prevents Windows.h from defining min/max as macros + NOMINMAX + ) +endif() diff --git a/python/openvino/runtime/common/monitors/include/monitors/cpu_monitor.h b/python/openvino/runtime/common/monitors/include/monitors/cpu_monitor.h new file mode 100644 index 0000000..38d2845 --- /dev/null +++ b/python/openvino/runtime/common/monitors/include/monitors/cpu_monitor.h @@ -0,0 +1,28 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include <deque> +#include <memory> +#include <vector> + +class CpuMonitor { +public: + CpuMonitor(); + ~CpuMonitor(); + void setHistorySize(std::size_t size); + std::size_t getHistorySize() const; + void collectData(); + std::deque<std::vector<double>> getLastHistory() const; + std::vector<double> getMeanCpuLoad() const; + +private: + unsigned samplesNumber; + unsigned historySize; + std::vector<double> cpuLoadSum; + std::deque<std::vector<double>> cpuLoadHistory; + class PerformanceCounter; + std::unique_ptr<PerformanceCounter> performanceCounter; +}; diff --git a/python/openvino/runtime/common/monitors/include/monitors/memory_monitor.h b/python/openvino/runtime/common/monitors/include/monitors/memory_monitor.h new file mode 100644 index 0000000..9eda10f --- /dev/null +++ b/python/openvino/runtime/common/monitors/include/monitors/memory_monitor.h @@ -0,0 +1,34 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include <deque> +#include <memory> + +class MemoryMonitor { +public: + MemoryMonitor(); + ~MemoryMonitor(); + void setHistorySize(std::size_t size); + std::size_t getHistorySize() const; + void collectData(); + std::deque<std::pair<double, double>> getLastHistory() const; + double getMeanMem() const; // in GiB + double getMeanSwap() const; + double getMaxMem() const; + double getMaxSwap() const; + double getMemTotal() const; + double getMaxMemTotal() const; // a system may have hotpluggable memory +private: + unsigned samplesNumber; + std::size_t historySize; + double memSum, swapSum; + double maxMem, maxSwap; + double memTotal; + double maxMemTotal; + std::deque<std::pair<double, double>> memSwapUsageHistory; + class PerformanceCounter; + std::unique_ptr<PerformanceCounter> performanceCounter; +}; diff --git a/python/openvino/runtime/common/monitors/include/monitors/presenter.h b/python/openvino/runtime/common/monitors/include/monitors/presenter.h new file mode 100644 index 0000000..c6587a0 --- /dev/null +++ b/python/openvino/runtime/common/monitors/include/monitors/presenter.h @@ -0,0 +1,44 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include <chrono> +#include <map> +#include <ostream> +#include <set> + +#include <opencv2/imgproc.hpp> + +#include "cpu_monitor.h" +#include "memory_monitor.h" + +enum class MonitorType{CpuAverage, DistributionCpu, Memory}; + +class Presenter { +public: + explicit Presenter(std::set<MonitorType> enabledMonitors = {}, + int yPos = 20, + cv::Size graphSize = {150, 60}, + std::size_t historySize = 20); + explicit Presenter(const std::string& keys, + int yPos = 20, + cv::Size graphSize = {150, 60}, + std::size_t historySize = 20); + void addRemoveMonitor(MonitorType monitor); + void handleKey(int key); // handles C, D, M, H keys + void drawGraphs(cv::Mat& frame); + std::vector<std::string> reportMeans() const; + + const int yPos; + 
const cv::Size graphSize; + const int graphPadding; +private: + std::chrono::steady_clock::time_point prevTimeStamp; + std::size_t historySize; + CpuMonitor cpuMonitor; + bool distributionCpuEnabled; + MemoryMonitor memoryMonitor; + std::ostringstream strStream; +}; diff --git a/python/openvino/runtime/common/monitors/include/monitors/query_wrapper.h b/python/openvino/runtime/common/monitors/include/monitors/query_wrapper.h new file mode 100644 index 0000000..d69f548 --- /dev/null +++ b/python/openvino/runtime/common/monitors/include/monitors/query_wrapper.h @@ -0,0 +1,17 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include <Pdh.h> +class QueryWrapper { +public: + QueryWrapper(); + ~QueryWrapper(); + QueryWrapper(const QueryWrapper&) = delete; + QueryWrapper& operator=(const QueryWrapper&) = delete; + operator PDH_HQUERY() const; +private: + PDH_HQUERY query; +}; diff --git a/python/openvino/runtime/common/monitors/src/cpu_monitor.cpp b/python/openvino/runtime/common/monitors/src/cpu_monitor.cpp new file mode 100644 index 0000000..e5172a2 --- /dev/null +++ b/python/openvino/runtime/common/monitors/src/cpu_monitor.cpp @@ -0,0 +1,206 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "monitors/cpu_monitor.h" + +#include <algorithm> +#ifdef _WIN32 +#include "monitors/query_wrapper.h" +#include <string> +#include <system_error> +#include <PdhMsg.h> +#include <Windows.h> + +namespace { +const std::size_t nCores = []() { + SYSTEM_INFO sysinfo; + GetSystemInfo(&sysinfo); + return sysinfo.dwNumberOfProcessors; + }(); +} + +class CpuMonitor::PerformanceCounter { +public: + PerformanceCounter() : coreTimeCounters(nCores) { + PDH_STATUS status; + for (std::size_t i = 0; i < nCores; ++i) { + std::wstring fullCounterPath{L"\\Processor(" + std::to_wstring(i) + L")\\% Processor Time"}; + status = PdhAddCounterW(query, fullCounterPath.c_str(), 0, &coreTimeCounters[i]); + if (ERROR_SUCCESS != status) { + throw std::system_error(status, std::system_category(), "PdhAddCounterW() failed"); + } + status = PdhSetCounterScaleFactor(coreTimeCounters[i], -2); // scale counter to [0, 1] + if (ERROR_SUCCESS != status) { + throw std::system_error(status, std::system_category(), "PdhSetCounterScaleFactor() failed"); + } + } + status = PdhCollectQueryData(query); + if (ERROR_SUCCESS != status) { + throw std::system_error(status, std::system_category(), "PdhCollectQueryData() failed"); + } + } + + std::vector<double> getCpuLoad() { + PDH_STATUS status; + status = PdhCollectQueryData(query); + if (ERROR_SUCCESS != status) { + throw std::system_error(status, std::system_category(), "PdhCollectQueryData() failed"); + } + + PDH_FMT_COUNTERVALUE displayValue; + std::vector<double> cpuLoad(coreTimeCounters.size()); + for (std::size_t i = 0; i < coreTimeCounters.size(); ++i) { + status = PdhGetFormattedCounterValue(coreTimeCounters[i], PDH_FMT_DOUBLE, NULL, + &displayValue); + switch (status) { + case ERROR_SUCCESS: break; + // PdhGetFormattedCounterValue() can sometimes return PDH_CALC_NEGATIVE_DENOMINATOR for some reason + case PDH_CALC_NEGATIVE_DENOMINATOR: return {}; + default: + throw std::system_error(status, std::system_category(), "PdhGetFormattedCounterValue() failed"); + } + if (PDH_CSTATUS_VALID_DATA != displayValue.CStatus && PDH_CSTATUS_NEW_DATA != displayValue.CStatus) { + throw std::runtime_error("Error in counter data"); + } + + cpuLoad[i] = displayValue.doubleValue; + } + return cpuLoad; + } + 
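+    // Note: PdhSetCounterScaleFactor(counter, -2) above multiplies the raw
+    // "% Processor Time" values by 10^-2, so every entry returned by getCpuLoad()
+    // is a per-core load fraction in [0, 1]. A minimal averaging helper (a sketch,
+    // not part of the original file; std::accumulate comes from <numeric>):
+    //
+    //     double overallLoad(const std::vector<double>& perCore) {
+    //         return perCore.empty() ? 0.0
+    //                                : std::accumulate(perCore.begin(), perCore.end(), 0.0) / perCore.size();
+    //     }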
+private: + QueryWrapper query; + std::vector<PDH_HCOUNTER> coreTimeCounters; +}; + +#elif __linux__ +#include <chrono> +#include <regex> +#include <utility> +#include <fstream> +#include <unistd.h> + +namespace { +const long clockTicks = sysconf(_SC_CLK_TCK); + +const std::size_t nCores = sysconf(_SC_NPROCESSORS_CONF); + +std::vector<unsigned long> getIdleCpuStat() { + std::vector<unsigned long> idleCpuStat(nCores); + std::ifstream procStat("/proc/stat"); + std::string line; + std::smatch match; + std::regex coreJiffies("^cpu(\\d+)\\s+" + "(\\d+)\\s+" + "(\\d+)\\s+" + "(\\d+)\\s+" + "(\\d+)\\s+" // idle + "(\\d+)"); // iowait + + while (std::getline(procStat, line)) { + if (std::regex_search(line, match, coreJiffies)) { + // it doesn't handle overflow of sum and overflows of /proc/stat values + unsigned long idleInfo = stoul(match[5]) + stoul(match[6]), + coreId = stoul(match[1]); + if (nCores <= coreId) { + throw std::runtime_error("The number of cores has changed"); + } + idleCpuStat[coreId] = idleInfo; + } + } + return idleCpuStat; +} +} + +class CpuMonitor::PerformanceCounter { +public: + PerformanceCounter() : prevIdleCpuStat{getIdleCpuStat()}, prevTimePoint{std::chrono::steady_clock::now()} {} + + std::vector<double> getCpuLoad() { + std::vector<unsigned long> idleCpuStat = getIdleCpuStat(); + auto timePoint = std::chrono::steady_clock::now(); + // don't update data too frequently which may result in negative values for cpuLoad. + // It may happen when collectData() is called just after setHistorySize(). + if (timePoint - prevTimePoint > std::chrono::milliseconds{100}) { + std::vector<double> cpuLoad(nCores); + for (std::size_t i = 0; i < idleCpuStat.size(); ++i) { + double idleDiff = idleCpuStat[i] - prevIdleCpuStat[i]; + typedef std::chrono::duration<double, std::chrono::seconds::period> Sec; + cpuLoad[i] = 1.0 + - idleDiff / clockTicks / std::chrono::duration_cast<Sec>(timePoint - prevTimePoint).count(); + } + prevIdleCpuStat = std::move(idleCpuStat); + prevTimePoint = timePoint; + return cpuLoad; + } + return {}; + } +private: + std::vector<unsigned long> prevIdleCpuStat; + std::chrono::steady_clock::time_point prevTimePoint; +}; + +#else +// not implemented +namespace { +const std::size_t nCores{0}; +} + +class CpuMonitor::PerformanceCounter { +public: + std::vector<double> getCpuLoad() {return {};}; +}; +#endif + +CpuMonitor::CpuMonitor() : + samplesNumber{0}, + historySize{0}, + cpuLoadSum(nCores, 0) {} + +// PerformanceCounter is incomplete in header and destructor can't be defined implicitly +CpuMonitor::~CpuMonitor() = default; + +void CpuMonitor::setHistorySize(std::size_t size) { + if (0 == historySize && 0 != size) { + performanceCounter.reset(new PerformanceCounter); + } else if (0 != historySize && 0 == size) { + performanceCounter.reset(); + } + historySize = size; + std::ptrdiff_t newSize = static_cast<std::ptrdiff_t>(std::min(size, cpuLoadHistory.size())); + cpuLoadHistory.erase(cpuLoadHistory.begin(), cpuLoadHistory.end() - newSize); +} + +void CpuMonitor::collectData() { + std::vector<double> cpuLoad = performanceCounter->getCpuLoad(); + + if (!cpuLoad.empty()) { + for (std::size_t i = 0; i < cpuLoad.size(); ++i) { + cpuLoadSum[i] += cpuLoad[i]; + } + ++samplesNumber; + + cpuLoadHistory.push_back(std::move(cpuLoad)); + if (cpuLoadHistory.size() > historySize) { + cpuLoadHistory.pop_front(); + } + } +} + +std::size_t CpuMonitor::getHistorySize() const { + return historySize; +} + +std::deque<std::vector<double>> CpuMonitor::getLastHistory() const { + return 
cpuLoadHistory; +} + +std::vector<double> CpuMonitor::getMeanCpuLoad() const { + std::vector<double> meanCpuLoad; + meanCpuLoad.reserve(cpuLoadSum.size()); + for (double coreLoad : cpuLoadSum) { + meanCpuLoad.push_back(samplesNumber ? coreLoad / samplesNumber : 0); + } + return meanCpuLoad; +} diff --git a/python/openvino/runtime/common/monitors/src/memory_monitor.cpp b/python/openvino/runtime/common/monitors/src/memory_monitor.cpp new file mode 100644 index 0000000..70879d6 --- /dev/null +++ b/python/openvino/runtime/common/monitors/src/memory_monitor.cpp @@ -0,0 +1,213 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "monitors/memory_monitor.h" + +struct MemState { + double memTotal, usedMem, usedSwap; +}; + +#ifdef _WIN32 +#include "monitors/query_wrapper.h" +#include <algorithm> +#define PSAPI_VERSION 2 +#include <system_error> +#include <Windows.h> +#include <PdhMsg.h> +#include <Psapi.h> + +namespace { +double getMemTotal() { + PERFORMANCE_INFORMATION performanceInformation; + if (!GetPerformanceInfo(&performanceInformation, sizeof(performanceInformation))) { + throw std::runtime_error("GetPerformanceInfo() failed"); + } + return static_cast<double>(performanceInformation.PhysicalTotal * performanceInformation.PageSize) + / (1024 * 1024 * 1024); +} +} + +class MemoryMonitor::PerformanceCounter { +public: + PerformanceCounter() { + PDH_STATUS status = PdhAddCounterW(query, L"\\Paging File(_Total)\\% Usage", 0, &pagingFileUsageCounter); + if (ERROR_SUCCESS != status) { + throw std::system_error(status, std::system_category(), "PdhAddCounterW() failed"); + } + status = PdhSetCounterScaleFactor(pagingFileUsageCounter, -2); // scale counter to [0, 1] + if (ERROR_SUCCESS != status) { + throw std::system_error(status, std::system_category(), "PdhSetCounterScaleFactor() failed"); + } + } + + MemState getMemState() { + PERFORMANCE_INFORMATION performanceInformation; + if (!GetPerformanceInfo(&performanceInformation, sizeof(performanceInformation))) { + throw std::runtime_error("GetPerformanceInfo() failed"); + } + + PDH_STATUS status; + status = PdhCollectQueryData(query); + if (ERROR_SUCCESS != status) { + throw std::system_error(status, std::system_category(), "PdhCollectQueryData() failed"); + } + PDH_FMT_COUNTERVALUE displayValue; + status = PdhGetFormattedCounterValue(pagingFileUsageCounter, PDH_FMT_DOUBLE, NULL, &displayValue); + if (ERROR_SUCCESS != status) { + throw std::system_error(status, std::system_category(), "PdhGetFormattedCounterValue() failed"); + } + if (PDH_CSTATUS_VALID_DATA != displayValue.CStatus && PDH_CSTATUS_NEW_DATA != displayValue.CStatus) { + throw std::runtime_error("Error in counter data"); + } + + double pagingFilesSize = static_cast<double>( + (performanceInformation.CommitLimit - performanceInformation.PhysicalTotal) + * performanceInformation.PageSize) / (1024 * 1024 * 1024); + return {static_cast<double>(performanceInformation.PhysicalTotal * performanceInformation.PageSize) + / (1024 * 1024 * 1024), + static_cast<double>( + (performanceInformation.PhysicalTotal - performanceInformation.PhysicalAvailable) + * performanceInformation.PageSize) / (1024 * 1024 * 1024), + pagingFilesSize * displayValue.doubleValue}; + } +private: + QueryWrapper query; + PDH_HCOUNTER pagingFileUsageCounter; +}; + +#elif __linux__ +#include <fstream> +#include <utility> +#include <vector> +#include <regex> + +namespace { +std::pair<std::pair<double, double>, std::pair<double, double>> getAvailableMemSwapTotalMemSwap() { + double 
memAvailable = 0, swapFree = 0, memTotal = 0, swapTotal = 0; + std::regex memRegex("^(.+):\\s+(\\d+) kB$"); + std::string line; + std::smatch match; + std::ifstream meminfo("/proc/meminfo"); + while (std::getline(meminfo, line)) { + if (std::regex_match(line, match, memRegex)) { + if ("MemAvailable" == match[1]) { + memAvailable = stod(match[2]) / (1024 * 1024); + } else if ("SwapFree" == match[1]) { + swapFree = stod(match[2]) / (1024 * 1024); + } else if ("MemTotal" == match[1]) { + memTotal = stod(match[2]) / (1024 * 1024); + } else if ("SwapTotal" == match[1]) { + swapTotal = stod(match[2]) / (1024 * 1024); + } + } + } + if (0 == memTotal) { + throw std::runtime_error("Can't get MemTotal"); + } + return {{memAvailable, swapFree}, {memTotal, swapTotal}}; +} + +double getMemTotal() { + return getAvailableMemSwapTotalMemSwap().second.first; +} +} + +class MemoryMonitor::PerformanceCounter { +public: + MemState getMemState() { + std::pair<std::pair<double, double>, std::pair<double, double>> availableMemSwapTotalMemSwap + = getAvailableMemSwapTotalMemSwap(); + double memTotal = availableMemSwapTotalMemSwap.second.first; + double swapTotal = availableMemSwapTotalMemSwap.second.second; + return {memTotal, memTotal - availableMemSwapTotalMemSwap.first.first, swapTotal - availableMemSwapTotalMemSwap.first.second}; + } +}; + +#else +// not implemented +namespace { +double getMemTotal() {return 0.0;} +} + +class MemoryMonitor::PerformanceCounter { +public: + MemState getMemState() {return {0.0, 0.0, 0.0};} +}; +#endif + +MemoryMonitor::MemoryMonitor() : + samplesNumber{0}, + historySize{0}, + memSum{0.0}, + swapSum{0.0}, + maxMem{0.0}, + maxSwap{0.0}, + memTotal{0.0}, + maxMemTotal{0.0} {} + +// PerformanceCounter is incomplete in header and destructor can't be defined implicitly +MemoryMonitor::~MemoryMonitor() = default; + +void MemoryMonitor::setHistorySize(std::size_t size) { + if (0 == historySize && 0 != size) { + performanceCounter.reset(new MemoryMonitor::PerformanceCounter); + // memTotal is not initialized in constructor because for linux its initialization involves constructing + // std::regex which is unimplemented and throws an exception for gcc 4.8.5 (default for CentOS 7.4). + // Delaying initialization triggers the error only when the monitor is used + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53631 + memTotal = ::getMemTotal(); + } else if (0 != historySize && 0 == size) { + performanceCounter.reset(); + } + historySize = size; + std::size_t newSize = std::min(size, memSwapUsageHistory.size()); + memSwapUsageHistory.erase(memSwapUsageHistory.begin(), memSwapUsageHistory.end() - newSize); +} + +void MemoryMonitor::collectData() { + MemState memState = performanceCounter->getMemState(); + maxMemTotal = std::max(maxMemTotal, memState.memTotal); + memSum += memState.usedMem; + swapSum += memState.usedSwap; + ++samplesNumber; + maxMem = std::max(maxMem, memState.usedMem); + maxSwap = std::max(maxSwap, memState.usedSwap); + + memSwapUsageHistory.emplace_back(memState.usedMem, memState.usedSwap); + if (memSwapUsageHistory.size() > historySize) { + memSwapUsageHistory.pop_front(); + } +} + +std::size_t MemoryMonitor::getHistorySize() const { + return historySize; +} + +std::deque<std::pair<double, double>> MemoryMonitor::getLastHistory() const { + return memSwapUsageHistory; +} + +double MemoryMonitor::getMeanMem() const { + return samplesNumber ? memSum / samplesNumber : 0; +} + +double MemoryMonitor::getMeanSwap() const { + return samplesNumber ? 
swapSum / samplesNumber : 0; +} + +double MemoryMonitor::getMaxMem() const { + return maxMem; +} + +double MemoryMonitor::getMaxSwap() const { + return maxSwap; +} + +double MemoryMonitor::getMemTotal() const { + return memTotal; +} + +double MemoryMonitor::getMaxMemTotal() const { + return maxMemTotal; +} diff --git a/python/openvino/runtime/common/monitors/src/presenter.cpp b/python/openvino/runtime/common/monitors/src/presenter.cpp new file mode 100644 index 0000000..61f5e15 --- /dev/null +++ b/python/openvino/runtime/common/monitors/src/presenter.cpp @@ -0,0 +1,330 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include <cctype> +#include <chrono> +#include <iomanip> +#include <numeric> +#include <string> +#include <vector> + +#include "monitors/presenter.h" + +namespace { +const std::map<int, MonitorType> keyToMonitorType{ + {'C', MonitorType::CpuAverage}, + {'D', MonitorType::DistributionCpu}, + {'M', MonitorType::Memory}}; + +std::set<MonitorType> strKeysToMonitorSet(const std::string& keys) { + std::set<MonitorType> enabledMonitors; + if (keys == "h") { + return enabledMonitors; + } + for (unsigned char key: keys) { + if (key == 'h') { + throw std::runtime_error("Unacceptable combination of monitor types-can't show and hide info at the same time"); + } + auto iter = keyToMonitorType.find(std::toupper(key)); + if (keyToMonitorType.end() == iter) { + throw std::runtime_error("Unknown monitor type"); + } else { + enabledMonitors.insert(iter->second); + } + } + return enabledMonitors; +} +} + +Presenter::Presenter(std::set<MonitorType> enabledMonitors, + int yPos, + cv::Size graphSize, + std::size_t historySize) : + yPos{yPos}, + graphSize{graphSize}, + graphPadding{std::max(1, static_cast<int>(graphSize.width * 0.05))}, + historySize{historySize}, + distributionCpuEnabled{false}, + strStream{std::ios_base::app} { + for (MonitorType monitor : enabledMonitors) { + addRemoveMonitor(monitor); + } +} + +Presenter::Presenter(const std::string& keys, int yPos, cv::Size graphSize, std::size_t historySize) : + Presenter{strKeysToMonitorSet(keys), yPos, graphSize, historySize} {} + +void Presenter::addRemoveMonitor(MonitorType monitor) { + unsigned updatedHistorySize = 1; + if (historySize > 1) { + int sampleStep = std::max(1, static_cast<int>(graphSize.width / (historySize - 1))); + // +1 to plot graphSize.width/sampleStep segments + // add round up to and an extra element if don't reach graph edge + updatedHistorySize = (graphSize.width + sampleStep - 1) / sampleStep + 1; + } + switch(monitor) { + case MonitorType::CpuAverage: { + if (cpuMonitor.getHistorySize() > 1 && distributionCpuEnabled) { + cpuMonitor.setHistorySize(1); + } else if (cpuMonitor.getHistorySize() > 1 && !distributionCpuEnabled) { + cpuMonitor.setHistorySize(0); + } else { // cpuMonitor.getHistorySize() <= 1 + cpuMonitor.setHistorySize(updatedHistorySize); + } + break; + } + case MonitorType::DistributionCpu: { + if (distributionCpuEnabled) { + distributionCpuEnabled = false; + if (1 == cpuMonitor.getHistorySize()) { // cpuMonitor was used only for DistributionCpu => disable it + cpuMonitor.setHistorySize(0); + } + } else { + distributionCpuEnabled = true; + cpuMonitor.setHistorySize(std::max(std::size_t{1}, cpuMonitor.getHistorySize())); + } + break; + } + case MonitorType::Memory: { + if (memoryMonitor.getHistorySize() > 1) { + memoryMonitor.setHistorySize(0); + } else { + memoryMonitor.setHistorySize(updatedHistorySize); + } + break; + } + } +} + +void Presenter::handleKey(int 
key) { + key = std::toupper(key); + if ('H' == key) { + if (0 == cpuMonitor.getHistorySize() && memoryMonitor.getHistorySize() <= 1) { + addRemoveMonitor(MonitorType::CpuAverage); + addRemoveMonitor(MonitorType::DistributionCpu); + addRemoveMonitor(MonitorType::Memory); + } else { + cpuMonitor.setHistorySize(0); + distributionCpuEnabled = false; + memoryMonitor.setHistorySize(0); + } + } else { + auto iter = keyToMonitorType.find(key); + if (keyToMonitorType.end() != iter) { + addRemoveMonitor(iter->second); + } + } +} + +void Presenter::drawGraphs(cv::Mat& frame) { + const std::chrono::steady_clock::time_point curTimeStamp = std::chrono::steady_clock::now(); + if (curTimeStamp - prevTimeStamp >= std::chrono::milliseconds{1000}) { + prevTimeStamp = curTimeStamp; + if (0 != cpuMonitor.getHistorySize()) { + cpuMonitor.collectData(); + } + if (memoryMonitor.getHistorySize() > 1) { + memoryMonitor.collectData(); + } + } + + int numberOfEnabledMonitors = (cpuMonitor.getHistorySize() > 1) + distributionCpuEnabled + + (memoryMonitor.getHistorySize() > 1); + int panelWidth = graphSize.width * numberOfEnabledMonitors + + std::max(0, numberOfEnabledMonitors - 1) * graphPadding; + while (panelWidth > frame.cols) { + panelWidth = std::max(0, panelWidth - graphSize.width - graphPadding); + --numberOfEnabledMonitors; // can't draw all monitors + } + int graphPos = std::max(0, (frame.cols - 1 - panelWidth) / 2); + int textGraphSplittingLine = graphSize.height / 5; + int graphRectHeight = graphSize.height - textGraphSplittingLine; + int sampleStep = 1; + unsigned possibleHistorySize = 1; + if (historySize > 1) { + sampleStep = std::max(1, static_cast<int>(graphSize.width / (historySize - 1))); + possibleHistorySize = (graphSize.width + sampleStep - 1) / sampleStep + 1; + } + + if (cpuMonitor.getHistorySize() > 1 && possibleHistorySize > 1 && --numberOfEnabledMonitors >= 0) { + std::deque<std::vector<double>> lastHistory = cpuMonitor.getLastHistory(); + cv::Rect intersection = cv::Rect{cv::Point(graphPos, yPos), graphSize} & cv::Rect{0, 0, frame.cols, frame.rows}; + if (!intersection.area()) { + return; + } + cv::Mat graph = frame(intersection); + graph = graph / 2 + cv::Scalar{127, 127, 127}; + + int lineXPos = graph.cols - 1; + std::vector<cv::Point> averageLoad(lastHistory.size()); + + for (int i = lastHistory.size() - 1; i >= 0; --i) { + double mean = std::accumulate(lastHistory[i].begin(), lastHistory[i].end(), 0.0) / lastHistory[i].size(); + averageLoad[i] = {lineXPos, graphSize.height - static_cast<int>(mean * graphRectHeight)}; + lineXPos -= sampleStep; + } + + cv::polylines(graph, averageLoad, false, {255, 0, 0}, 2); + cv::rectangle(frame, cv::Rect{ + cv::Point{graphPos, yPos + textGraphSplittingLine}, + cv::Size{graphSize.width, graphSize.height - textGraphSplittingLine} + }, {0, 0, 0}); + strStream.str("CPU"); + if (!lastHistory.empty()) { + strStream << ": " << std::fixed << std::setprecision(1) + << std::accumulate(lastHistory.back().begin(), lastHistory.back().end(), 0.0) + / lastHistory.back().size() * 100 << '%'; + } + int baseline; + int textWidth = cv::getTextSize(strStream.str(), + cv::FONT_HERSHEY_SIMPLEX, + textGraphSplittingLine * 0.04, + 1, + &baseline).width; + cv::putText(graph, + strStream.str(), + cv::Point{(graphSize.width - textWidth) / 2, textGraphSplittingLine - 1}, + cv::FONT_HERSHEY_SIMPLEX, + textGraphSplittingLine * 0.04, + {70, 0, 0}, + 1); + graphPos += graphSize.width + graphPadding; + } + + if (distributionCpuEnabled && --numberOfEnabledMonitors >= 0) { + 
std::deque<std::vector<double>> lastHistory = cpuMonitor.getLastHistory(); + cv::Rect intersection = cv::Rect{cv::Point(graphPos, yPos), graphSize} & cv::Rect{0, 0, frame.cols, frame.rows}; + if (!intersection.area()) { + return; + } + cv::Mat graph = frame(intersection); + graph = graph / 2 + cv::Scalar{127, 127, 127}; + + if (!lastHistory.empty()) { + int rectXPos = 0; + int step = (graph.cols + lastHistory.back().size() - 1) / lastHistory.back().size(); // round up + double sum = 0; + for (double coreLoad : lastHistory.back()) { + sum += coreLoad; + int height = static_cast<int>(graphRectHeight * coreLoad); + cv::Rect pillar{cv::Point{rectXPos, graph.rows - height}, cv::Size{step, height}}; + cv::rectangle(graph, pillar, {255, 0, 0}, cv::FILLED); + cv::rectangle(graph, pillar, {0, 0, 0}); + rectXPos += step; + } + sum /= lastHistory.back().size(); + int yLine = graph.rows - static_cast<int>(graphRectHeight * sum); + cv::line(graph, cv::Point{0, yLine}, cv::Point{graph.cols, yLine}, {0, 255, 0}, 2); + } + cv::Rect border{cv::Point{graphPos, yPos + textGraphSplittingLine}, + cv::Size{graphSize.width, graphSize.height - textGraphSplittingLine}}; + cv::rectangle(frame, border, {0, 0, 0}); + strStream.str("Core load"); + if (!lastHistory.empty()) { + strStream << ": " << std::fixed << std::setprecision(1) + << std::accumulate(lastHistory.back().begin(), lastHistory.back().end(), 0.0) + / lastHistory.back().size() * 100 << '%'; + } + int baseline; + int textWidth = cv::getTextSize(strStream.str(), + cv::FONT_HERSHEY_SIMPLEX, + textGraphSplittingLine * 0.04, + 1, + &baseline).width; + cv::putText(graph, + strStream.str(), + cv::Point{(graphSize.width - textWidth) / 2, textGraphSplittingLine - 1}, + cv::FONT_HERSHEY_SIMPLEX, + textGraphSplittingLine * 0.04, + {0, 70, 0}); + graphPos += graphSize.width + graphPadding; + } + + if (memoryMonitor.getHistorySize() > 1 && possibleHistorySize > 1 && --numberOfEnabledMonitors >= 0) { + std::deque<std::pair<double, double>> lastHistory = memoryMonitor.getLastHistory(); + cv::Rect intersection = cv::Rect{cv::Point(graphPos, yPos), graphSize} & cv::Rect{0, 0, frame.cols, frame.rows}; + if (!intersection.area()) { + return; + } + cv::Mat graph = frame(intersection); + graph = graph / 2 + cv::Scalar{127, 127, 127}; + int histxPos = graph.cols - 1; + double range = std::min(memoryMonitor.getMaxMemTotal() + memoryMonitor.getMaxSwap(), + (memoryMonitor.getMaxMem() + memoryMonitor.getMaxSwap()) * 1.2); + if (lastHistory.size() > 1) { + for (auto memUsageIt = lastHistory.rbegin(); memUsageIt != lastHistory.rend() - 1; ++memUsageIt) { + constexpr double SWAP_THRESHOLD = 10.0 / 1024; // 10 MiB + cv::Vec3b color = + (memoryMonitor.getMemTotal() * 0.95 > memUsageIt->first) || (memUsageIt->second < SWAP_THRESHOLD) ? 
+ cv::Vec3b{0, 255, 255} : + cv::Vec3b{0, 0, 255}; + cv::Point right{histxPos, + graph.rows - static_cast<int>(graphRectHeight * (memUsageIt->first + memUsageIt->second) / range)}; + cv::Point left{histxPos - sampleStep, + graph.rows - static_cast<int>( + graphRectHeight * ((memUsageIt + 1)->first + (memUsageIt + 1)->second) / range)}; + cv::line(graph, right, left, color, 2); + histxPos -= sampleStep; + } + } + + cv::Rect border{cv::Point{graphPos, yPos + textGraphSplittingLine}, + cv::Size{graphSize.width, graphSize.height - textGraphSplittingLine}}; + cv::rectangle(frame, {border}, {0, 0, 0}); + if (lastHistory.empty()) { + strStream.str("Memory"); + } else { + strStream.str(""); + strStream << std::fixed << std::setprecision(1) << lastHistory.back().first << " + " + << lastHistory.back().second << " GiB"; + } + int baseline; + int textWidth = cv::getTextSize(strStream.str(), + cv::FONT_HERSHEY_SIMPLEX, + textGraphSplittingLine * 0.04, + 1, + &baseline).width; + cv::putText(graph, + strStream.str(), + cv::Point{(graphSize.width - textWidth) / 2, textGraphSplittingLine - 1}, + cv::FONT_HERSHEY_SIMPLEX, + textGraphSplittingLine * 0.04, + {0, 35, 35}); + } +} + +std::vector<std::string> Presenter::reportMeans() const { + std::vector<std::string> collectedData; + if (cpuMonitor.getHistorySize() > 1 || distributionCpuEnabled || memoryMonitor.getHistorySize() > 1) { + collectedData.push_back("Resources usage:"); + } + if (cpuMonitor.getHistorySize() > 1) { + std::ostringstream collectedDataStream; + collectedDataStream << std::fixed << std::setprecision(1); + collectedDataStream << "\tMean core utilization: "; + for (double mean : cpuMonitor.getMeanCpuLoad()) { + collectedDataStream << mean * 100 << "% "; + } + collectedData.push_back(collectedDataStream.str()); + } + if (distributionCpuEnabled) { + std::ostringstream collectedDataStream; + collectedDataStream << std::fixed << std::setprecision(1); + std::vector<double> meanCpuLoad = cpuMonitor.getMeanCpuLoad(); + double mean = std::accumulate(meanCpuLoad.begin(), meanCpuLoad.end(), 0.0) / meanCpuLoad.size(); + collectedDataStream << "\tMean CPU utilization: " << mean * 100 << "%"; + collectedData.push_back(collectedDataStream.str()); + } + if (memoryMonitor.getHistorySize() > 1) { + std::ostringstream collectedDataStream; + collectedDataStream << std::fixed << std::setprecision(1); + collectedDataStream << "\tMemory mean usage: " << memoryMonitor.getMeanMem() << " GiB"; + collectedData.push_back(collectedDataStream.str()); + collectedDataStream.str(""); + collectedDataStream << "\tMean swap usage: " << memoryMonitor.getMeanSwap() << " GiB"; + collectedData.push_back(collectedDataStream.str()); + } + + return collectedData; +} diff --git a/python/openvino/runtime/common/monitors/src/query_wrapper.cpp b/python/openvino/runtime/common/monitors/src/query_wrapper.cpp new file mode 100644 index 0000000..5c238d1 --- /dev/null +++ b/python/openvino/runtime/common/monitors/src/query_wrapper.cpp @@ -0,0 +1,22 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "monitors/query_wrapper.h" + +#include <Windows.h> +#include <system_error> + +QueryWrapper::QueryWrapper() { + PDH_STATUS status = PdhOpenQuery(NULL, NULL, &query); + if (ERROR_SUCCESS != status) { + throw std::system_error(status, std::system_category(), "PdhOpenQuery() failed"); + } +} +QueryWrapper::~QueryWrapper() { + PdhCloseQuery(query); +} + +QueryWrapper::operator PDH_HQUERY() const { + return query; +} diff --git 
a/python/openvino/runtime/common/pipelines/CMakeLists.txt b/python/openvino/runtime/common/pipelines/CMakeLists.txt new file mode 100644 index 0000000..b8b128a --- /dev/null +++ b/python/openvino/runtime/common/pipelines/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright (C) 2018-2019 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +file(GLOB SOURCES ./src/*.cpp) +file(GLOB HEADERS ./include/pipelines/*.h) + +# Create named folders for the sources within the .vcproj +# Empty name lists them directly under the .vcproj +source_group("src" FILES ${SOURCES}) +source_group("include" FILES ${HEADERS}) + +add_library(pipelines STATIC ${SOURCES} ${HEADERS}) +target_include_directories(pipelines PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") +target_link_libraries(pipelines PRIVATE openvino::runtime models utils opencv_core opencv_imgproc) diff --git a/python/openvino/runtime/common/pipelines/include/pipelines/async_pipeline.h b/python/openvino/runtime/common/pipelines/include/pipelines/async_pipeline.h new file mode 100644 index 0000000..6661c00 --- /dev/null +++ b/python/openvino/runtime/common/pipelines/include/pipelines/async_pipeline.h @@ -0,0 +1,121 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#pragma once +#include <stdint.h> + +#include <condition_variable> +#include <exception> +#include <memory> +#include <mutex> +#include <unordered_map> + +#include <openvino/openvino.hpp> + +#include <models/results.h> +#include <utils/performance_metrics.hpp> + +#include "pipelines/requests_pool.h" + +class ModelBase; +struct InputData; +struct MetaData; +struct ModelConfig; + +/// This is base class for asynchronous pipeline +/// Derived classes should add functions for data submission and output processing +class AsyncPipeline { +public: + /// Loads model and performs required initialization + /// @param modelInstance pointer to model object. Object it points to should not be destroyed manually after passing + /// pointer to this function. + /// @param config - fine tuning configuration for model + /// @param core - reference to ov::Core instance to use. + /// If it is omitted, new instance of ov::Core will be created inside. + AsyncPipeline(std::unique_ptr<ModelBase>&& modelInstance, const ModelConfig& config, ov::Core& core); + virtual ~AsyncPipeline(); + + /// Waits until either output data becomes available or pipeline allows to submit more input data. + /// @param shouldKeepOrder if true, function will treat results as ready only if next sequential result (frame) is + /// ready (so results can be extracted in the same order as they were submitted). Otherwise, function will return if + /// any result is ready. + void waitForData(bool shouldKeepOrder = true); + + /// @returns true if there's available infer requests in the pool + /// and next frame can be submitted for processing, false otherwise. 
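+    /// A typical submit/consume loop looks as follows (illustrative sketch only;
+    /// readFrame, frame and render are hypothetical, and ImageInputData is the demo
+    /// input wrapper assumed to exist alongside this header):
+    ///
+    ///     while (readFrame(frame)) {
+    ///         if (pipeline.isReadyToProcess())
+    ///             pipeline.submitData(ImageInputData(frame),
+    ///                                 std::make_shared<ImageMetaData>(frame, std::chrono::steady_clock::now()));
+    ///         pipeline.waitForData();
+    ///         while (auto result = pipeline.getResult())
+    ///             render(*result);
+    ///     }
+    ///     pipeline.waitForTotalCompletion();  // then drain any remaining results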
+ bool isReadyToProcess() { + return requestsPool->isIdleRequestAvailable(); + } + + /// Waits for all currently submitted requests to be completed. + /// + void waitForTotalCompletion() { + if (requestsPool) + requestsPool->waitForTotalCompletion(); + } + + /// Submits data to the model for inference + /// @param inputData - input data to be submitted + /// @param metaData - shared pointer to metadata container. + /// Might be null. This pointer will be passed through pipeline and put to the final result structure. + /// @returns -1 if image cannot be scheduled for processing (there's no free InferRequest available). + /// Otherwise returns unique sequential frame ID for this particular request. Same frame ID will be written in the + /// result structure. + virtual int64_t submitData(const InputData& inputData, const std::shared_ptr<MetaData>& metaData); + + /// Gets available data from the queue + /// @param shouldKeepOrder if true, function will treat results as ready only if next sequential result (frame) is + /// ready (so results can be extracted in the same order as they were submitted). Otherwise, function will return if + /// any result is ready. + virtual std::unique_ptr<ResultBase> getResult(bool shouldKeepOrder = true); + + PerformanceMetrics getInferenceMetircs() { + return inferenceMetrics; + } + PerformanceMetrics getPreprocessMetrics() { + return preprocessMetrics; + } + PerformanceMetrics getPostprocessMetrics() { + return postprocessMetrics; + } + +protected: + /// Returns processed result, if available + /// @param shouldKeepOrder if true, function will return processed data sequentially, + /// keeping original frames order (as they were submitted). Otherwise, function will return processed data in random + /// order. + /// @returns InferenceResult with processed information or empty InferenceResult (with negative frameID) if there's + /// no any results yet. + virtual InferenceResult getInferenceResult(bool shouldKeepOrder); + + std::unique_ptr<RequestsPool> requestsPool; + std::unordered_map<int64_t, InferenceResult> completedInferenceResults; + + ov::CompiledModel compiledModel; + + std::mutex mtx; + std::condition_variable condVar; + + int64_t inputFrameId = 0; + int64_t outputFrameId = 0; + + std::exception_ptr callbackException = nullptr; + + std::unique_ptr<ModelBase> model; + PerformanceMetrics inferenceMetrics; + PerformanceMetrics preprocessMetrics; + PerformanceMetrics postprocessMetrics; +}; diff --git a/python/openvino/runtime/common/pipelines/include/pipelines/metadata.h b/python/openvino/runtime/common/pipelines/include/pipelines/metadata.h new file mode 100644 index 0000000..aca18ee --- /dev/null +++ b/python/openvino/runtime/common/pipelines/include/pipelines/metadata.h @@ -0,0 +1,51 @@ +/* +// Copyright (C) 2018-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/
+
+#pragma once
+#include <utils/ocv_common.hpp>
+
+struct MetaData {
+    virtual ~MetaData() {}
+
+    template <class T>
+    T& asRef() {
+        return dynamic_cast<T&>(*this);
+    }
+
+    template <class T>
+    const T& asRef() const {
+        return dynamic_cast<const T&>(*this);
+    }
+};
+
+struct ImageMetaData : public MetaData {
+    cv::Mat img;
+    std::chrono::steady_clock::time_point timeStamp;
+
+    ImageMetaData() {}
+
+    ImageMetaData(cv::Mat img, std::chrono::steady_clock::time_point timeStamp) : img(img), timeStamp(timeStamp) {}
+};
+
+struct ClassificationImageMetaData : public ImageMetaData {
+    unsigned int groundTruthId;
+
+    ClassificationImageMetaData(cv::Mat img,
+                                std::chrono::steady_clock::time_point timeStamp,
+                                unsigned int groundTruthId)
+        : ImageMetaData(img, timeStamp),
+          groundTruthId(groundTruthId) {}
+};
diff --git a/python/openvino/runtime/common/pipelines/include/pipelines/requests_pool.h b/python/openvino/runtime/common/pipelines/include/pipelines/requests_pool.h
new file mode 100644
index 0000000..d9b220e
--- /dev/null
+++ b/python/openvino/runtime/common/pipelines/include/pipelines/requests_pool.h
@@ -0,0 +1,67 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include <stddef.h>
+
+#include <mutex>
+#include <utility>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+/// This class stores the pool of infer requests used by the asynchronous pipeline
+///
+class RequestsPool {
+public:
+    RequestsPool(ov::CompiledModel& compiledModel, unsigned int size);
+    ~RequestsPool();
+
+    /// Returns an idle request from the pool. The returned request is automatically marked as In Use (this status is
+    /// reset after request processing completes). This function is thread safe as long as the request is used only
+    /// until the setRequestIdle call.
+    /// @returns an idle request, or a default-constructed (empty) ov::InferRequest if all requests are in use.
+    ov::InferRequest getIdleRequest();
+
+    /// Sets a particular request to the Idle state.
+    /// This function is thread safe as long as the request provided is not used after the call to this function.
+    /// @param request - request to be returned to idle state
+    void setRequestIdle(const ov::InferRequest& request);
+
+    /// Returns the number of requests in use. This function is thread safe.
+    /// @returns number of requests in use
+    size_t getInUseRequestsCount();
+
+    /// Checks whether at least one request in the pool is idle. This function is thread safe.
+    /// @returns true if an idle request is available, false otherwise
+    bool isIdleRequestAvailable();
+
+    /// Waits for completion of every non-idle request in the pool.
+    /// getIdleRequest should not be called together with this function or after it, to avoid a race condition or an
+    /// invalid state.
+    void waitForTotalCompletion();
+
+    /// Returns the list of all infer requests in the pool.
+    /// @returns list of all infer requests in the pool.
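+    /// (Pool usage sketch; assumed, not taken from the original header. A caller
+    /// checks a request out, arms a callback that returns it, and starts inference:
+    ///
+    ///     ov::InferRequest req = pool.getIdleRequest();  // marked busy on success
+    ///     if (req) {                                     // empty request: pool exhausted
+    ///         req.set_callback([&pool, req](std::exception_ptr) mutable {
+    ///             pool.setRequestIdle(req);              // hand the request back on completion
+    ///         });
+    ///         req.start_async();
+    ///     }
+    ///
+    /// AsyncPipeline::submitData() below follows this same pattern.)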
+ std::vector<ov::InferRequest> getInferRequestsList(); + +private: + std::vector<std::pair<ov::InferRequest, bool>> requests; + size_t numRequestsInUse; + std::mutex mtx; +}; diff --git a/python/openvino/runtime/common/pipelines/src/async_pipeline.cpp b/python/openvino/runtime/common/pipelines/src/async_pipeline.cpp new file mode 100644 index 0000000..3259280 --- /dev/null +++ b/python/openvino/runtime/common/pipelines/src/async_pipeline.cpp @@ -0,0 +1,166 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +*/ + +#include "pipelines/async_pipeline.h" + +#include <chrono> +#include <cstdint> +#include <map> +#include <memory> +#include <stdexcept> +#include <string> +#include <utility> +#include <vector> + +#include <openvino/openvino.hpp> + +#include <models/model_base.h> +#include <models/results.h> +#include <utils/config_factory.h> +#include <utils/performance_metrics.hpp> +#include <utils/slog.hpp> + +struct InputData; +struct MetaData; + +AsyncPipeline::AsyncPipeline(std::unique_ptr<ModelBase>&& modelInstance, const ModelConfig& config, ov::Core& core) + : model(std::move(modelInstance)) { + compiledModel = model->compileModel(config, core); + // --------------------------- Create infer requests ------------------------------------------------ + unsigned int nireq = config.maxAsyncRequests; + if (nireq == 0) { + try { + nireq = compiledModel.get_property(ov::optimal_number_of_infer_requests); + } catch (const ov::Exception& ex) { + throw std::runtime_error( + std::string("Every device used with the demo should support compiled model's property " + "'OPTIMAL_NUMBER_OF_INFER_REQUESTS'. Failed to query the property with error: ") + + ex.what()); + } + } + slog::info << "\tNumber of inference requests: " << nireq << slog::endl; + requestsPool.reset(new RequestsPool(compiledModel, nireq)); + // --------------------------- Call onLoadCompleted to complete initialization of model ------------- + model->onLoadCompleted(requestsPool->getInferRequestsList()); +} + +AsyncPipeline::~AsyncPipeline() { + waitForTotalCompletion(); +} + +void AsyncPipeline::waitForData(bool shouldKeepOrder) { + std::unique_lock<std::mutex> lock(mtx); + + condVar.wait(lock, [&]() { + return callbackException != nullptr || requestsPool->isIdleRequestAvailable() || + (shouldKeepOrder ? 
completedInferenceResults.find(outputFrameId) != completedInferenceResults.end() + : !completedInferenceResults.empty()); + }); + + if (callbackException) { + std::rethrow_exception(callbackException); + } +} + +int64_t AsyncPipeline::submitData(const InputData& inputData, const std::shared_ptr<MetaData>& metaData) { + auto frameID = inputFrameId; + + auto request = requestsPool->getIdleRequest(); + if (!request) { + return -1; + } + + auto startTime = std::chrono::steady_clock::now(); + auto internalModelData = model->preprocess(inputData, request); + preprocessMetrics.update(startTime); + + request.set_callback( + [this, request, frameID, internalModelData, metaData, startTime](std::exception_ptr ex) mutable { + { + const std::lock_guard<std::mutex> lock(mtx); + inferenceMetrics.update(startTime); + try { + if (ex) { + std::rethrow_exception(ex); + } + InferenceResult result; + + result.frameId = frameID; + result.metaData = std::move(metaData); + result.internalModelData = std::move(internalModelData); + + for (const auto& outName : model->getOutputsNames()) { + auto tensor = request.get_tensor(outName); + result.outputsData.emplace(outName, tensor); + } + + completedInferenceResults.emplace(frameID, result); + requestsPool->setRequestIdle(request); + } catch (...) { + if (!callbackException) { + callbackException = std::current_exception(); + } + } + } + condVar.notify_one(); + }); + + inputFrameId++; + if (inputFrameId < 0) + inputFrameId = 0; + + request.start_async(); + + return frameID; +} + +std::unique_ptr<ResultBase> AsyncPipeline::getResult(bool shouldKeepOrder) { + auto infResult = AsyncPipeline::getInferenceResult(shouldKeepOrder); + if (infResult.IsEmpty()) { + return std::unique_ptr<ResultBase>(); + } + auto startTime = std::chrono::steady_clock::now(); + auto result = model->postprocess(infResult); + postprocessMetrics.update(startTime); + + *result = static_cast<ResultBase&>(infResult); + return result; +} + +InferenceResult AsyncPipeline::getInferenceResult(bool shouldKeepOrder) { + InferenceResult retVal; + { + const std::lock_guard<std::mutex> lock(mtx); + + const auto& it = + shouldKeepOrder ? completedInferenceResults.find(outputFrameId) : completedInferenceResults.begin(); + + if (it != completedInferenceResults.end()) { + retVal = std::move(it->second); + completedInferenceResults.erase(it); + } + } + + if (!retVal.IsEmpty()) { + outputFrameId = retVal.frameId; + outputFrameId++; + if (outputFrameId < 0) { + outputFrameId = 0; + } + } + + return retVal; +} diff --git a/python/openvino/runtime/common/pipelines/src/requests_pool.cpp b/python/openvino/runtime/common/pipelines/src/requests_pool.cpp new file mode 100644 index 0000000..93230c9 --- /dev/null +++ b/python/openvino/runtime/common/pipelines/src/requests_pool.cpp @@ -0,0 +1,94 @@ +/* +// Copyright (C) 2020-2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+*/ + +#include "pipelines/requests_pool.h" + +#include <algorithm> +#include <exception> +#include <vector> + +#include <openvino/openvino.hpp> + +RequestsPool::RequestsPool(ov::CompiledModel& compiledModel, unsigned int size) : numRequestsInUse(0) { + for (unsigned int infReqId = 0; infReqId < size; ++infReqId) { + requests.emplace_back(compiledModel.create_infer_request(), false); + } +} + +RequestsPool::~RequestsPool() { + // Setting empty callback to free resources allocated for previously assigned lambdas + for (auto& pair : requests) { + pair.first.set_callback([](std::exception_ptr) {}); + } +} + +ov::InferRequest RequestsPool::getIdleRequest() { + std::lock_guard<std::mutex> lock(mtx); + + const auto& it = std::find_if(requests.begin(), requests.end(), [](const std::pair<ov::InferRequest, bool>& x) { + return !x.second; + }); + if (it == requests.end()) { + return ov::InferRequest(); + } else { + it->second = true; + numRequestsInUse++; + return it->first; + } +} + +void RequestsPool::setRequestIdle(const ov::InferRequest& request) { + std::lock_guard<std::mutex> lock(mtx); + const auto& it = std::find_if(this->requests.begin(), + this->requests.end(), + [&request](const std::pair<ov::InferRequest, bool>& x) { + return x.first == request; + }); + it->second = false; + numRequestsInUse--; +} + +size_t RequestsPool::getInUseRequestsCount() { + std::lock_guard<std::mutex> lock(mtx); + return numRequestsInUse; +} + +bool RequestsPool::isIdleRequestAvailable() { + std::lock_guard<std::mutex> lock(mtx); + return numRequestsInUse < requests.size(); +} + +void RequestsPool::waitForTotalCompletion() { + // Do not synchronize here to avoid deadlock (despite synchronization in other functions) + // Request status will be changed to idle in callback, + // upon completion of request we're waiting for. 
Synchronization is applied there + for (auto pair : requests) { + if (pair.second) { + pair.first.wait(); + } + } +} + +std::vector<ov::InferRequest> RequestsPool::getInferRequestsList() { + std::lock_guard<std::mutex> lock(mtx); + std::vector<ov::InferRequest> retVal; + retVal.reserve(requests.size()); + for (auto& pair : requests) { + retVal.push_back(pair.first); + } + + return retVal; +} diff --git a/python/openvino/runtime/common/utils/CMakeLists.txt b/python/openvino/runtime/common/utils/CMakeLists.txt new file mode 100644 index 0000000..e1e7293 --- /dev/null +++ b/python/openvino/runtime/common/utils/CMakeLists.txt @@ -0,0 +1,61 @@ +# Copyright (C) 2018-2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +set(TARGET_NAME "ie_samples_utils") + +file(GLOB_RECURSE SOURCES "*.cpp" "*.hpp" "*.h") +source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SOURCES}) + +add_library(${TARGET_NAME} STATIC EXCLUDE_FROM_ALL ${SOURCES}) +set_target_properties(${TARGET_NAME} PROPERTIES FOLDER "src") + +target_include_directories(${TARGET_NAME} + PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") + +find_package(OpenVINO REQUIRED COMPONENTS Runtime) + +if(TARGET gflags) + set(GFLAGS_TARGET gflags) +else() + if(EXISTS /etc/debian_version) + set(gflags_component nothreads_static) + else() + find_package(gflags QUIET OPTIONAL_COMPONENTS nothreads_static) + if(NOT gflags_FOUND) + set(gflags_component shared) + else() + set(gflags_component nothreads_static) + endif() + endif() + find_package(gflags QUIET OPTIONAL_COMPONENTS ${gflags_component}) + if(gflags_FOUND) + if(TARGET ${GFLAGS_TARGET}) + # nothing + elseif(TARGET gflags_nothreads-static) + # Debian 9: gflag_component is ignored + set(GFLAGS_TARGET gflags_nothreads-static) + elseif(TARGET gflags-shared) + # gflags shared case for CentOS / RHEL / Fedora + set(GFLAGS_TARGET gflags-shared) + else() + message(FATAL_ERROR "Internal error: failed to find imported target 'gflags' using '${gflags_component}' component") + endif() + + message(STATUS "gflags (${gflags_VERSION}) is found at ${gflags_DIR} using '${gflags_component}' component") + endif() + + if(NOT gflags_FOUND) + if(EXISTS "$ENV{INTEL_OPENVINO_DIR}/samples/cpp/thirdparty/gflags") + add_subdirectory("$ENV{INTEL_OPENVINO_DIR}/samples/cpp/thirdparty/gflags" "${CMAKE_CURRENT_BINARY_DIR}/gflag") + set(GFLAGS_TARGET gflags_nothreads_static) + else() + message(FATAL_ERROR "Failed to find 'gflags' library using '${gflags_component}' component") + endif() + endif() +endif() + +target_link_libraries(${TARGET_NAME} PUBLIC openvino::runtime ${GFLAGS_TARGET}) + +if(COMMAND add_clang_format_target) + add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) +endif() diff --git a/python/openvino/runtime/common/utils/include/samples/args_helper.hpp b/python/openvino/runtime/common/utils/include/samples/args_helper.hpp new file mode 100644 index 0000000..6626140 --- /dev/null +++ b/python/openvino/runtime/common/utils/include/samples/args_helper.hpp @@ -0,0 +1,112 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief a header file with common samples functionality + * @file args_helper.hpp + */ + +#pragma once + +// clang-format off +#include <string> +#include <vector> + +#include "openvino/openvino.hpp" + +#include "samples/slog.hpp" +// clang-format on + +/** + * @brief This function checks input args and existence of specified files in a given folder + * @param arg path to a file to be checked for existence + * @return files 
updated vector of verified input files + */ +void readInputFilesArguments(std::vector<std::string>& files, const std::string& arg); + +/** + * @brief This function find -i/--images key in input args + * It's necessary to process multiple values for single key + * @return files updated vector of verified input files + */ +void parseInputFilesArguments(std::vector<std::string>& files); +std::map<std::string, std::string> parseArgMap(std::string argMap); + +void printInputAndOutputsInfo(const ov::Model& network); + +void configurePrePostProcessing(std::shared_ptr<ov::Model>& function, + const std::string& ip, + const std::string& op, + const std::string& iop, + const std::string& il, + const std::string& ol, + const std::string& iol, + const std::string& iml, + const std::string& oml, + const std::string& ioml); + +void printInputAndOutputsInfo(const ov::Model& network); +ov::element::Type getPrecision2(const std::string& value); + +template <class T> +void printInputAndOutputsInfoShort(const T& network) { + slog::info << "Network inputs:" << slog::endl; + for (auto&& input : network.inputs()) { + std::string in_name; + std::string node_name; + + // Workaround for "tensor has no name" issue + try { + for (const auto& name : input.get_names()) { + in_name += name + " , "; + } + in_name = in_name.substr(0, in_name.size() - 3); + } catch (const ov::Exception&) { + } + + try { + node_name = input.get_node()->get_friendly_name(); + } catch (const ov::Exception&) { + } + + if (in_name == "") { + in_name = "***NO_NAME***"; + } + if (node_name == "") { + node_name = "***NO_NAME***"; + } + + slog::info << " " << in_name << " (node: " << node_name << ") : " << input.get_element_type() << " / " + << ov::layout::get_layout(input).to_string() << " / " << input.get_partial_shape() << slog::endl; + } + + slog::info << "Network outputs:" << slog::endl; + for (auto&& output : network.outputs()) { + std::string out_name; + std::string node_name; + + // Workaround for "tensor has no name" issue + try { + for (const auto& name : output.get_names()) { + out_name += name + " , "; + } + out_name = out_name.substr(0, out_name.size() - 3); + } catch (const ov::Exception&) { + } + try { + node_name = output.get_node()->get_input_node_ptr(0)->get_friendly_name(); + } catch (const ov::Exception&) { + } + + if (out_name == "") { + out_name = "***NO_NAME***"; + } + if (node_name == "") { + node_name = "***NO_NAME***"; + } + + slog::info << " " << out_name << " (node: " << node_name << ") : " << output.get_element_type() << " / " + << ov::layout::get_layout(output).to_string() << " / " << output.get_partial_shape() << slog::endl; + } +} diff --git a/python/openvino/runtime/common/utils/include/samples/classification_results.h b/python/openvino/runtime/common/utils/include/samples/classification_results.h new file mode 100644 index 0000000..e1bc20f --- /dev/null +++ b/python/openvino/runtime/common/utils/include/samples/classification_results.h @@ -0,0 +1,205 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief a header file with output classification results + * @file classification_results.h + */ +#pragma once + +#include <algorithm> +#include <iomanip> +#include <iostream> +#include <string> +#include <utility> +#include <vector> + +#include "openvino/openvino.hpp" + +/** + * @class ClassificationResult + * @brief A ClassificationResult creates an output table with results + */ +class ClassificationResult { +private: + const std::string _classidStr = "classid"; + 
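+    // Usage sketch (assumed, not part of the original header; request and labels
+    // are placeholders): wrap an output tensor to print a top-5 table for one image.
+    //
+    //     ClassificationResult res(request.get_output_tensor(), {"cat.bmp"}, 1, 5, labels);
+    //     res.show();  // prints the classid / probability / label columns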
const std::string _probabilityStr = "probability"; + const std::string _labelStr = "label"; + size_t _nTop; + ov::Tensor _outTensor; + const std::vector<std::string> _labels; + const std::vector<std::string> _imageNames; + const size_t _batchSize; + std::vector<unsigned> _results; + + void printHeader() { + std::cout << _classidStr << " " << _probabilityStr; + if (!_labels.empty()) + std::cout << " " << _labelStr; + std::string classidColumn(_classidStr.length(), '-'); + std::string probabilityColumn(_probabilityStr.length(), '-'); + std::string labelColumn(_labelStr.length(), '-'); + std::cout << std::endl << classidColumn << " " << probabilityColumn; + if (!_labels.empty()) + std::cout << " " << labelColumn; + std::cout << std::endl; + } + + /** + * @brief Gets the top n results from a tensor + * + * @param n Top n count + * @param input 1D tensor that contains probabilities + * @param output Vector of indexes for the top n places + */ + template <class T> + void topResults(unsigned int n, const ov::Tensor& input, std::vector<unsigned>& output) { + ov::Shape shape = input.get_shape(); + size_t input_rank = shape.size(); + OPENVINO_ASSERT(input_rank != 0 && shape[0] != 0, "Input tensor has incorrect dimensions!"); + size_t batchSize = shape[0]; + std::vector<unsigned> indexes(input.get_size() / batchSize); + + n = static_cast<unsigned>(std::min<size_t>((size_t)n, input.get_size())); + output.resize(n * batchSize); + + for (size_t i = 0; i < batchSize; i++) { + const size_t offset = i * (input.get_size() / batchSize); + const T* batchData = input.data<const T>(); + batchData += offset; + + std::iota(std::begin(indexes), std::end(indexes), 0); + std::partial_sort(std::begin(indexes), + std::begin(indexes) + n, + std::end(indexes), + [&batchData](unsigned l, unsigned r) { + return batchData[l] > batchData[r]; + }); + for (unsigned j = 0; j < n; j++) { + output.at(i * n + j) = indexes.at(j); + } + } + } + + /** + * @brief Gets the top n results from a blob + * + * @param n Top n count + * @param input 1D blob that contains probabilities + * @param output Vector of indexes for the top n places + */ + void topResults(unsigned int n, const ov::Tensor& input, std::vector<unsigned>& output) { +#define TENSOR_TOP_RESULT(elem_type) \ + case ov::element::Type_t::elem_type: { \ + using tensor_type = ov::fundamental_type_for<ov::element::Type_t::elem_type>; \ + topResults<tensor_type>(n, input, output); \ + break; \ + } + + switch (input.get_element_type()) { + TENSOR_TOP_RESULT(f32); + TENSOR_TOP_RESULT(f64); + TENSOR_TOP_RESULT(f16); + TENSOR_TOP_RESULT(i16); + TENSOR_TOP_RESULT(u8); + TENSOR_TOP_RESULT(i8); + TENSOR_TOP_RESULT(u16); + TENSOR_TOP_RESULT(i32); + TENSOR_TOP_RESULT(u32); + TENSOR_TOP_RESULT(i64); + TENSOR_TOP_RESULT(u64); + default: + OPENVINO_ASSERT(false, "cannot locate tensor with element type: ", input.get_element_type()); + } + +#undef TENSOR_TOP_RESULT + } + +public: + explicit ClassificationResult(const ov::Tensor& output_tensor, + const std::vector<std::string>& image_names = {}, + size_t batch_size = 1, + size_t num_of_top = 10, + const std::vector<std::string>& labels = {}) + : _nTop(num_of_top), + _outTensor(output_tensor), + _labels(labels), + _imageNames(image_names), + _batchSize(batch_size), + _results() { + OPENVINO_ASSERT(_imageNames.size() == _batchSize, "Batch size should be equal to the number of images."); + + topResults(_nTop, _outTensor, _results); + } + + /** + * @brief prints formatted classification results + */ + void show() { + /** Print the result iterating 
over each batch **/
+        std::ios::fmtflags fmt(std::cout.flags());
+        std::cout << std::endl << "Top " << _nTop << " results:" << std::endl << std::endl;
+        for (size_t image_id = 0; image_id < _batchSize; ++image_id) {
+            std::string out(_imageNames[image_id].begin(), _imageNames[image_id].end());
+            std::cout << "Image " << out;
+            std::cout.flush();
+            std::cout.clear();
+            std::cout << std::endl << std::endl;
+            printHeader();
+
+            for (size_t id = image_id * _nTop, cnt = 0; id < (image_id + 1) * _nTop; ++cnt, ++id) {
+                std::cout.precision(7);
+                // Getting probability for resulting class
+                const auto index = _results.at(id) + image_id * (_outTensor.get_size() / _batchSize);
+                const auto result = _outTensor.data<const float>()[index];
+
+                std::cout << std::setw(static_cast<int>(_classidStr.length())) << std::left << _results.at(id) << " ";
+                std::cout << std::left << std::setw(static_cast<int>(_probabilityStr.length())) << std::fixed << result;
+
+                if (!_labels.empty()) {
+                    std::cout << " " + _labels[_results.at(id)];
+                }
+                std::cout << std::endl;
+            }
+            std::cout << std::endl;
+        }
+        std::cout.flags(fmt);
+    }
+
+    void print() {
+        /** Print the result iterating over each batch **/
+        std::ios::fmtflags fmt(std::cout.flags());
+        std::cout << std::endl << "Top " << _nTop << " results:" << std::endl << std::endl;
+        for (size_t image_id = 0; image_id < _batchSize; ++image_id) {
+            std::string out(_imageNames[image_id].begin(), _imageNames[image_id].end());
+            std::cout << "Image " << out;
+            std::cout.flush();
+            std::cout.clear();
+            std::cout << std::endl << std::endl;
+            printHeader();
+
+            for (size_t id = image_id * _nTop, cnt = 0; id < (image_id + 1) * _nTop; ++cnt, ++id) {
+                std::cout.precision(7);
+                // Getting probability for resulting class
+                const auto index = _results.at(id) + image_id * (_outTensor.get_size() / _batchSize);
+                const auto result = _outTensor.data<const float>()[index];
+
+                std::cout << std::setw(static_cast<int>(_classidStr.length())) << std::left << _results.at(id) << " ";
+                std::cout << std::left << std::setw(static_cast<int>(_probabilityStr.length())) << std::fixed << result;
+
+                if (!_labels.empty()) {
+                    std::cout << " " + _labels[_results.at(id)];
+                }
+                std::cout << std::endl;
+            }
+            std::cout << std::endl;
+        }
+        std::cout.flags(fmt);
+    }
+
+    /**
+     * @brief returns the classification results in a vector
+     */
+    std::vector<unsigned> getResults() {
+        return _results;
+    }
+};
diff --git a/python/openvino/runtime/common/utils/include/samples/common.hpp b/python/openvino/runtime/common/utils/include/samples/common.hpp
new file mode 100644
index 0000000..448fd96
--- /dev/null
+++ b/python/openvino/runtime/common/utils/include/samples/common.hpp
@@ -0,0 +1,1429 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief a header file with common samples functionality
+ * @file common.hpp
+ */
+
+#pragma once
+
+#include <algorithm>
+#include <cctype>
+#include <fstream>
+#include <functional>
+#include <iomanip>
+#include <iostream>
+#include <limits>
+#include <list>
+#include <map>
+#include <random>
+#include <string>
+#include <utility>
+#include <vector>
+
+using std::setprecision;
+
+// clang-format off
+#include <inference_engine.hpp>
+#include "openvino/openvino.hpp"
+#include "slog.hpp"
+// clang-format on
+
+// @brief performance counters sort
+static constexpr char pcSort[] = "sort";
+static constexpr char pcNoSort[] = "no_sort";
+static constexpr char pcSimpleSort[] = "simple_sort";
+
+#ifndef UNUSED
+#    if defined(_MSC_VER) && !defined(__clang__)
+#        define UNUSED
+#    else
+#        define UNUSED __attribute__((unused))
+#    endif
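+// Note: UNUSED marks file-local helpers that some build configurations never
+// call, such as the `static UNUSED void writeOutputBmp(...)` definitions below.
+// It expands to __attribute__((unused)) on GCC/Clang, silencing
+// -Wunused-function warnings, and to nothing on MSVC.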
+#endif + +/** + * @brief Unicode string wrappers + */ +#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) +# define tchar wchar_t +# define tstring std::wstring +# define tmain wmain +# define TSTRING2STRING(tstr) wstring2string(tstr) +#else +# define tchar char +# define tstring std::string +# define tmain main +# define TSTRING2STRING(tstr) tstr +#endif + +#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) + +/** + * @brief Convert wstring to string + * @param ref on wstring + * @return string + */ +inline std::string wstring2string(const std::wstring& wstr) { + std::string str; + for (auto&& wc : wstr) + str += static_cast<char>(wc); + return str; +} +#endif + +/** + * @brief trim from start (in place) + * @param s - string to trim + */ +inline void ltrim(std::string& s) { + s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c) { + return !std::isspace(c); + })); +} + +/** + * @brief trim from end (in place) + * @param s - string to trim + */ +inline void rtrim(std::string& s) { + s.erase(std::find_if(s.rbegin(), + s.rend(), + [](int c) { + return !std::isspace(c); + }) + .base(), + s.end()); +} + +/** + * @brief trim from both ends (in place) + * @param s - string to trim + */ +inline std::string& trim(std::string& s) { + ltrim(s); + rtrim(s); + return s; +} +/** + * @brief Gets filename without extension + * @param filepath - full file name + * @return filename without extension + */ +inline std::string fileNameNoExt(const std::string& filepath) { + auto pos = filepath.rfind('.'); + if (pos == std::string::npos) + return filepath; + return filepath.substr(0, pos); +} + +/** + * @brief Get extension from filename + * @param filename - name of the file which extension should be extracted + * @return string with extracted file extension + */ +inline std::string fileExt(const std::string& filename) { + auto pos = filename.rfind('.'); + if (pos == std::string::npos) + return ""; + return filename.substr(pos + 1); +} + +inline slog::LogStream& operator<<(slog::LogStream& os, const ov::Version& version) { + os << "Build ................................. "; + os << version.buildNumber << slog::endl; + + return os; +} + +inline slog::LogStream& operator<<(slog::LogStream& os, const std::map<std::string, ov::Version>& versions) { + for (auto&& version : versions) { + os << version.first << slog::endl; + os << version.second << slog::endl; + } + + return os; +} + +/** + * @class Color + * @brief A Color class stores channels of a given color + */ +class Color { +private: + unsigned char _r; + unsigned char _g; + unsigned char _b; + +public: + /** + * A default constructor. 
+ * @param r - value for red channel + * @param g - value for green channel + * @param b - value for blue channel + */ + Color(unsigned char r, unsigned char g, unsigned char b) : _r(r), _g(g), _b(b) {} + + inline unsigned char red() { + return _r; + } + + inline unsigned char blue() { + return _b; + } + + inline unsigned char green() { + return _g; + } +}; + +// TODO : keep only one version of writeOutputBMP + +/** + * @brief Writes output data to image + * @param name - image name + * @param data - output data + * @param classesNum - the number of classes + * @return false if error else true + */ +static UNUSED void writeOutputBmp(std::vector<std::vector<size_t>> data, size_t classesNum, std::ostream& outFile) { + unsigned int seed = (unsigned int)time(NULL); + // Known colors for training classes from Cityscape dataset + static std::vector<Color> colors = { + {128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190}, {153, 153, 153}, + {30, 170, 250}, {0, 220, 220}, {35, 142, 107}, {152, 251, 152}, {180, 130, 70}, {60, 20, 220}, + {0, 0, 255}, {142, 0, 0}, {70, 0, 0}, {100, 60, 0}, {90, 0, 0}, {230, 0, 0}, + {32, 11, 119}, {0, 74, 111}, {81, 0, 81}}; + + while (classesNum > colors.size()) { + static std::mt19937 rng(seed); + std::uniform_int_distribution<int> dist(0, 255); + Color color(dist(rng), dist(rng), dist(rng)); + colors.push_back(color); + } + + unsigned char file[14] = { + 'B', + 'M', // magic + 0, + 0, + 0, + 0, // size in bytes + 0, + 0, // app data + 0, + 0, // app data + 40 + 14, + 0, + 0, + 0 // start of data offset + }; + unsigned char info[40] = { + 40, 0, 0, 0, // info hd size + 0, 0, 0, 0, // width + 0, 0, 0, 0, // height + 1, 0, // number color planes + 24, 0, // bits per pixel + 0, 0, 0, 0, // compression is none + 0, 0, 0, 0, // image bits size + 0x13, 0x0B, 0, 0, // horz resolution in pixel / m + 0x13, 0x0B, 0, 0, // vert resolution (0x03C3 = 96 dpi, 0x0B13 = 72 dpi) + 0, 0, 0, 0, // #colors in palette + 0, 0, 0, 0, // #important colors + }; + + auto height = data.size(); + auto width = data.at(0).size(); + + OPENVINO_ASSERT( + height < (size_t)std::numeric_limits<int32_t>::max && width < (size_t)std::numeric_limits<int32_t>::max, + "File size is too big: ", + height, + " X ", + width); + + int padSize = static_cast<int>(4 - (width * 3) % 4) % 4; + int sizeData = static_cast<int>(width * height * 3 + height * padSize); + int sizeAll = sizeData + sizeof(file) + sizeof(info); + + file[2] = (unsigned char)(sizeAll); + file[3] = (unsigned char)(sizeAll >> 8); + file[4] = (unsigned char)(sizeAll >> 16); + file[5] = (unsigned char)(sizeAll >> 24); + + info[4] = (unsigned char)(width); + info[5] = (unsigned char)(width >> 8); + info[6] = (unsigned char)(width >> 16); + info[7] = (unsigned char)(width >> 24); + + int32_t negativeHeight = -(int32_t)height; + info[8] = (unsigned char)(negativeHeight); + info[9] = (unsigned char)(negativeHeight >> 8); + info[10] = (unsigned char)(negativeHeight >> 16); + info[11] = (unsigned char)(negativeHeight >> 24); + + info[20] = (unsigned char)(sizeData); + info[21] = (unsigned char)(sizeData >> 8); + info[22] = (unsigned char)(sizeData >> 16); + info[23] = (unsigned char)(sizeData >> 24); + + outFile.write(reinterpret_cast<char*>(file), sizeof(file)); + outFile.write(reinterpret_cast<char*>(info), sizeof(info)); + + unsigned char pad[3] = {0, 0, 0}; + + for (size_t y = 0; y < height; y++) { + for (size_t x = 0; x < width; x++) { + unsigned char pixel[3]; + size_t index = data.at(y).at(x); + pixel[0] = 
colors.at(index).red(); + pixel[1] = colors.at(index).green(); + pixel[2] = colors.at(index).blue(); + outFile.write(reinterpret_cast<char*>(pixel), 3); + } + outFile.write(reinterpret_cast<char*>(pad), padSize); + } +} + +/** + * @brief Writes output data to BMP image + * @param name - image name + * @param data - output data + * @param height - height of the target image + * @param width - width of the target image + * @return false if error else true + */ +static UNUSED bool writeOutputBmp(std::string name, unsigned char* data, size_t height, size_t width) { + std::ofstream outFile; + outFile.open(name, std::ofstream::binary); + if (!outFile.is_open()) { + return false; + } + + unsigned char file[14] = { + 'B', + 'M', // magic + 0, + 0, + 0, + 0, // size in bytes + 0, + 0, // app data + 0, + 0, // app data + 40 + 14, + 0, + 0, + 0 // start of data offset + }; + unsigned char info[40] = { + 40, 0, 0, 0, // info hd size + 0, 0, 0, 0, // width + 0, 0, 0, 0, // height + 1, 0, // number color planes + 24, 0, // bits per pixel + 0, 0, 0, 0, // compression is none + 0, 0, 0, 0, // image bits size + 0x13, 0x0B, 0, 0, // horz resolution in pixel / m + 0x13, 0x0B, 0, 0, // vert resolution (0x03C3 = 96 dpi, 0x0B13 = 72 dpi) + 0, 0, 0, 0, // #colors in palette + 0, 0, 0, 0, // #important colors + }; + + OPENVINO_ASSERT( + height < (size_t)std::numeric_limits<int32_t>::max && width < (size_t)std::numeric_limits<int32_t>::max, + "File size is too big: ", + height, + " X ", + width); + + int padSize = static_cast<int>(4 - (width * 3) % 4) % 4; + int sizeData = static_cast<int>(width * height * 3 + height * padSize); + int sizeAll = sizeData + sizeof(file) + sizeof(info); + + file[2] = (unsigned char)(sizeAll); + file[3] = (unsigned char)(sizeAll >> 8); + file[4] = (unsigned char)(sizeAll >> 16); + file[5] = (unsigned char)(sizeAll >> 24); + + info[4] = (unsigned char)(width); + info[5] = (unsigned char)(width >> 8); + info[6] = (unsigned char)(width >> 16); + info[7] = (unsigned char)(width >> 24); + + int32_t negativeHeight = -(int32_t)height; + info[8] = (unsigned char)(negativeHeight); + info[9] = (unsigned char)(negativeHeight >> 8); + info[10] = (unsigned char)(negativeHeight >> 16); + info[11] = (unsigned char)(negativeHeight >> 24); + + info[20] = (unsigned char)(sizeData); + info[21] = (unsigned char)(sizeData >> 8); + info[22] = (unsigned char)(sizeData >> 16); + info[23] = (unsigned char)(sizeData >> 24); + + outFile.write(reinterpret_cast<char*>(file), sizeof(file)); + outFile.write(reinterpret_cast<char*>(info), sizeof(info)); + + unsigned char pad[3] = {0, 0, 0}; + + for (size_t y = 0; y < height; y++) { + for (size_t x = 0; x < width; x++) { + unsigned char pixel[3]; + pixel[0] = data[y * width * 3 + x * 3]; + pixel[1] = data[y * width * 3 + x * 3 + 1]; + pixel[2] = data[y * width * 3 + x * 3 + 2]; + + outFile.write(reinterpret_cast<char*>(pixel), 3); + } + outFile.write(reinterpret_cast<char*>(pad), padSize); + } + return true; +} + +/** + * @brief Adds colored rectangles to the image + * @param data - data where rectangles are put + * @param height - height of the rectangle + * @param width - width of the rectangle + * @param rectangles - vector points for the rectangle, should be 4x compared to num classes + * @param classes - vector of classes + * @param thickness - thickness of a line (in pixels) to be used for bounding boxes + */ +static UNUSED void addRectangles(unsigned char* data, + size_t height, + size_t width, + std::vector<int> rectangles, + std::vector<int> classes, + int 
thickness = 1) { + std::vector<Color> colors = {// colors to be used for bounding boxes + {128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190}, + {153, 153, 153}, {30, 170, 250}, {0, 220, 220}, {35, 142, 107}, {152, 251, 152}, + {180, 130, 70}, {60, 20, 220}, {0, 0, 255}, {142, 0, 0}, {70, 0, 0}, + {100, 60, 0}, {90, 0, 0}, {230, 0, 0}, {32, 11, 119}, {0, 74, 111}, + {81, 0, 81}}; + + if (rectangles.size() % 4 != 0 || rectangles.size() / 4 != classes.size()) { + return; + } + + for (size_t i = 0; i < classes.size(); i++) { + int x = rectangles.at(i * 4); + int y = rectangles.at(i * 4 + 1); + int w = rectangles.at(i * 4 + 2); + int h = rectangles.at(i * 4 + 3); + + int cls = classes.at(i) % colors.size(); // color of a bounding box line + + if (x < 0) + x = 0; + if (y < 0) + y = 0; + if (w < 0) + w = 0; + if (h < 0) + h = 0; + + if (static_cast<std::size_t>(x) >= width) { + x = static_cast<int>(width - 1); + w = 0; + thickness = 1; + } + if (static_cast<std::size_t>(y) >= height) { + y = static_cast<int>(height - 1); + h = 0; + thickness = 1; + } + + if (static_cast<std::size_t>(x + w) >= width) { + w = static_cast<int>(width - x - 1); + } + if (static_cast<std::size_t>(y + h) >= height) { + h = static_cast<int>(height - y - 1); + } + + thickness = std::min(std::min(thickness, w / 2 + 1), h / 2 + 1); + + size_t shift_first; + size_t shift_second; + for (int t = 0; t < thickness; t++) { + shift_first = (y + t) * width * 3; + shift_second = (y + h - t) * width * 3; + for (int ii = x; ii < x + w + 1; ii++) { + data[shift_first + ii * 3] = colors.at(cls).red(); + data[shift_first + ii * 3 + 1] = colors.at(cls).green(); + data[shift_first + ii * 3 + 2] = colors.at(cls).blue(); + data[shift_second + ii * 3] = colors.at(cls).red(); + data[shift_second + ii * 3 + 1] = colors.at(cls).green(); + data[shift_second + ii * 3 + 2] = colors.at(cls).blue(); + } + } + + for (int t = 0; t < thickness; t++) { + shift_first = (x + t) * 3; + shift_second = (x + w - t) * 3; + for (int ii = y; ii < y + h + 1; ii++) { + data[shift_first + ii * width * 3] = colors.at(cls).red(); + data[shift_first + ii * width * 3 + 1] = colors.at(cls).green(); + data[shift_first + ii * width * 3 + 2] = colors.at(cls).blue(); + data[shift_second + ii * width * 3] = colors.at(cls).red(); + data[shift_second + ii * width * 3 + 1] = colors.at(cls).green(); + data[shift_second + ii * width * 3 + 2] = colors.at(cls).blue(); + } + } + } +} + +// DLA PATCH BEGIN - Re-implement functions needed for dla_benchmark that was removed from OPENVINO 2022.3.0 +inline std::size_t getTensorWidth(const InferenceEngine::TensorDesc& desc) { + const auto& layout = desc.getLayout(); + const auto& dims = desc.getDims(); + const auto& size = dims.size(); + if ((size >= 2) && (layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC || layout == InferenceEngine::Layout::NCDHW || + layout == InferenceEngine::Layout::NDHWC || layout == InferenceEngine::Layout::OIHW || layout == InferenceEngine::Layout::GOIHW || + layout == InferenceEngine::Layout::OIDHW || layout == InferenceEngine::Layout::GOIDHW || layout == InferenceEngine::Layout::CHW || + layout == InferenceEngine::Layout::HW)) { + // Regardless of layout, dimensions are stored in fixed order + return dims.back(); + } else { + IE_THROW() << "Tensor does not have width dimension"; + } + return 0; +} + +inline std::size_t getTensorHeight(const InferenceEngine::TensorDesc& desc) { + const auto& layout = desc.getLayout(); + const auto& dims = 
desc.getDims();
+    const auto& size = dims.size();
+    if ((size >= 2) && (layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
+                        layout == InferenceEngine::Layout::NCDHW || layout == InferenceEngine::Layout::NDHWC ||
+                        layout == InferenceEngine::Layout::OIHW || layout == InferenceEngine::Layout::GOIHW ||
+                        layout == InferenceEngine::Layout::OIDHW || layout == InferenceEngine::Layout::GOIDHW ||
+                        layout == InferenceEngine::Layout::CHW || layout == InferenceEngine::Layout::HW)) {
+        // Regardless of layout, dimensions are stored in fixed order
+        return dims.at(size - 2);
+    } else {
+        IE_THROW() << "Tensor does not have height dimension";
+    }
+    return 0;
+}
+
+inline std::size_t getTensorChannels(const InferenceEngine::TensorDesc& desc) {
+    const auto& layout = desc.getLayout();
+    if (layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
+        layout == InferenceEngine::Layout::NCDHW || layout == InferenceEngine::Layout::NDHWC ||
+        layout == InferenceEngine::Layout::C || layout == InferenceEngine::Layout::CHW ||
+        layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::CN) {
+        // Regardless of layout, dimensions are stored in fixed order
+        const auto& dims = desc.getDims();
+        switch (desc.getLayoutByDims(dims)) {
+        case InferenceEngine::Layout::C:
+            return dims.at(0);
+        case InferenceEngine::Layout::NC:
+            return dims.at(1);
+        case InferenceEngine::Layout::CHW:
+            return dims.at(0);
+        case InferenceEngine::Layout::NCHW:
+            return dims.at(1);
+        case InferenceEngine::Layout::NCDHW:
+            return dims.at(1);
+        case InferenceEngine::Layout::SCALAR:   // [[fallthrough]]
+        case InferenceEngine::Layout::BLOCKED:  // [[fallthrough]]
+        default:
+            IE_THROW() << "Tensor does not have channels dimension";
+        }
+    } else {
+        IE_THROW() << "Tensor does not have channels dimension";
+    }
+    return 0;
+}
+
+inline std::size_t getTensorBatch(const InferenceEngine::TensorDesc& desc) {
+    const auto& layout = desc.getLayout();
+    if (layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
+        layout == InferenceEngine::Layout::NCDHW || layout == InferenceEngine::Layout::NDHWC ||
+        layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::CN) {
+        // Regardless of layout, dimensions are stored in fixed order
+        const auto& dims = desc.getDims();
+        switch (desc.getLayoutByDims(dims)) {
+        case InferenceEngine::Layout::NC:
+            return dims.at(0);
+        case InferenceEngine::Layout::NCHW:
+            return dims.at(0);
+        case InferenceEngine::Layout::NCDHW:
+            return dims.at(0);
+        case InferenceEngine::Layout::CHW:      // [[fallthrough]]
+        case InferenceEngine::Layout::C:        // [[fallthrough]]
+        case InferenceEngine::Layout::SCALAR:   // [[fallthrough]]
+        case InferenceEngine::Layout::BLOCKED:  // [[fallthrough]]
+        default:
+            IE_THROW() << "Tensor does not have batch dimension";
+        }
+    } else {
+        IE_THROW() << "Tensor does not have batch dimension";
+    }
+    return 0;
+}
+
+// DLA PATCH END
+
+/**
+ * @brief Writes output data to a BMP image
+ * @param data - output data
+ * @param height - height of the target image
+ * @param width - width of the target image
+ * @param outFile - output stream that receives the BMP bytes
+ * @return true on success
+ */
+
+static UNUSED bool writeOutputBmp(unsigned char* data, size_t height, size_t width, std::ostream& outFile) {
+    unsigned char file[14] = {
+        'B',
+        'M',  // magic
+        0,
+        0,
+        0,
+        0,  // size in bytes
+        0,
+        0,  // app data
+        0,
+        0,  // app data
+        40 + 14,
+        0,
+        0,
+        0  // start of data offset
+    };
+    unsigned char info[40] = {
+        40, 0, 0, 0,  // info hd size
+        0, 0, 0, 0,   // 
width + 0, 0, 0, 0, // height + 1, 0, // number color planes + 24, 0, // bits per pixel + 0, 0, 0, 0, // compression is none + 0, 0, 0, 0, // image bits size + 0x13, 0x0B, 0, 0, // horz resolution in pixel / m + 0x13, 0x0B, 0, 0, // vert resolution (0x03C3 = 96 dpi, 0x0B13 = 72 dpi) + 0, 0, 0, 0, // #colors in palette + 0, 0, 0, 0, // #important colors + }; + + OPENVINO_ASSERT( + height < (size_t)std::numeric_limits<int32_t>::max && width < (size_t)std::numeric_limits<int32_t>::max, + "File size is too big: ", + height, + " X ", + width); + + int padSize = static_cast<int>(4 - (width * 3) % 4) % 4; + int sizeData = static_cast<int>(width * height * 3 + height * padSize); + int sizeAll = sizeData + sizeof(file) + sizeof(info); + + file[2] = (unsigned char)(sizeAll); + file[3] = (unsigned char)(sizeAll >> 8); + file[4] = (unsigned char)(sizeAll >> 16); + file[5] = (unsigned char)(sizeAll >> 24); + + info[4] = (unsigned char)(width); + info[5] = (unsigned char)(width >> 8); + info[6] = (unsigned char)(width >> 16); + info[7] = (unsigned char)(width >> 24); + + int32_t negativeHeight = -(int32_t)height; + info[8] = (unsigned char)(negativeHeight); + info[9] = (unsigned char)(negativeHeight >> 8); + info[10] = (unsigned char)(negativeHeight >> 16); + info[11] = (unsigned char)(negativeHeight >> 24); + + info[20] = (unsigned char)(sizeData); + info[21] = (unsigned char)(sizeData >> 8); + info[22] = (unsigned char)(sizeData >> 16); + info[23] = (unsigned char)(sizeData >> 24); + + outFile.write(reinterpret_cast<char*>(file), sizeof(file)); + outFile.write(reinterpret_cast<char*>(info), sizeof(info)); + + unsigned char pad[3] = {0, 0, 0}; + + for (size_t y = 0; y < height; y++) { + for (size_t x = 0; x < width; x++) { + unsigned char pixel[3]; + pixel[0] = data[y * width * 3 + x * 3]; + pixel[1] = data[y * width * 3 + x * 3 + 1]; + pixel[2] = data[y * width * 3 + x * 3 + 2]; + outFile.write(reinterpret_cast<char*>(pixel), 3); + } + outFile.write(reinterpret_cast<char*>(pad), padSize); + } + + return true; +} + +static UNUSED void printPerformanceCounts(const std::map<std::string, ov::ProfilingInfo>& performanceMap, + std::ostream& stream, + std::string deviceName, + bool bshowHeader = true) { + std::chrono::microseconds totalTime = std::chrono::microseconds::zero(); + // Print performance counts + if (bshowHeader) { + stream << std::endl << "performance counts:" << std::endl << std::endl; + } + std::ios::fmtflags fmt(std::cout.flags()); + + for (const auto& it : performanceMap) { + std::string toPrint(it.first); + const int maxLayerName = 30; + + if (it.first.length() >= maxLayerName) { + toPrint = it.first.substr(0, maxLayerName - 4); + toPrint += "..."; + } + + stream << std::setw(maxLayerName) << std::left << toPrint; + switch (it.second.status) { + case ov::ProfilingInfo::Status::EXECUTED: + stream << std::setw(15) << std::left << "EXECUTED"; + break; + case ov::ProfilingInfo::Status::NOT_RUN: + stream << std::setw(15) << std::left << "NOT_RUN"; + break; + case ov::ProfilingInfo::Status::OPTIMIZED_OUT: + stream << std::setw(15) << std::left << "OPTIMIZED_OUT"; + break; + } + stream << std::setw(30) << std::left << "layerType: " + std::string(it.second.node_type) + " "; + stream << std::setw(20) << std::left << "realTime: " + std::to_string(it.second.real_time.count()); + stream << std::setw(20) << std::left << "cpu: " + std::to_string(it.second.cpu_time.count()); + stream << " execType: " << it.second.exec_type << std::endl; + if (it.second.real_time.count() > 0) { + totalTime += 
it.second.real_time; + } + } + stream << std::setw(20) << std::left << "Total time: " + std::to_string(totalTime.count()) << " microseconds" + << std::endl; + std::cout << std::endl; + std::cout << "Full device name: " << deviceName << std::endl; + std::cout << std::endl; + std::cout.flags(fmt); +} + +/** + * @brief This class represents an object that is found by an object detection net + */ +class DetectedObject { +public: + int objectType; + float xmin, xmax, ymin, ymax, prob; + bool difficult; + + DetectedObject(int _objectType, + float _xmin, + float _ymin, + float _xmax, + float _ymax, + float _prob, + bool _difficult = false) + : objectType(_objectType), + xmin(_xmin), + xmax(_xmax), + ymin(_ymin), + ymax(_ymax), + prob(_prob), + difficult(_difficult) {} + + DetectedObject(const DetectedObject& other) = default; + + static float ioU(const DetectedObject& detectedObject1_, const DetectedObject& detectedObject2_) { + // Add small space to eliminate empty squares + float epsilon = 0; // 1e-5f; + + DetectedObject detectedObject1(detectedObject1_.objectType, + (detectedObject1_.xmin - epsilon), + (detectedObject1_.ymin - epsilon), + (detectedObject1_.xmax - epsilon), + (detectedObject1_.ymax - epsilon), + detectedObject1_.prob); + DetectedObject detectedObject2(detectedObject2_.objectType, + (detectedObject2_.xmin + epsilon), + (detectedObject2_.ymin + epsilon), + (detectedObject2_.xmax), + (detectedObject2_.ymax), + detectedObject2_.prob); + + if (detectedObject1.objectType != detectedObject2.objectType) { + // objects are different, so the result is 0 + return 0.0f; + } + + if (detectedObject1.xmax < detectedObject1.xmin) + return 0.0; + if (detectedObject1.ymax < detectedObject1.ymin) + return 0.0; + if (detectedObject2.xmax < detectedObject2.xmin) + return 0.0; + if (detectedObject2.ymax < detectedObject2.ymin) + return 0.0; + + float xmin = (std::max)(detectedObject1.xmin, detectedObject2.xmin); + float ymin = (std::max)(detectedObject1.ymin, detectedObject2.ymin); + float xmax = (std::min)(detectedObject1.xmax, detectedObject2.xmax); + float ymax = (std::min)(detectedObject1.ymax, detectedObject2.ymax); + + // Caffe adds 1 to every length if the box isn't normalized. So do we... 
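+        // A quick numeric check of the formula below (illustrative values, not
+        // taken from the original sample): two identical unnormalized boxes
+        // (0, 0, 9, 9) get addendum = 1, so intr = (1 + 9 - 0) * (1 + 9 - 0) = 100,
+        // square1 = square2 = 100, unn = 100 + 100 - 100 = 100, and IoU = 1.0.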
+ float addendum; + if (xmax > 1 || ymax > 1) + addendum = 1; + else + addendum = 0; + + // intersection + float intr; + if ((xmax >= xmin) && (ymax >= ymin)) { + intr = (addendum + xmax - xmin) * (addendum + ymax - ymin); + } else { + intr = 0.0f; + } + + // union + float square1 = (addendum + detectedObject1.xmax - detectedObject1.xmin) * + (addendum + detectedObject1.ymax - detectedObject1.ymin); + float square2 = (addendum + detectedObject2.xmax - detectedObject2.xmin) * + (addendum + detectedObject2.ymax - detectedObject2.ymin); + + float unn = square1 + square2 - intr; + + return static_cast<float>(intr) / unn; + } + + DetectedObject scale(float scale_x, float scale_y) const { + return DetectedObject(objectType, + xmin * scale_x, + ymin * scale_y, + xmax * scale_x, + ymax * scale_y, + prob, + difficult); + } +}; + +class ImageDescription { +public: + const std::list<DetectedObject> alist; + const bool check_probs; + + explicit ImageDescription(const std::list<DetectedObject>& _alist, bool _check_probs = false) + : alist(_alist), + check_probs(_check_probs) {} + + static float ioUMultiple(const ImageDescription& detectedObjects, const ImageDescription& desiredObjects) { + const ImageDescription *detectedObjectsSmall, *detectedObjectsBig; + bool check_probs = desiredObjects.check_probs; + + if (detectedObjects.alist.size() < desiredObjects.alist.size()) { + detectedObjectsSmall = &detectedObjects; + detectedObjectsBig = &desiredObjects; + } else { + detectedObjectsSmall = &desiredObjects; + detectedObjectsBig = &detectedObjects; + } + + std::list<DetectedObject> doS = detectedObjectsSmall->alist; + std::list<DetectedObject> doB = detectedObjectsBig->alist; + + float fullScore = 0.0f; + while (doS.size() > 0) { + float score = 0.0f; + std::list<DetectedObject>::iterator bestJ = doB.end(); + for (auto j = doB.begin(); j != doB.end(); j++) { + float curscore = DetectedObject::ioU(*doS.begin(), *j); + if (score < curscore) { + score = curscore; + bestJ = j; + } + } + + float coeff = 1.0; + if (check_probs) { + if (bestJ != doB.end()) { + float mn = std::min((*bestJ).prob, (*doS.begin()).prob); + float mx = std::max((*bestJ).prob, (*doS.begin()).prob); + + coeff = mn / mx; + } + } + + doS.pop_front(); + if (bestJ != doB.end()) + doB.erase(bestJ); + fullScore += coeff * score; + } + fullScore /= detectedObjectsBig->alist.size(); + + return fullScore; + } + + ImageDescription scale(float scale_x, float scale_y) const { + std::list<DetectedObject> slist; + for (auto& dob : alist) { + slist.push_back(dob.scale(scale_x, scale_y)); + } + return ImageDescription(slist, check_probs); + } +}; + +struct AveragePrecisionCalculator { +private: + enum MatchKind { TruePositive, FalsePositive }; + + /** + * Here we count all TP and FP matches for all the classes in all the images. 
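+     *
+     * In short: matches[c] stores one (confidence, TP/FP) pair per detection of
+     * class c, while N[c] counts the non-difficult ground-truth boxes of class c.
+     * calculateAveragePrecisionPerClass() below then evaluates the Caffe-style
+     * 11-point interpolated average precision:
+     *     AP = (1/11) * sum over r in {0.0, 0.1, ..., 1.0} of
+     *          (max precision among points with recall >= r)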
+ */ + std::map<int, std::vector<std::pair<double, MatchKind>>> matches; + + std::map<int, int> N; + + double threshold; + + static bool SortBBoxDescend(const DetectedObject& bbox1, const DetectedObject& bbox2) { + return bbox1.prob > bbox2.prob; + } + + static bool SortPairDescend(const std::pair<double, MatchKind>& p1, const std::pair<double, MatchKind>& p2) { + return p1.first > p2.first; + } + +public: + explicit AveragePrecisionCalculator(double _threshold) : threshold(_threshold) {} + + // gt_bboxes -> des + // bboxes -> det + + void consumeImage(const ImageDescription& detectedObjects, const ImageDescription& desiredObjects) { + // Collecting IoU values + std::vector<bool> visited(desiredObjects.alist.size(), false); + std::vector<DetectedObject> bboxes{std::begin(detectedObjects.alist), std::end(detectedObjects.alist)}; + std::sort(bboxes.begin(), bboxes.end(), SortBBoxDescend); + + for (auto&& detObj : bboxes) { + // Searching for the best match to this detection + // Searching for desired object + float overlap_max = -1; + int jmax = -1; + auto desmax = desiredObjects.alist.end(); + + int j = 0; + for (auto desObj = desiredObjects.alist.begin(); desObj != desiredObjects.alist.end(); desObj++, j++) { + double iou = DetectedObject::ioU(detObj, *desObj); + if (iou > overlap_max) { + overlap_max = static_cast<float>(iou); + jmax = j; + desmax = desObj; + } + } + + MatchKind mk; + if (overlap_max >= threshold) { + if (!desmax->difficult) { + if (!visited[jmax]) { + mk = TruePositive; + visited[jmax] = true; + } else { + mk = FalsePositive; + } + matches[detObj.objectType].push_back(std::make_pair(detObj.prob, mk)); + } + } else { + mk = FalsePositive; + matches[detObj.objectType].push_back(std::make_pair(detObj.prob, mk)); + } + } + + for (auto desObj = desiredObjects.alist.begin(); desObj != desiredObjects.alist.end(); desObj++) { + if (!desObj->difficult) { + N[desObj->objectType]++; + } + } + } + + std::map<int, double> calculateAveragePrecisionPerClass() const { + /** + * Precision-to-TP curve per class (a variation of precision-to-recall curve without + * dividing into N) + */ + std::map<int, std::map<int, double>> precisionToTP; + + std::map<int, double> res; + + for (auto m : matches) { + // Sorting + std::sort(m.second.begin(), m.second.end(), SortPairDescend); + + int clazz = m.first; + int TP = 0, FP = 0; + + std::vector<double> prec; + std::vector<double> rec; + + for (auto mm : m.second) { + // Here we are descending in a probability value + MatchKind mk = mm.second; + if (mk == TruePositive) + TP++; + else if (mk == FalsePositive) + FP++; + + double precision = static_cast<double>(TP) / (TP + FP); + double recall = 0; + if (N.find(clazz) != N.end()) { + recall = static_cast<double>(TP) / N.at(clazz); + } + + prec.push_back(precision); + rec.push_back(recall); + } + + int num = static_cast<int>(rec.size()); + + // 11point from Caffe + double ap = 0; + std::vector<float> max_precs(11, 0.); + int start_idx = num - 1; + for (int j = 10; j >= 0; --j) { + for (int i = start_idx; i >= 0; --i) { + if (rec[i] < j / 10.) 
{ + start_idx = i; + if (j > 0) { + max_precs[j - 1] = max_precs[j]; + } + break; + } else { + if (max_precs[j] < prec[i]) { + max_precs[j] = static_cast<float>(prec[i]); + } + } + } + } + for (int j = 10; j >= 0; --j) { + ap += max_precs[j] / 11; + } + res[clazz] = ap; + } + + return res; + } +}; + +/** + * @brief Adds colored rectangles to the image + * @param data - data where rectangles are put + * @param height - height of the rectangle + * @param width - width of the rectangle + * @param detectedObjects - vector of detected objects + */ +static UNUSED void addRectangles(unsigned char* data, + size_t height, + size_t width, + std::vector<DetectedObject> detectedObjects) { + std::vector<Color> colors = {{128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190}, + {153, 153, 153}, {30, 170, 250}, {0, 220, 220}, {35, 142, 107}, {152, 251, 152}, + {180, 130, 70}, {60, 20, 220}, {0, 0, 255}, {142, 0, 0}, {70, 0, 0}, + {100, 60, 0}, {90, 0, 0}, {230, 0, 0}, {32, 11, 119}, {0, 74, 111}, + {81, 0, 81}}; + + for (size_t i = 0; i < detectedObjects.size(); i++) { + int cls = detectedObjects[i].objectType % colors.size(); + + int xmin = static_cast<int>(detectedObjects[i].xmin * width); + int xmax = static_cast<int>(detectedObjects[i].xmax * width); + int ymin = static_cast<int>(detectedObjects[i].ymin * height); + int ymax = static_cast<int>(detectedObjects[i].ymax * height); + + size_t shift_first = ymin * width * 3; + size_t shift_second = ymax * width * 3; + for (int x = xmin; x < xmax; x++) { + data[shift_first + x * 3] = colors.at(cls).red(); + data[shift_first + x * 3 + 1] = colors.at(cls).green(); + data[shift_first + x * 3 + 2] = colors.at(cls).blue(); + data[shift_second + x * 3] = colors.at(cls).red(); + data[shift_second + x * 3 + 1] = colors.at(cls).green(); + data[shift_second + x * 3 + 2] = colors.at(cls).blue(); + } + + shift_first = xmin * 3; + shift_second = xmax * 3; + for (int y = ymin; y < ymax; y++) { + data[shift_first + y * width * 3] = colors.at(cls).red(); + data[shift_first + y * width * 3 + 1] = colors.at(cls).green(); + data[shift_first + y * width * 3 + 2] = colors.at(cls).blue(); + data[shift_second + y * width * 3] = colors.at(cls).red(); + data[shift_second + y * width * 3 + 1] = colors.at(cls).green(); + data[shift_second + y * width * 3 + 2] = colors.at(cls).blue(); + } + } +} + +inline void showAvailableDevices() { + ov::Core core; + std::vector<std::string> devices = core.get_available_devices(); + + std::cout << std::endl; + std::cout << "Available target devices:"; + for (const auto& device : devices) { + std::cout << " " << device; + } + std::cout << std::endl; +} + +/** + * @brief Parse text config file. 
The file must have the following format (with space a delimeter): + * CONFIG_NAME1 CONFIG_VALUE1 + * CONFIG_NAME2 CONFIG_VALUE2 + * + * @param configName - filename for a file with config options + * @param comment - lines starting with symbol `comment` are skipped + */ +std::map<std::string, std::string> parseConfig(const std::string& configName, char comment = '#'); + +inline std::string getFullDeviceName(ov::Core& core, std::string device) { + try { + return core.get_property(device, ov::device::full_name); + } catch (ov::Exception&) { + return {}; + } +} + +static UNUSED void printPerformanceCounts(std::vector<ov::ProfilingInfo> performanceData, + std::ostream& stream, + std::string deviceName, + bool bshowHeader = true) { + std::chrono::microseconds totalTime = std::chrono::microseconds::zero(); + // Print performance counts + if (bshowHeader) { + stream << std::endl << "performance counts:" << std::endl << std::endl; + } + std::ios::fmtflags fmt(std::cout.flags()); + for (const auto& it : performanceData) { + std::string toPrint(it.node_name); + const int maxLayerName = 30; + + if (it.node_name.length() >= maxLayerName) { + toPrint = it.node_name.substr(0, maxLayerName - 5); + toPrint += "..."; + } + + stream << std::setw(maxLayerName) << std::left << toPrint << " "; + switch (it.status) { + case ov::ProfilingInfo::Status::EXECUTED: + stream << std::setw(15) << std::left << "EXECUTED "; + break; + case ov::ProfilingInfo::Status::NOT_RUN: + stream << std::setw(15) << std::left << "NOT_RUN "; + break; + case ov::ProfilingInfo::Status::OPTIMIZED_OUT: + stream << std::setw(15) << std::left << "OPTIMIZED_OUT "; + break; + } + stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " "; + stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " "; + stream << std::setw(25) << std::left << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " "; + stream << std::setw(25) << std::left << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " "; + stream << std::endl; + if (it.real_time.count() > 0) { + totalTime += it.real_time; + } + } + stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0) + << " milliseconds" << std::endl; + std::cout << std::endl; + std::cout << "Full device name: " << deviceName << std::endl; + std::cout << std::endl; + std::cout.flags(fmt); +} + +static UNUSED void printPerformanceCounts(ov::InferRequest request, + std::ostream& stream, + std::string deviceName, + bool bshowHeader = true) { + auto performanceMap = request.get_profiling_info(); + printPerformanceCounts(performanceMap, stream, deviceName, bshowHeader); +} + +static inline std::string double_to_string(const double number) { + std::stringstream ss; + ss << std::fixed << std::setprecision(2) << number; + return ss.str(); +} + +template <typename T> +using uniformDistribution = typename std::conditional< + std::is_floating_point<T>::value, + std::uniform_real_distribution<T>, + typename std::conditional<std::is_integral<T>::value, std::uniform_int_distribution<T>, void>::type>::type; + +template <typename T, typename T2> +static inline void fill_random(ov::Tensor& tensor, + T rand_min = std::numeric_limits<uint8_t>::min(), + T rand_max = std::numeric_limits<uint8_t>::max()) { + std::mt19937 gen(0); + size_t tensor_size = tensor.get_size(); + if (0 == tensor_size) { + throw std::runtime_error( + "Models with dynamic shapes aren't supported. 
Input tensors must have specific shapes before inference"); + } + T* data = tensor.data<T>(); + uniformDistribution<T2> distribution(rand_min, rand_max); + for (size_t i = 0; i < tensor_size; i++) { + data[i] = static_cast<T>(distribution(gen)); + } +} + +static inline void fill_tensor_random(ov::Tensor tensor) { + switch (tensor.get_element_type()) { + case ov::element::f32: + fill_random<float, float>(tensor); + break; + case ov::element::f64: + fill_random<double, double>(tensor); + break; + case ov::element::f16: + fill_random<short, short>(tensor); + break; + case ov::element::i32: + fill_random<int32_t, int32_t>(tensor); + break; + case ov::element::i64: + fill_random<int64_t, int64_t>(tensor); + break; + case ov::element::u8: + // uniform_int_distribution<uint8_t> is not allowed in the C++17 + // standard and vs2017/19 + fill_random<uint8_t, uint32_t>(tensor); + break; + case ov::element::i8: + // uniform_int_distribution<int8_t> is not allowed in the C++17 standard + // and vs2017/19 + fill_random<int8_t, int32_t>(tensor, std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()); + break; + case ov::element::u16: + fill_random<uint16_t, uint16_t>(tensor); + break; + case ov::element::i16: + fill_random<int16_t, int16_t>(tensor); + break; + case ov::element::boolean: + fill_random<uint8_t, uint32_t>(tensor, 0, 1); + break; + default: + throw ov::Exception("Input type is not supported for a tensor"); + } +} + +static UNUSED void printPerformanceCountsNoSort(std::vector<ov::ProfilingInfo> performanceData, + std::ostream& stream, + std::string deviceName, + bool bshowHeader = true) { + std::chrono::microseconds totalTime = std::chrono::microseconds::zero(); + // Print performance counts + if (bshowHeader) { + stream << std::endl << "performance counts:" << std::endl << std::endl; + } + std::ios::fmtflags fmt(std::cout.flags()); + + for (const auto& it : performanceData) { + if (it.real_time.count() > 0) { + totalTime += it.real_time; + } + } + if (totalTime.count() != 0) { + for (const auto& it : performanceData) { + std::string toPrint(it.node_name); + const int maxLayerName = 30; + + if (it.node_name.length() >= maxLayerName) { + toPrint = it.node_name.substr(0, maxLayerName - 5); + toPrint += "..."; + } + + stream << std::setw(maxLayerName) << std::left << toPrint << " "; + switch (it.status) { + case ov::ProfilingInfo::Status::EXECUTED: + stream << std::setw(15) << std::left << "EXECUTED "; + break; + case ov::ProfilingInfo::Status::NOT_RUN: + stream << std::setw(15) << std::left << "NOT_RUN "; + break; + case ov::ProfilingInfo::Status::OPTIMIZED_OUT: + stream << std::setw(15) << std::left << "OPTIMIZED_OUT "; + break; + } + stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " "; + stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " "; + stream << std::setw(25) << std::left + << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " "; + stream << std::setw(25) << std::left + << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " "; + + double opt_proportion = it.real_time.count() * 100.0 / totalTime.count(); + std::stringstream opt_proportion_ss; + opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion; + std::string opt_proportion_str = opt_proportion_ss.str(); + if (opt_proportion_str == "0.00") { + opt_proportion_str = "N/A"; + } + stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%"; + + stream << std::endl; + } + } + 
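+    // ov::ProfilingInfo reports real_time and cpu_time in microseconds; the
+    // summary below converts them to milliseconds via count() / 1000.0.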
stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0) + << " milliseconds" << std::endl; + std::cout << std::endl; + std::cout << "Full device name: " << deviceName << std::endl; + std::cout << std::endl; + std::cout.flags(fmt); +} + +static UNUSED bool sort_pc_descend(const ov::ProfilingInfo& profiling1, const ov::ProfilingInfo& profiling2) { + return profiling1.real_time > profiling2.real_time; +} + +static UNUSED void printPerformanceCountsDescendSort(std::vector<ov::ProfilingInfo> performanceData, + std::ostream& stream, + std::string deviceName, + bool bshowHeader = true) { + std::chrono::microseconds totalTime = std::chrono::microseconds::zero(); + // Print performance counts + if (bshowHeader) { + stream << std::endl << "performance counts:" << std::endl << std::endl; + } + std::ios::fmtflags fmt(std::cout.flags()); + + for (const auto& it : performanceData) { + if (it.real_time.count() > 0) { + totalTime += it.real_time; + } + } + if (totalTime.count() != 0) { + // sort perfcounter + std::vector<ov::ProfilingInfo> sortPerfCounts{std::begin(performanceData), std::end(performanceData)}; + std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend); + + for (const auto& it : sortPerfCounts) { + std::string toPrint(it.node_name); + const int maxLayerName = 30; + + if (it.node_name.length() >= maxLayerName) { + toPrint = it.node_name.substr(0, maxLayerName - 5); + toPrint += "..."; + } + + stream << std::setw(maxLayerName) << std::left << toPrint << " "; + switch (it.status) { + case ov::ProfilingInfo::Status::EXECUTED: + stream << std::setw(15) << std::left << "EXECUTED "; + break; + case ov::ProfilingInfo::Status::NOT_RUN: + stream << std::setw(15) << std::left << "NOT_RUN "; + break; + case ov::ProfilingInfo::Status::OPTIMIZED_OUT: + stream << std::setw(15) << std::left << "OPTIMIZED_OUT "; + break; + } + stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " "; + stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " "; + stream << std::setw(25) << std::left + << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " "; + stream << std::setw(25) << std::left + << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " "; + + double opt_proportion = it.real_time.count() * 100.0 / totalTime.count(); + std::stringstream opt_proportion_ss; + opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion; + std::string opt_proportion_str = opt_proportion_ss.str(); + if (opt_proportion_str == "0.00") { + opt_proportion_str = "N/A"; + } + stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%"; + + stream << std::endl; + } + } + stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0) + << " milliseconds" << std::endl; + std::cout << std::endl; + std::cout << "Full device name: " << deviceName << std::endl; + std::cout << std::endl; + std::cout.flags(fmt); +} + +static UNUSED void printPerformanceCountsSimpleSort(std::vector<ov::ProfilingInfo> performanceData, + std::ostream& stream, + std::string deviceName, + bool bshowHeader = true) { + std::chrono::microseconds totalTime = std::chrono::microseconds::zero(); + // Print performance counts + if (bshowHeader) { + stream << std::endl << "performance counts:" << std::endl << std::endl; + } + std::ios::fmtflags fmt(std::cout.flags()); + + for (const auto& it : performanceData) { + if (it.real_time.count() > 0) { + 
totalTime += it.real_time; + } + } + if (totalTime.count() != 0) { + // sort perfcounter + std::vector<ov::ProfilingInfo> sortPerfCounts{std::begin(performanceData), std::end(performanceData)}; + std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend); + + for (const auto& it : sortPerfCounts) { + if (it.status == ov::ProfilingInfo::Status::EXECUTED) { + std::string toPrint(it.node_name); + const int maxLayerName = 30; + + if (it.node_name.length() >= maxLayerName) { + toPrint = it.node_name.substr(0, maxLayerName - 5); + toPrint += "..."; + } + + stream << std::setw(maxLayerName) << std::left << toPrint << " "; + stream << std::setw(15) << std::left << "EXECUTED "; + stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " "; + stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " "; + stream << std::setw(25) << std::left + << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " "; + stream << std::setw(25) << std::left + << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " "; + + double opt_proportion = it.real_time.count() * 100.0 / totalTime.count(); + std::stringstream opt_proportion_ss; + opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion; + std::string opt_proportion_str = opt_proportion_ss.str(); + if (opt_proportion_str == "0.00") { + opt_proportion_str = "N/A"; + } + stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%"; + + stream << std::endl; + } + } + } + stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0) + << " milliseconds" << std::endl; + std::cout << std::endl; + std::cout << "Full device name: " << deviceName << std::endl; + std::cout << std::endl; + std::cout.flags(fmt); +} + +static UNUSED void printPerformanceCountsSort(std::vector<ov::ProfilingInfo> performanceData, + std::ostream& stream, + std::string deviceName, + std::string sorttype, + bool bshowHeader = true) { + if (sorttype == pcNoSort) { + printPerformanceCountsNoSort(performanceData, stream, deviceName, bshowHeader); + } else if (sorttype == pcSort) { + printPerformanceCountsDescendSort(performanceData, stream, deviceName, bshowHeader); + } else if (sorttype == pcSimpleSort) { + printPerformanceCountsSimpleSort(performanceData, stream, deviceName, bshowHeader); + } +} diff --git a/python/openvino/runtime/common/utils/include/samples/console_progress.hpp b/python/openvino/runtime/common/utils/include/samples/console_progress.hpp new file mode 100644 index 0000000..f62aeed --- /dev/null +++ b/python/openvino/runtime/common/utils/include/samples/console_progress.hpp @@ -0,0 +1,107 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include <cstdio> +#include <iomanip> +#include <sstream> + +/** + * @class ConsoleProgress + * @brief A ConsoleProgress class provides functionality for printing progress dynamics + */ +class ConsoleProgress { + static const size_t DEFAULT_DETALIZATION = 20; + static const size_t DEFAULT_PERCENT_TO_UPDATE_PROGRESS = 1; + + size_t total; + size_t cur_progress = 0; + size_t prev_progress = 0; + bool stream_output; + size_t detalization; + size_t percent_to_update; + +public: + /** + * @brief A constructor of ConsoleProgress class + * @param _total - maximum value that is correspondent to 100% + * @param _detalization - number of symbols(.) 
to use to represent progress + */ + explicit ConsoleProgress(size_t _total, + bool _stream_output = false, + size_t _percent_to_update = DEFAULT_PERCENT_TO_UPDATE_PROGRESS, + size_t _detalization = DEFAULT_DETALIZATION) + : total(_total), + detalization(_detalization), + percent_to_update(_percent_to_update) { + stream_output = _stream_output; + if (total == 0) { + total = 1; + } + } + + /** + * @brief Shows progress with current data. Progress is shown from the beginning of the current + * line. + */ + void showProgress() const { + std::stringstream strm; + if (!stream_output) { + strm << '\r'; + } + strm << "Progress: ["; + size_t i = 0; + for (; i < detalization * cur_progress / total; i++) { + strm << "."; + } + for (; i < detalization; i++) { + strm << " "; + } + strm << "] " << std::setw(3) << 100 * cur_progress / total << "% done"; + if (stream_output) { + strm << std::endl; + } + std::fputs(strm.str().c_str(), stdout); + std::fflush(stdout); + } + + /** + * @brief Updates current value and progressbar + */ + void updateProgress() { + if (cur_progress > total) + cur_progress = total; + size_t prev_percent = 100 * prev_progress / total; + size_t cur_percent = 100 * cur_progress / total; + + if (prev_progress == 0 || cur_progress == total || prev_percent + percent_to_update <= cur_percent) { + showProgress(); + prev_progress = cur_progress; + } + } + + /** + * @brief Adds value to currently represented and redraw progressbar + * @param add - value to add + */ + void addProgress(int add) { + if (add < 0 && -add > static_cast<int>(cur_progress)) { + add = -static_cast<int>(cur_progress); + } + cur_progress += add; + updateProgress(); + } + + /** + * @brief Output end line. + * @return + */ + void finish() { + std::stringstream strm; + strm << std::endl; + std::fputs(strm.str().c_str(), stdout); + std::fflush(stdout); + } +}; diff --git a/python/openvino/runtime/common/utils/include/samples/csv_dumper.hpp b/python/openvino/runtime/common/utils/include/samples/csv_dumper.hpp new file mode 100644 index 0000000..5c80134 --- /dev/null +++ b/python/openvino/runtime/common/utils/include/samples/csv_dumper.hpp @@ -0,0 +1,98 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include <ctime> +#include <fstream> +#include <iostream> +#include <sstream> +#include <string> + +#include "samples/slog.hpp" + +/** + * @class CsvDumper + * @brief A CsvDumper class provides functionality for dumping the values in CSV files + */ +class CsvDumper { + std::ofstream file; + std::string filename; + bool canDump = true; + char delimiter = ';'; + + std::string generateFilename() { + std::stringstream filename; + filename << "dumpfile-"; + filename << time(nullptr); + filename << ".csv"; + return filename.str(); + } + +public: + /** + * @brief A constructor. Disables dumping in case dump file cannot be created + * @param enabled - True if dumping is enabled by default. + * @param name - name of file to dump to. File won't be created if first parameter is false. + */ + explicit CsvDumper(bool enabled = true, const std::string& name = "") : canDump(enabled) { + if (!canDump) { + return; + } + filename = (name == "" ? generateFilename() : name); + file.open(filename, std::ios::out); + if (!file) { + slog::warn << "Cannot create dump file! Disabling dump." 
<< slog::endl; + canDump = false; + } + } + + /** + * @brief Sets a delimiter to use in csv file + * @param c - Delimiter char + * @return + */ + void setDelimiter(char c) { + delimiter = c; + } + + /** + * @brief Overloads operator to organize streaming values to file. Does nothing if dumping is + * disabled Adds delimiter at the end of value provided + * @param add - value to add to dump + * @return reference to same object + */ + template <class T> + CsvDumper& operator<<(const T& add) { + if (canDump) { + file << add << delimiter; + } + return *this; + } + + /** + * @brief Finishes line in dump file. Does nothing if dumping is disabled + */ + void endLine() { + if (canDump) { + file << "\n"; + } + } + + /** + * @brief Gets information if dump is enabled. + * @return true if dump is enabled and file was successfully created + */ + bool dumpEnabled() { + return canDump; + } + + /** + * @brief Gets name of a dump file + * @return name of a dump file + */ + std::string getFilename() const { + return filename; + } +}; diff --git a/python/openvino/runtime/common/utils/include/samples/latency_metrics.hpp b/python/openvino/runtime/common/utils/include/samples/latency_metrics.hpp new file mode 100644 index 0000000..bca39d0 --- /dev/null +++ b/python/openvino/runtime/common/utils/include/samples/latency_metrics.hpp @@ -0,0 +1,42 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include <map> +#include <string> +#include <utility> +#include <vector> + +// clang-format off +#include "samples/common.hpp" +#include "samples/slog.hpp" +// clang-format on + +/// @brief Responsible for calculating different latency metrics +class LatencyMetrics { +public: + LatencyMetrics() {} + + LatencyMetrics(const std::vector<double>& latencies, + const std::string& data_shape = "", + size_t percentile_boundary = 50) + : data_shape(data_shape), + percentile_boundary(percentile_boundary) { + fill_data(latencies, percentile_boundary); + } + + void write_to_stream(std::ostream& stream) const; + void write_to_slog() const; + + double median_or_percentile = 0; + double avg = 0; + double min = 0; + double max = 0; + std::string data_shape; + +private: + void fill_data(std::vector<double> latencies, size_t percentile_boundary); + size_t percentile_boundary = 50; +}; diff --git a/python/openvino/runtime/common/utils/include/samples/ocv_common.hpp b/python/openvino/runtime/common/utils/include/samples/ocv_common.hpp new file mode 100644 index 0000000..94f3b1f --- /dev/null +++ b/python/openvino/runtime/common/utils/include/samples/ocv_common.hpp @@ -0,0 +1,92 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief a header file with common samples functionality using OpenCV + * @file ocv_common.hpp + */ + +#pragma once + +#include <opencv2/opencv.hpp> + +#include "openvino/openvino.hpp" +#include "samples/common.hpp" + +/** + * @brief Sets image data stored in cv::Mat object to a given Blob object. + * @param orig_image - given cv::Mat object with an image data. + * @param blob - Blob object which to be filled by an image data. + * @param batchIndex - batch index of an image inside of the blob. 
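+ *
+ * A hypothetical usage sketch (the request and tensor names are illustrative,
+ * not part of this header):
+ *   cv::Mat frame = cv::imread("input.bmp");
+ *   InferenceEngine::Blob::Ptr input = inferRequest.GetBlob("data");
+ *   matU8ToBlob<uint8_t>(frame, input, 0);  // fills batch slot 0 in planar CHW order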
+ */ +template <typename T> +void matU8ToBlob(const cv::Mat& orig_image, InferenceEngine::Blob::Ptr& blob, int batchIndex = 0) { + InferenceEngine::SizeVector blobSize = blob->getTensorDesc().getDims(); + const size_t width = blobSize[3]; + const size_t height = blobSize[2]; + const size_t channels = blobSize[1]; + InferenceEngine::MemoryBlob::Ptr mblob = InferenceEngine::as<InferenceEngine::MemoryBlob>(blob); + OPENVINO_ASSERT(mblob, + "We expect blob to be inherited from MemoryBlob in matU8ToBlob, " + "but by fact we were not able to cast inputBlob to MemoryBlob"); + // locked memory holder should be alive all time while access to its buffer happens + auto mblobHolder = mblob->wmap(); + + T* blob_data = mblobHolder.as<T*>(); + + cv::Mat resized_image(orig_image); + if (static_cast<int>(width) != orig_image.size().width || static_cast<int>(height) != orig_image.size().height) { + cv::resize(orig_image, resized_image, cv::Size(width, height)); + } + + int batchOffset = batchIndex * width * height * channels; + + for (size_t c = 0; c < channels; c++) { + for (size_t h = 0; h < height; h++) { + for (size_t w = 0; w < width; w++) { + blob_data[batchOffset + c * width * height + h * width + w] = resized_image.at<cv::Vec3b>(h, w)[c]; + } + } + } +} + +/** + * @brief Wraps data stored inside of a passed cv::Mat object by new Blob pointer. + * @note: No memory allocation is happened. The blob just points to already existing + * cv::Mat data. + * @param mat - given cv::Mat object with an image data. + * @return resulting Blob pointer. + */ +static UNUSED InferenceEngine::Blob::Ptr wrapMat2Blob(const cv::Mat& mat) { + size_t channels = mat.channels(); + size_t height = mat.size().height; + size_t width = mat.size().width; + + size_t strideH = mat.step.buf[0]; + size_t strideW = mat.step.buf[1]; + + bool is_dense = strideW == channels && strideH == channels * width; + + OPENVINO_ASSERT(is_dense, "Doesn't support conversion from not dense cv::Mat"); + + InferenceEngine::TensorDesc tDesc(InferenceEngine::Precision::U8, + {1, channels, height, width}, + InferenceEngine::Layout::NHWC); + + return InferenceEngine::make_shared_blob<uint8_t>(tDesc, mat.data); +} + +static UNUSED ov::Tensor wrapMat2Tensor(const cv::Mat& mat) { + const size_t channels = mat.channels(); + const size_t height = mat.size().height; + const size_t width = mat.size().width; + + const size_t strideH = mat.step.buf[0]; + const size_t strideW = mat.step.buf[1]; + + const bool is_dense = strideW == channels && strideH == channels * width; + OPENVINO_ASSERT(is_dense, "Doesn't support conversion from not dense cv::Mat"); + + return ov::Tensor(ov::element::u8, ov::Shape{1, height, width, channels}, mat.data); +} diff --git a/python/openvino/runtime/common/utils/include/samples/os/windows/w_dirent.h b/python/openvino/runtime/common/utils/include/samples/os/windows/w_dirent.h new file mode 100644 index 0000000..40d1c5b --- /dev/null +++ b/python/openvino/runtime/common/utils/include/samples/os/windows/w_dirent.h @@ -0,0 +1,176 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#if defined(_WIN32) + +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN_UNDEF +# endif + +# ifndef NOMINMAX +# define NOMINMAX +# define NOMINMAX_UNDEF +# endif + +# if defined(_M_IX86) && !defined(_X86_) && !defined(_AMD64_) +# define _X86_ +# endif + +# if defined(_M_X64) && !defined(_X86_) && !defined(_AMD64_) +# define _AMD64_ +# endif + +# if defined(_M_ARM) && 
!defined(_ARM_) && !defined(_ARM64_)
+#        define _ARM_
+#    endif
+
+#    if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_)
+#        define _ARM64_
+#    endif
+
+// clang-format off
+    #include <string.h>
+    #include <windef.h>
+    #include <fileapi.h>
+    #include <Winbase.h>
+    #include <sys/stat.h>
+// clang-format on
+
+// Copied from linux libc sys/stat.h:
+#    define S_ISREG(m) (((m)&S_IFMT) == S_IFREG)
+#    define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
+
+/// @brief structure to store directory names
+struct dirent {
+    char* d_name;
+
+    explicit dirent(const wchar_t* wsFilePath) {
+        size_t i;
+        auto slen = wcslen(wsFilePath);
+        d_name = static_cast<char*>(malloc(slen + 1));
+        wcstombs_s(&i, d_name, slen + 1, wsFilePath, slen);
+    }
+    ~dirent() {
+        free(d_name);
+    }
+};
+
+/// @brief class to store directory data (files meta)
+class DIR {
+    WIN32_FIND_DATAA FindFileData;
+    HANDLE hFind;
+    dirent* next;
+
+    static inline bool endsWith(const std::string& src, const char* with) {
+        int wl = static_cast<int>(strlen(with));
+        int so = static_cast<int>(src.length()) - wl;
+        if (so < 0)
+            return false;
+        return 0 == strncmp(with, &src[so], wl);
+    }
+
+public:
+    DIR(const DIR& other) = delete;
+    DIR(DIR&& other) = delete;
+    DIR& operator=(const DIR& other) = delete;
+    DIR& operator=(DIR&& other) = delete;
+
+    explicit DIR(const char* dirPath) : next(nullptr) {
+        std::string ws = dirPath;
+        if (endsWith(ws, "\\"))
+            ws += "*";
+        else
+            ws += "\\*";
+        hFind = FindFirstFileA(ws.c_str(), &FindFileData);
+        FindFileData.dwReserved0 = hFind != INVALID_HANDLE_VALUE;
+    }
+
+    ~DIR() {
+        // Free the last entry unconditionally; deleting a null pointer is a no-op.
+        delete next;
+        next = nullptr;
+        FindClose(hFind);
+    }
+
+    /**
+     * @brief Checks whether the find handle is valid
+     * @return status True(success) or False(fail)
+     */
+    bool isValid() const {
+        return (hFind != INVALID_HANDLE_VALUE && FindFileData.dwReserved0);
+    }
+
+    /**
+     * @brief Advances to the next directory entry
+     * @return pointer to directory names struct, or nullptr when exhausted
+     */
+    dirent* nextEnt() {
+        if (next != nullptr)
+            delete next;
+        next = nullptr;
+
+        if (!FindFileData.dwReserved0)
+            return nullptr;
+
+        wchar_t wbuf[4096];
+
+        size_t outSize;
+        mbstowcs_s(&outSize, wbuf, 4094, FindFileData.cFileName, 4094);
+        next = new dirent(wbuf);
+        FindFileData.dwReserved0 = FindNextFileA(hFind, &FindFileData);
+        return next;
+    }
+};
+
+/**
+ * @brief Create directory data struct element
+ * @param dirPath - directory path
+ * @return pointer to directory data struct element
+ */
+static DIR* opendir(const char* dirPath) {
+    auto dp = new DIR(dirPath);
+    if (!dp->isValid()) {
+        delete dp;
+        return nullptr;
+    }
+    return dp;
+}
+
+/**
+ * @brief Walk through directory data struct
+ * @param dp - pointer to directory data struct
+ * @return pointer to directory data struct next element
+ */
+static struct dirent* readdir(DIR* dp) {
+    return dp->nextEnt();
+}
+
+/**
+ * @brief Remove directory data struct
+ * @param dp - pointer to directory data struct
+ * @return void
+ */
+static void closedir(DIR* dp) {
+    delete dp;
+}
+
+#    ifdef WIN32_LEAN_AND_MEAN_UNDEF
+#        undef WIN32_LEAN_AND_MEAN
+#        undef WIN32_LEAN_AND_MEAN_UNDEF
+#    endif
+
+#    ifdef NOMINMAX_UNDEF
+#        undef NOMINMAX_UNDEF
+#        undef NOMINMAX
+#    endif
+
+#else
+
+#    include <dirent.h>
+#    include <sys/types.h>
+
+#endif
diff --git a/python/openvino/runtime/common/utils/include/samples/slog.hpp b/python/openvino/runtime/common/utils/include/samples/slog.hpp
new file mode 100644
index 0000000..3f237e5
--- /dev/null
+++ b/python/openvino/runtime/common/utils/include/samples/slog.hpp
@@ -0,0 +1,102 @@
+// Copyright 
diff --git a/python/openvino/runtime/common/utils/include/samples/slog.hpp b/python/openvino/runtime/common/utils/include/samples/slog.hpp
new file mode 100644
index 0000000..3f237e5
--- /dev/null
+++ b/python/openvino/runtime/common/utils/include/samples/slog.hpp
@@ -0,0 +1,102 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief a header file with a logging facility for common samples
+ * @file slog.hpp
+ */
+
+#pragma once
+
+#include <ostream>
+#include <string>
+#include <vector>
+
+namespace slog {
+/**
+ * @class LogStreamEndLine
+ * @brief The LogStreamEndLine class implements an end-line marker for a log stream
+ */
+class LogStreamEndLine {};
+
+static constexpr LogStreamEndLine endl;
+
+/**
+ * @class LogStreamBoolAlpha
+ * @brief The LogStreamBoolAlpha class implements bool printing for a log stream
+ */
+class LogStreamBoolAlpha {};
+
+static constexpr LogStreamBoolAlpha boolalpha;
+
+/**
+ * @class LogStreamFlush
+ * @brief The LogStreamFlush class implements flushing for a log stream
+ */
+class LogStreamFlush {};
+
+static constexpr LogStreamFlush flush;
+
+/**
+ * @class LogStream
+ * @brief The LogStream class implements a stream for sample logging
+ */
+class LogStream {
+    std::string _prefix;
+    std::ostream* _log_stream;
+    bool _new_line;
+
+public:
+    /**
+     * @brief A constructor. Creates a LogStream object
+     * @param prefix The prefix to print
+     */
+    LogStream(const std::string& prefix, std::ostream& log_stream);
+
+    /**
+     * @brief A stream output operator to be used within the logger
+     * @param arg Object for serialization in the logger message
+     */
+    template <class T>
+    LogStream& operator<<(const T& arg) {
+        if (_new_line) {
+            (*_log_stream) << "[ " << _prefix << " ] ";
+            _new_line = false;
+        }
+
+        (*_log_stream) << arg;
+        return *this;
+    }
+
+    /**
+     * @brief Overload of the output stream operator to print vectors in the pretty form
+     * [value1, value2, ...]
+     */
+    template <typename T>
+    LogStream& operator<<(const std::vector<T>& v) {
+        (*_log_stream) << "[ ";
+
+        for (auto&& value : v)
+            (*_log_stream) << value << " ";
+
+        (*_log_stream) << "]";
+
+        return *this;
+    }
+
+    // Specializing for LogStreamEndLine to support slog::endl
+    LogStream& operator<<(const LogStreamEndLine&);
+
+    // Specializing for LogStreamBoolAlpha to support slog::boolalpha
+    LogStream& operator<<(const LogStreamBoolAlpha&);
+
+    // Specializing for LogStreamFlush to support slog::flush
+    LogStream& operator<<(const LogStreamFlush&);
+};
+
+extern LogStream info;
+extern LogStream warn;
+extern LogStream err;
+
+}  // namespace slog
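As a quick illustration (editorial, not part of the patch), the logger is used like the standard streams; `slog::info`, `slog::warn`, and `slog::err` are the three instances declared above, and the definitions live in `slog.cpp` further down:

```cpp
#include <vector>

#include "samples/slog.hpp"

int main() {
    slog::info << "Loading model" << slog::endl;        // prints: [ INFO ] Loading model
    slog::warn << "Falling back to CPU" << slog::endl;  // prints: [ WARNING ] Falling back to CPU

    std::vector<int> shape = {1, 3, 224, 224};
    slog::info << "Input shape: " << shape << slog::endl;  // vectors print as [ 1 3 224 224 ]

    slog::err << "Inference failed" << slog::endl;  // routed to std::cerr
    return 0;
}
```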
diff --git a/python/openvino/runtime/common/utils/include/samples/vpu/vpu_tools_common.hpp b/python/openvino/runtime/common/utils/include/samples/vpu/vpu_tools_common.hpp
new file mode 100644
index 0000000..ba0665f
--- /dev/null
+++ b/python/openvino/runtime/common/utils/include/samples/vpu/vpu_tools_common.hpp
@@ -0,0 +1,28 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <fstream>
+#include <map>
+#include <string>
+
+static std::map<std::string, std::string> parseConfig(const std::string& configName, char comment = '#') {
+    std::map<std::string, std::string> config = {};
+
+    std::ifstream file(configName);
+    if (!file.is_open()) {
+        return config;
+    }
+
+    std::string key, value;
+    while (file >> key >> value) {
+        if (key.empty() || key[0] == comment) {
+            continue;
+        }
+        config[key] = value;
+    }
+
+    return config;
+}
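A short, hypothetical usage sketch: given a whitespace-separated key/value file, `parseConfig` returns the pairs as a map. Note that the parser reads tokens pairwise, so a `#`-prefixed line only skips cleanly when it contains exactly two tokens; the example below avoids comments for that reason:

```cpp
#include <iostream>

#include "samples/vpu/vpu_tools_common.hpp"

int main() {
    // Assume config.txt (hypothetical) contains:
    //   PERF_COUNT YES
    //   LOG_LEVEL LOG_INFO
    auto config = parseConfig("config.txt");
    for (const auto& kv : config) {
        std::cout << kv.first << " = " << kv.second << std::endl;
    }
    // prints:
    //   LOG_LEVEL = LOG_INFO
    //   PERF_COUNT = YES
    return 0;
}
```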
diff --git a/python/openvino/runtime/common/utils/src/args_helper.cpp b/python/openvino/runtime/common/utils/src/args_helper.cpp
new file mode 100644
index 0000000..ae7fa67
--- /dev/null
+++ b/python/openvino/runtime/common/utils/src/args_helper.cpp
@@ -0,0 +1,390 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// clang-format off
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <iostream>
+
+#ifdef _WIN32
+# include "samples/os/windows/w_dirent.h"
+#else
+# include <dirent.h>
+# include <unistd.h>
+#endif
+
+#include "openvino/openvino.hpp"
+
+#include "gflags/gflags.h"
+#include "samples/args_helper.hpp"
+#include "samples/slog.hpp"
+// clang-format on
+
+/**
+ * @brief Checks an input file argument and adds it to the files vector
+ * @param files reference to a vector that stores file names
+ * @param arg file or folder name
+ * @return none
+ */
+void readInputFilesArguments(std::vector<std::string>& files, const std::string& arg) {
+    struct stat sb;
+
+#if defined(_WIN32)
+    FILE* fd = fopen(arg.c_str(), "r");
+    if (!fd) {
+        slog::warn << "File " << arg << " cannot be opened!" << slog::endl;
+        return;
+    }
+
+    if (fstat(fileno(fd), &sb) != 0) {
+        fclose(fd);
+        slog::warn << "File " << arg << " cannot be opened!" << slog::endl;
+        return;
+    }
+    fclose(fd);
+#else
+    int fd = open(arg.c_str(), O_RDONLY);
+    if (fd == -1) {
+        slog::warn << "File " << arg << " cannot be opened!" << slog::endl;
+        return;
+    }
+
+    if (fstat(fd, &sb) != 0) {
+        close(fd);
+        slog::warn << "File " << arg << " cannot be opened!" << slog::endl;
+        return;
+    }
+    close(fd);
+#endif
+
+    if (S_ISDIR(sb.st_mode)) {
+        struct CloseDir {
+            void operator()(DIR* d) const noexcept {
+                if (d) {
+                    closedir(d);
+                }
+            }
+        };
+        using Dir = std::unique_ptr<DIR, CloseDir>;
+        Dir dp(opendir(arg.c_str()));
+        if (dp == nullptr) {
+            slog::warn << "Directory " << arg << " cannot be opened!" << slog::endl;
+            return;
+        }
+
+        struct dirent* ep;
+        while (nullptr != (ep = readdir(dp.get()))) {
+            std::string fileName = ep->d_name;
+            if (fileName == "." || fileName == "..")
+                continue;
+            files.push_back(arg + "/" + ep->d_name);
+        }
+    } else {
+        files.push_back(arg);
+    }
+}
+
+/**
+ * @brief Finds the -i key in the input args; needed to support multiple values for a single key
+ * @param files reference to a vector of file names
+ * @return none
+ */
+void parseInputFilesArguments(std::vector<std::string>& files) {
+    std::vector<std::string> args = gflags::GetArgvs();
+    auto args_it = begin(args);
+    const auto is_image_arg = [](const std::string& s) {
+        return s == "-i" || s == "--images";
+    };
+    const auto is_arg = [](const std::string& s) {
+        return s.front() == '-';
+    };
+
+    while (args_it != args.end()) {
+        const auto img_start = std::find_if(args_it, end(args), is_image_arg);
+        if (img_start == end(args)) {
+            break;
+        }
+        const auto img_begin = std::next(img_start);
+        const auto img_end = std::find_if(img_begin, end(args), is_arg);
+        for (auto img = img_begin; img != img_end; ++img) {
+            readInputFilesArguments(files, *img);
+        }
+        args_it = img_end;
+    }
+
+    if (files.empty()) {
+        return;
+    }
+    const size_t max_files = 20;
+    if (files.size() < max_files) {
+        slog::info << "Files were added: " << files.size() << slog::endl;
+        for (const auto& filePath : files) {
+            slog::info << "    " << filePath << slog::endl;
+        }
+    } else {
+        slog::info << "Files were added: " << files.size() << ". Too many to display each of them." << slog::endl;
+    }
+}
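To make the flag handling concrete, here is a hedged sketch of a caller (the binary name and arguments are hypothetical). Everything between `-i`/`--images` and the next `-`-prefixed token is treated as an input path, and directories are expanded one level deep:

```cpp
#include <string>
#include <vector>

#include "gflags/gflags.h"
#include "samples/args_helper.hpp"

// Hypothetical invocation: my_demo -i img1.png images_dir -niter 8
int main(int argc, char* argv[]) {
    // gflags records argv here, which is what GetArgvs() reads back.
    gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);

    std::vector<std::string> files;
    parseInputFilesArguments(files);
    // files now holds img1.png plus the first-level contents of images_dir;
    // scanning stopped at "-niter" because it starts with '-'.
    return 0;
}
```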
"NONE" : output.get_any_name(); + slog::info << " output name: " << name << slog::endl; + + const ov::element::Type type = output.get_element_type(); + slog::info << " output type: " << type << slog::endl; + + const ov::Shape shape = output.get_shape(); + slog::info << " output shape: " << shape << slog::endl; + } +} + +void configurePrePostProcessing(std::shared_ptr<ov::Model>& model, + const std::string& ip, + const std::string& op, + const std::string& iop, + const std::string& il, + const std::string& ol, + const std::string& iol, + const std::string& iml, + const std::string& oml, + const std::string& ioml) { + auto preprocessor = ov::preprocess::PrePostProcessor(model); + const auto inputs = model->inputs(); + const auto outputs = model->outputs(); + if (!ip.empty()) { + auto type = getType(ip); + for (size_t i = 0; i < inputs.size(); i++) { + preprocessor.input(i).tensor().set_element_type(type); + } + } + + if (!op.empty()) { + auto type = getType(op); + for (size_t i = 0; i < outputs.size(); i++) { + preprocessor.output(i).tensor().set_element_type(type); + } + } + + if (!iop.empty()) { + const auto user_precisions_map = parseArgMap(iop); + for (auto&& item : user_precisions_map) { + const auto& tensor_name = item.first; + const auto type = getType(item.second); + + bool tensorFound = false; + for (size_t i = 0; i < inputs.size(); i++) { + if (inputs[i].get_names().count(tensor_name)) { + preprocessor.input(i).tensor().set_element_type(type); + tensorFound = true; + break; + } + } + if (!tensorFound) { + for (size_t i = 0; i < outputs.size(); i++) { + if (outputs[i].get_names().count(tensor_name)) { + preprocessor.output(i).tensor().set_element_type(type); + tensorFound = true; + break; + } + } + } + OPENVINO_ASSERT(!tensorFound, "Model doesn't have input/output with tensor name: ", tensor_name); + } + } + if (!il.empty()) { + for (size_t i = 0; i < inputs.size(); i++) { + preprocessor.input(i).tensor().set_layout(ov::Layout(il)); + } + } + + if (!ol.empty()) { + for (size_t i = 0; i < outputs.size(); i++) { + preprocessor.output(i).tensor().set_layout(ov::Layout(ol)); + } + } + + if (!iol.empty()) { + const auto user_precisions_map = parseArgMap(iol); + for (auto&& item : user_precisions_map) { + const auto& tensor_name = item.first; + + bool tensorFound = false; + for (size_t i = 0; i < inputs.size(); i++) { + if (inputs[i].get_names().count(tensor_name)) { + preprocessor.input(i).tensor().set_layout(ov::Layout(item.second)); + tensorFound = true; + break; + } + } + if (!tensorFound) { + for (size_t i = 0; i < outputs.size(); i++) { + if (outputs[i].get_names().count(tensor_name)) { + preprocessor.output(i).tensor().set_layout(ov::Layout(item.second)); + tensorFound = true; + break; + } + } + } + OPENVINO_ASSERT(!tensorFound, "Model doesn't have input/output with tensor name: ", tensor_name); + } + } + + if (!iml.empty()) { + for (size_t i = 0; i < inputs.size(); i++) { + preprocessor.input(i).model().set_layout(ov::Layout(iml)); + } + } + + if (!oml.empty()) { + for (size_t i = 0; i < outputs.size(); i++) { + preprocessor.output(i).model().set_layout(ov::Layout(oml)); + } + } + + if (!ioml.empty()) { + const auto user_precisions_map = parseArgMap(ioml); + for (auto&& item : user_precisions_map) { + const auto& tensor_name = item.first; + + bool tensorFound = false; + for (size_t i = 0; i < inputs.size(); i++) { + if (inputs[i].get_names().count(tensor_name)) { + preprocessor.input(i).model().set_layout(ov::Layout(item.second)); + tensorFound = true; + break; + } + } + if 
+
+void configurePrePostProcessing(std::shared_ptr<ov::Model>& model,
+                                const std::string& ip,
+                                const std::string& op,
+                                const std::string& iop,
+                                const std::string& il,
+                                const std::string& ol,
+                                const std::string& iol,
+                                const std::string& iml,
+                                const std::string& oml,
+                                const std::string& ioml) {
+    auto preprocessor = ov::preprocess::PrePostProcessor(model);
+    const auto inputs = model->inputs();
+    const auto outputs = model->outputs();
+    if (!ip.empty()) {
+        auto type = getType(ip);
+        for (size_t i = 0; i < inputs.size(); i++) {
+            preprocessor.input(i).tensor().set_element_type(type);
+        }
+    }
+
+    if (!op.empty()) {
+        auto type = getType(op);
+        for (size_t i = 0; i < outputs.size(); i++) {
+            preprocessor.output(i).tensor().set_element_type(type);
+        }
+    }
+
+    if (!iop.empty()) {
+        const auto user_precisions_map = parseArgMap(iop);
+        for (auto&& item : user_precisions_map) {
+            const auto& tensor_name = item.first;
+            const auto type = getType(item.second);
+
+            bool tensorFound = false;
+            for (size_t i = 0; i < inputs.size(); i++) {
+                if (inputs[i].get_names().count(tensor_name)) {
+                    preprocessor.input(i).tensor().set_element_type(type);
+                    tensorFound = true;
+                    break;
+                }
+            }
+            if (!tensorFound) {
+                for (size_t i = 0; i < outputs.size(); i++) {
+                    if (outputs[i].get_names().count(tensor_name)) {
+                        preprocessor.output(i).tensor().set_element_type(type);
+                        tensorFound = true;
+                        break;
+                    }
+                }
+            }
+            OPENVINO_ASSERT(tensorFound, "Model doesn't have input/output with tensor name: ", tensor_name);
+        }
+    }
+    if (!il.empty()) {
+        for (size_t i = 0; i < inputs.size(); i++) {
+            preprocessor.input(i).tensor().set_layout(ov::Layout(il));
+        }
+    }
+
+    if (!ol.empty()) {
+        for (size_t i = 0; i < outputs.size(); i++) {
+            preprocessor.output(i).tensor().set_layout(ov::Layout(ol));
+        }
+    }
+
+    if (!iol.empty()) {
+        const auto user_precisions_map = parseArgMap(iol);
+        for (auto&& item : user_precisions_map) {
+            const auto& tensor_name = item.first;
+
+            bool tensorFound = false;
+            for (size_t i = 0; i < inputs.size(); i++) {
+                if (inputs[i].get_names().count(tensor_name)) {
+                    preprocessor.input(i).tensor().set_layout(ov::Layout(item.second));
+                    tensorFound = true;
+                    break;
+                }
+            }
+            if (!tensorFound) {
+                for (size_t i = 0; i < outputs.size(); i++) {
+                    if (outputs[i].get_names().count(tensor_name)) {
+                        preprocessor.output(i).tensor().set_layout(ov::Layout(item.second));
+                        tensorFound = true;
+                        break;
+                    }
+                }
+            }
+            OPENVINO_ASSERT(tensorFound, "Model doesn't have input/output with tensor name: ", tensor_name);
+        }
+    }
+
+    if (!iml.empty()) {
+        for (size_t i = 0; i < inputs.size(); i++) {
+            preprocessor.input(i).model().set_layout(ov::Layout(iml));
+        }
+    }
+
+    if (!oml.empty()) {
+        for (size_t i = 0; i < outputs.size(); i++) {
+            preprocessor.output(i).model().set_layout(ov::Layout(oml));
+        }
+    }
+
+    if (!ioml.empty()) {
+        const auto user_precisions_map = parseArgMap(ioml);
+        for (auto&& item : user_precisions_map) {
+            const auto& tensor_name = item.first;
+
+            bool tensorFound = false;
+            for (size_t i = 0; i < inputs.size(); i++) {
+                if (inputs[i].get_names().count(tensor_name)) {
+                    preprocessor.input(i).model().set_layout(ov::Layout(item.second));
+                    tensorFound = true;
+                    break;
+                }
+            }
+            if (!tensorFound) {
+                for (size_t i = 0; i < outputs.size(); i++) {
+                    if (outputs[i].get_names().count(tensor_name)) {
+                        preprocessor.output(i).model().set_layout(ov::Layout(item.second));
+                        tensorFound = true;
+                        break;
+                    }
+                }
+            }
+            OPENVINO_ASSERT(tensorFound, "Model doesn't have input/output with tensor name: ", tensor_name);
+        }
+    }
+
+    model = preprocessor.build();
+}
+
+ov::element::Type getPrecision(std::string value,
+                               const std::unordered_map<std::string, ov::element::Type>& supported_precisions) {
+    std::transform(value.begin(), value.end(), value.begin(), ::toupper);
+
+    const auto precision = supported_precisions.find(value);
+    if (precision == supported_precisions.end()) {
+        throw std::logic_error("\"" + value + "\"" + " is not a valid precision");
+    }
+
+    return precision->second;
+}
+
+ov::element::Type getPrecision2(const std::string& value) {
+    static const std::unordered_map<std::string, ov::element::Type> supported_precisions = {
+        {"FP32", ov::element::f32},
+        {"FP16", ov::element::f16},
+        {"BF16", ov::element::bf16},
+        {"U64", ov::element::u64},
+        {"I64", ov::element::i64},
+        {"U32", ov::element::u32},
+        {"I32", ov::element::i32},
+        {"U16", ov::element::u16},
+        {"I16", ov::element::i16},
+        {"U8", ov::element::u8},
+        {"I8", ov::element::i8},
+        {"BOOL", ov::element::boolean},
+    };
+
+    return getPrecision(value, supported_precisions);
+}
diff --git a/python/openvino/runtime/common/utils/src/common.cpp b/python/openvino/runtime/common/utils/src/common.cpp
new file mode 100644
index 0000000..fb238c7
--- /dev/null
+++ b/python/openvino/runtime/common/utils/src/common.cpp
@@ -0,0 +1,24 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "samples/common.hpp"
+
+std::map<std::string, std::string> parseConfig(const std::string& configName, char comment) {
+    std::map<std::string, std::string> config = {};
+
+    std::ifstream file(configName);
+    if (!file.is_open()) {
+        return config;
+    }
+
+    std::string key, value;
+    while (file >> key >> value) {
+        if (key.empty() || key[0] == comment) {
+            continue;
+        }
+        config[key] = value;
+    }
+
+    return config;
+}
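A minimal sketch of driving `configurePrePostProcessing` from flag values, assuming the declarations in `samples/args_helper.hpp`; the model path and the tensor name `data` are hypothetical, and the helper throws if a named tensor does not exist in the model:

```cpp
#include <memory>
#include <string>

#include "openvino/openvino.hpp"
#include "samples/args_helper.hpp"

int main() {
    ov::Core core;
    // Hypothetical model file; any IR/ONNX model is handled the same way.
    std::shared_ptr<ov::Model> model = core.read_model("model.xml");

    // Set every input tensor to U8, and give the (hypothetical) tensor
    // named "data" an NHWC layout; the remaining options stay empty.
    configurePrePostProcessing(model,
                               /*ip=*/"U8", /*op=*/"",
                               /*iop=*/"", /*il=*/"",
                               /*ol=*/"", /*iol=*/"data:NHWC",
                               /*iml=*/"", /*oml=*/"", /*ioml=*/"");

    auto compiled = core.compile_model(model, "CPU");
    return 0;
}
```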
" Median: " + : " " + std::to_string(percentile_boundary) + " percentile: "; + + slog::info << percentileStr << double_to_string(median_or_percentile) << " ms" << slog::endl; + slog::info << " Average: " << double_to_string(avg) << " ms" << slog::endl; + slog::info << " Min: " << double_to_string(min) << " ms" << slog::endl; + slog::info << " Max: " << double_to_string(max) << " ms" << slog::endl; +} + +void LatencyMetrics::fill_data(std::vector<double> latencies, size_t percentile_boundary) { + if (latencies.empty()) { + throw std::logic_error("Latency metrics class expects non-empty vector of latencies at consturction."); + } + std::sort(latencies.begin(), latencies.end()); + min = latencies[0]; + avg = std::accumulate(latencies.begin(), latencies.end(), 0.0) / latencies.size(); + median_or_percentile = latencies[size_t(latencies.size() / 100.0 * percentile_boundary)]; + max = latencies.back(); +}; diff --git a/python/openvino/runtime/common/utils/src/slog.cpp b/python/openvino/runtime/common/utils/src/slog.cpp new file mode 100644 index 0000000..df484ec --- /dev/null +++ b/python/openvino/runtime/common/utils/src/slog.cpp @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// clang-format off +#include <iostream> + +#include "samples/slog.hpp" +// clang-format on + +namespace slog { + +LogStream info("INFO", std::cout); +LogStream warn("WARNING", std::cout); +LogStream err("ERROR", std::cerr); + +LogStream::LogStream(const std::string& prefix, std::ostream& log_stream) : _prefix(prefix), _new_line(true) { + _log_stream = &log_stream; +} + +// Specializing for LogStreamEndLine to support slog::endl +LogStream& LogStream::operator<<(const LogStreamEndLine& /*arg*/) { + if (_new_line) + (*_log_stream) << "[ " << _prefix << " ] "; + _new_line = true; + + (*_log_stream) << std::endl; + return *this; +} + +// Specializing for LogStreamBoolAlpha to support slog::boolalpha +LogStream& LogStream::operator<<(const LogStreamBoolAlpha& /*arg*/) { + (*_log_stream) << std::boolalpha; + return *this; +} + +// Specializing for LogStreamFlush to support slog::flush +LogStream& LogStream::operator<<(const LogStreamFlush& /*arg*/) { + (*_log_stream) << std::flush; + return *this; +} + +} // namespace slog |
