Diffstat (limited to 'python/openvino/runtime/common')
-rw-r--r--  python/openvino/runtime/common/CMakeLists.txt | 25
-rw-r--r--  python/openvino/runtime/common/README.md | 7
-rw-r--r--  python/openvino/runtime/common/demo_utils/CMakeLists.txt | 14
-rw-r--r--  python/openvino/runtime/common/demo_utils/include/utils/args_helper.hpp | 43
-rw-r--r--  python/openvino/runtime/common/demo_utils/include/utils/common.hpp | 190
-rw-r--r--  python/openvino/runtime/common/demo_utils/include/utils/config_factory.h | 52
-rw-r--r--  python/openvino/runtime/common/demo_utils/include/utils/default_flags.hpp | 21
-rw-r--r--  python/openvino/runtime/common/demo_utils/include/utils/grid_mat.hpp | 127
-rw-r--r--  python/openvino/runtime/common/demo_utils/include/utils/image_utils.h | 29
-rw-r--r--  python/openvino/runtime/common/demo_utils/include/utils/images_capture.h | 53
-rw-r--r--  python/openvino/runtime/common/demo_utils/include/utils/input_wrappers.hpp | 149
-rw-r--r--  python/openvino/runtime/common/demo_utils/include/utils/kuhn_munkres.hpp | 57
-rw-r--r--  python/openvino/runtime/common/demo_utils/include/utils/nms.hpp | 81
-rw-r--r--  python/openvino/runtime/common/demo_utils/include/utils/ocv_common.hpp | 289
-rw-r--r--  python/openvino/runtime/common/demo_utils/include/utils/performance_metrics.hpp | 92
-rw-r--r--  python/openvino/runtime/common/demo_utils/include/utils/shared_tensor_allocator.hpp | 47
-rw-r--r--  python/openvino/runtime/common/demo_utils/include/utils/slog.hpp | 99
-rw-r--r--  python/openvino/runtime/common/demo_utils/include/utils/threads_common.hpp | 165
-rw-r--r--  python/openvino/runtime/common/demo_utils/src/args_helper.cpp | 155
-rw-r--r--  python/openvino/runtime/common/demo_utils/src/config_factory.cpp | 111
-rw-r--r--  python/openvino/runtime/common/demo_utils/src/image_utils.cpp | 55
-rw-r--r--  python/openvino/runtime/common/demo_utils/src/images_capture.cpp | 327
-rw-r--r--  python/openvino/runtime/common/demo_utils/src/kuhn_munkres.cpp | 169
-rw-r--r--  python/openvino/runtime/common/demo_utils/src/performance_metrics.cpp | 114
-rw-r--r--  python/openvino/runtime/common/demo_utils/src/w_dirent.hpp | 114
-rw-r--r--  python/openvino/runtime/common/format_reader/CMakeLists.txt | 55
-rw-r--r--  python/openvino/runtime/common/format_reader/MnistUbyte.cpp | 66
-rw-r--r--  python/openvino/runtime/common/format_reader/MnistUbyte.h | 58
-rw-r--r--  python/openvino/runtime/common/format_reader/bmp.cpp | 64
-rw-r--r--  python/openvino/runtime/common/format_reader/bmp.h | 75
-rw-r--r--  python/openvino/runtime/common/format_reader/format_reader.cpp | 44
-rw-r--r--  python/openvino/runtime/common/format_reader/format_reader.h | 95
-rw-r--r--  python/openvino/runtime/common/format_reader/format_reader_ptr.h | 43
-rw-r--r--  python/openvino/runtime/common/format_reader/opencv_wrapper.cpp | 83
-rw-r--r--  python/openvino/runtime/common/format_reader/opencv_wrapper.h | 58
-rw-r--r--  python/openvino/runtime/common/format_reader/register.h | 58
-rw-r--r--  python/openvino/runtime/common/format_reader/yuv_nv12.cpp | 36
-rw-r--r--  python/openvino/runtime/common/format_reader/yuv_nv12.h | 57
-rw-r--r--  python/openvino/runtime/common/models/CMakeLists.txt | 15
-rw-r--r--  python/openvino/runtime/common/models/include/models/associative_embedding_decoder.h | 94
-rw-r--r--  python/openvino/runtime/common/models/include/models/classification_model.h | 57
-rw-r--r--  python/openvino/runtime/common/models/include/models/deblurring_model.h | 52
-rw-r--r--  python/openvino/runtime/common/models/include/models/detection_model.h | 51
-rw-r--r--  python/openvino/runtime/common/models/include/models/detection_model_centernet.h | 59
-rw-r--r--  python/openvino/runtime/common/models/include/models/detection_model_faceboxes.h | 55
-rw-r--r--  python/openvino/runtime/common/models/include/models/detection_model_retinaface.h | 74
-rw-r--r--  python/openvino/runtime/common/models/include/models/detection_model_retinaface_pt.h | 81
-rw-r--r--  python/openvino/runtime/common/models/include/models/detection_model_ssd.h | 63
-rw-r--r--  python/openvino/runtime/common/models/include/models/detection_model_yolo.h | 107
-rw-r--r--  python/openvino/runtime/common/models/include/models/detection_model_yolov3_onnx.h | 50
-rw-r--r--  python/openvino/runtime/common/models/include/models/detection_model_yolox.h | 54
-rw-r--r--  python/openvino/runtime/common/models/include/models/hpe_model_associative_embedding.h | 89
-rw-r--r--  python/openvino/runtime/common/models/include/models/hpe_model_openpose.h | 78
-rw-r--r--  python/openvino/runtime/common/models/include/models/image_model.h | 49
-rw-r--r--  python/openvino/runtime/common/models/include/models/input_data.h | 41
-rw-r--r--  python/openvino/runtime/common/models/include/models/internal_model_data.h | 48
-rw-r--r--  python/openvino/runtime/common/models/include/models/jpeg_restoration_model.h | 55
-rw-r--r--  python/openvino/runtime/common/models/include/models/model_base.h | 77
-rw-r--r--  python/openvino/runtime/common/models/include/models/openpose_decoder.h | 62
-rw-r--r--  python/openvino/runtime/common/models/include/models/results.h | 122
-rw-r--r--  python/openvino/runtime/common/models/include/models/segmentation_model.h | 50
-rw-r--r--  python/openvino/runtime/common/models/include/models/style_transfer_model.h | 43
-rw-r--r--  python/openvino/runtime/common/models/include/models/super_resolution_model.h | 49
-rw-r--r--  python/openvino/runtime/common/models/src/associative_embedding_decoder.cpp | 201
-rw-r--r--  python/openvino/runtime/common/models/src/classification_model.cpp | 196
-rw-r--r--  python/openvino/runtime/common/models/src/deblurring_model.cpp | 158
-rw-r--r--  python/openvino/runtime/common/models/src/detection_model.cpp | 52
-rw-r--r--  python/openvino/runtime/common/models/src/detection_model_centernet.cpp | 302
-rw-r--r--  python/openvino/runtime/common/models/src/detection_model_faceboxes.cpp | 261
-rw-r--r--  python/openvino/runtime/common/models/src/detection_model_retinaface.cpp | 394
-rw-r--r--  python/openvino/runtime/common/models/src/detection_model_retinaface_pt.cpp | 277
-rw-r--r--  python/openvino/runtime/common/models/src/detection_model_ssd.cpp | 281
-rw-r--r--  python/openvino/runtime/common/models/src/detection_model_yolo.cpp | 481
-rw-r--r--  python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp | 188
-rw-r--r--  python/openvino/runtime/common/models/src/detection_model_yolox.cpp | 194
-rw-r--r--  python/openvino/runtime/common/models/src/hpe_model_associative_embedding.cpp | 264
-rw-r--r--  python/openvino/runtime/common/models/src/hpe_model_openpose.cpp | 256
-rw-r--r--  python/openvino/runtime/common/models/src/image_model.cpp | 57
-rw-r--r--  python/openvino/runtime/common/models/src/jpeg_restoration_model.cpp | 167
-rw-r--r--  python/openvino/runtime/common/models/src/model_base.cpp | 67
-rw-r--r--  python/openvino/runtime/common/models/src/openpose_decoder.cpp | 345
-rw-r--r--  python/openvino/runtime/common/models/src/segmentation_model.cpp | 157
-rw-r--r--  python/openvino/runtime/common/models/src/style_transfer_model.cpp | 107
-rw-r--r--  python/openvino/runtime/common/models/src/super_resolution_model.cpp | 207
-rw-r--r--  python/openvino/runtime/common/monitors/CMakeLists.txt | 38
-rw-r--r--  python/openvino/runtime/common/monitors/include/monitors/cpu_monitor.h | 28
-rw-r--r--  python/openvino/runtime/common/monitors/include/monitors/memory_monitor.h | 34
-rw-r--r--  python/openvino/runtime/common/monitors/include/monitors/presenter.h | 44
-rw-r--r--  python/openvino/runtime/common/monitors/include/monitors/query_wrapper.h | 17
-rw-r--r--  python/openvino/runtime/common/monitors/src/cpu_monitor.cpp | 206
-rw-r--r--  python/openvino/runtime/common/monitors/src/memory_monitor.cpp | 213
-rw-r--r--  python/openvino/runtime/common/monitors/src/presenter.cpp | 330
-rw-r--r--  python/openvino/runtime/common/monitors/src/query_wrapper.cpp | 22
-rw-r--r--  python/openvino/runtime/common/pipelines/CMakeLists.txt | 15
-rw-r--r--  python/openvino/runtime/common/pipelines/include/pipelines/async_pipeline.h | 121
-rw-r--r--  python/openvino/runtime/common/pipelines/include/pipelines/metadata.h | 51
-rw-r--r--  python/openvino/runtime/common/pipelines/include/pipelines/requests_pool.h | 67
-rw-r--r--  python/openvino/runtime/common/pipelines/src/async_pipeline.cpp | 166
-rw-r--r--  python/openvino/runtime/common/pipelines/src/requests_pool.cpp | 94
-rw-r--r--  python/openvino/runtime/common/utils/CMakeLists.txt | 61
-rw-r--r--  python/openvino/runtime/common/utils/include/samples/args_helper.hpp | 112
-rw-r--r--  python/openvino/runtime/common/utils/include/samples/classification_results.h | 205
-rw-r--r--  python/openvino/runtime/common/utils/include/samples/common.hpp | 1429
-rw-r--r--  python/openvino/runtime/common/utils/include/samples/console_progress.hpp | 107
-rw-r--r--  python/openvino/runtime/common/utils/include/samples/csv_dumper.hpp | 98
-rw-r--r--  python/openvino/runtime/common/utils/include/samples/latency_metrics.hpp | 42
-rw-r--r--  python/openvino/runtime/common/utils/include/samples/ocv_common.hpp | 92
-rw-r--r--  python/openvino/runtime/common/utils/include/samples/os/windows/w_dirent.h | 176
-rw-r--r--  python/openvino/runtime/common/utils/include/samples/slog.hpp | 102
-rw-r--r--  python/openvino/runtime/common/utils/include/samples/vpu/vpu_tools_common.hpp | 28
-rw-r--r--  python/openvino/runtime/common/utils/src/args_helper.cpp | 390
-rw-r--r--  python/openvino/runtime/common/utils/src/common.cpp | 24
-rw-r--r--  python/openvino/runtime/common/utils/src/latency_metrics.cpp | 42
-rw-r--r--  python/openvino/runtime/common/utils/src/slog.cpp | 43
114 files changed, 13961 insertions, 0 deletions
diff --git a/python/openvino/runtime/common/CMakeLists.txt b/python/openvino/runtime/common/CMakeLists.txt
new file mode 100644
index 0000000..8ea3028
--- /dev/null
+++ b/python/openvino/runtime/common/CMakeLists.txt
@@ -0,0 +1,25 @@
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Add dependencies for the following modules
+find_package(OpenCV COMPONENTS core REQUIRED)
+
+# pull in plugin apis and preproc
+add_subdirectory(utils)
+add_subdirectory(format_reader)
+add_subdirectory(monitors)
+
+if(DEFINED BUILD_DEMO)
+ # This dependency defines CNN prototypes used by text-detection demos.
+ include_directories("$ENV{COREDLA_ROOT}/transformations/inc/")
+ add_subdirectory(demo_utils)
+ add_subdirectory(models)
+ # This dependency is needed for runtime demos. The config_factory is used
+ # to produce hardware configurations and is required by pipelines.
+ #add_subdirectory(utils)
+
+ # The following steps compile and link the pipelines library from the OpenVINO 2021.4 installation folder.
+ # This dependency is required by the segmentation demo; it implements a pipeline for streaming input to and output from inference.
+ add_subdirectory(pipelines)
+endif()
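+
+# A minimal configure sketch (illustrative values; COREDLA_ROOT must point at a
+# valid CoreDLA installation when BUILD_DEMO is defined):
+#   cmake -DBUILD_DEMO=ON -DOpenCV_DIR=/path/to/opencv/cmake ..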
diff --git a/python/openvino/runtime/common/README.md b/python/openvino/runtime/common/README.md
new file mode 100644
index 0000000..1953fed
--- /dev/null
+++ b/python/openvino/runtime/common/README.md
@@ -0,0 +1,7 @@
+## Patch Log
+
+This README documents the changes made to `runtime/common` so that they can be preserved and reapplied in future OpenVINO uplifts or updates.
+
+| Patch Name | PR Number | Description |
+| ------------------------- | ------------------------- | ------------------------- |
+| Make dla_benchmark less chatty | #3065 | Set the maximum number of printed warnings |
diff --git a/python/openvino/runtime/common/demo_utils/CMakeLists.txt b/python/openvino/runtime/common/demo_utils/CMakeLists.txt
new file mode 100644
index 0000000..b79d72a
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+file(GLOB_RECURSE HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/include/*")
+file(GLOB_RECURSE SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/*")
+
+source_group("src" FILES ${SOURCES})
+source_group("include" FILES ${HEADERS})
+
+add_library(utils STATIC ${HEADERS} ${SOURCES})
+target_include_directories(utils PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include"
+ "$ENV{COREDLA_ROOT}/dla_plugin/inc/")
+target_link_libraries(utils PRIVATE openvino::runtime opencv_core opencv_imgcodecs opencv_videoio ie_samples_utils)
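+
+# A consuming demo links this library by target name (illustrative):
+#   target_link_libraries(my_demo PRIVATE utils)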
diff --git a/python/openvino/runtime/common/demo_utils/include/utils/args_helper.hpp b/python/openvino/runtime/common/demo_utils/include/utils/args_helper.hpp
new file mode 100644
index 0000000..7a638cc
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/include/utils/args_helper.hpp
@@ -0,0 +1,43 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief a header file with common samples functionality
+ * @file args_helper.hpp
+ */
+
+#pragma once
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include <opencv2/core/types.hpp>
+#include <openvino/openvino.hpp>
+
+/**
+* @brief Checks input arguments and the existence of the specified files in a given folder
+* @param files vector that is updated with the verified input files
+* @param arg path to a file or folder to be checked for existence
+*/
+void readInputFilesArguments(std::vector<std::string>& files, const std::string& arg);
+
+/**
+* @brief Finds the -i/--i key in the input arguments; needed to process
+* multiple values passed for a single key
+* @param files vector that is updated with the verified input files
+*/
+void parseInputFilesArguments(std::vector<std::string>& files);
+
+std::vector<std::string> split(const std::string& s, char delim);
+
+std::vector<std::string> parseDevices(const std::string& device_string);
+
+std::map<std::string, int32_t> parseValuePerDevice(const std::set<std::string>& devices,
+ const std::string& values_string);
+
+cv::Size stringToSize(const std::string& str);
+
+std::map<std::string, ov::Layout> parseLayoutString(const std::string& layout_string);
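+
+// A minimal usage sketch (hypothetical argument values):
+//
+//   std::vector<std::string> devices = parseDevices("MULTI:GPU,CPU");
+//   std::map<std::string, int32_t> streamsPerDevice =
+//       parseValuePerDevice({"GPU", "CPU"}, "GPU:2,CPU:4");
+//   cv::Size resolution = stringToSize("1280x720");
+//   std::map<std::string, ov::Layout> layouts = parseLayoutString("input:NCHW");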
diff --git a/python/openvino/runtime/common/demo_utils/include/utils/common.hpp b/python/openvino/runtime/common/demo_utils/include/utils/common.hpp
new file mode 100644
index 0000000..dbe7cf0
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/include/utils/common.hpp
@@ -0,0 +1,190 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief a header file with common samples functionality
+ * @file common.hpp
+ */
+
+#pragma once
+
+#include <iostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+#include "utils/slog.hpp"
+#include "utils/args_helper.hpp"
+
+#ifndef UNUSED
+#ifdef _WIN32
+#define UNUSED
+#else
+#define UNUSED __attribute__((unused))
+#endif
+#endif
+
+template <typename T, std::size_t N>
+constexpr std::size_t arraySize(const T(&)[N]) noexcept {
+ return N;
+}
+
+static inline void catcher() noexcept {
+ if (std::current_exception()) {
+ try {
+ std::rethrow_exception(std::current_exception());
+ } catch (const std::exception& error) {
+ slog::err << error.what() << slog::endl;
+ } catch (...) {
+ slog::err << "Non-exception object thrown" << slog::endl;
+ }
+ std::exit(1);
+ }
+ std::abort();
+}
+
+template <typename T>
+T clamp(T value, T low, T high) {
+ return value < low ? low : (value > high ? high : value);
+}
+
+inline slog::LogStream& operator<<(slog::LogStream& os, const ov::Version& version) {
+ return os << "OpenVINO" << slog::endl
+ << "\tversion: " << OPENVINO_VERSION_MAJOR << "." << OPENVINO_VERSION_MINOR << "." << OPENVINO_VERSION_PATCH << slog::endl
+ << "\tbuild: " << version.buildNumber;
+}
+
+/**
+ * @class Color
+ * @brief A Color class stores channels of a given color
+ */
+class Color {
+private:
+ unsigned char _r;
+ unsigned char _g;
+ unsigned char _b;
+
+public:
+ /**
+ * A constructor that initializes all three channels.
+ * @param r - value for red channel
+ * @param g - value for green channel
+ * @param b - value for blue channel
+ */
+ Color(unsigned char r,
+ unsigned char g,
+ unsigned char b) : _r(r), _g(g), _b(b) {}
+
+ inline unsigned char red() const {
+ return _r;
+ }
+
+ inline unsigned char blue() const {
+ return _b;
+ }
+
+ inline unsigned char green() const {
+ return _g;
+ }
+};
+
+// Known colors for training classes from the Cityscapes dataset
+static UNUSED const Color CITYSCAPES_COLORS[] = {
+ { 128, 64, 128 },
+ { 232, 35, 244 },
+ { 70, 70, 70 },
+ { 156, 102, 102 },
+ { 153, 153, 190 },
+ { 153, 153, 153 },
+ { 30, 170, 250 },
+ { 0, 220, 220 },
+ { 35, 142, 107 },
+ { 152, 251, 152 },
+ { 180, 130, 70 },
+ { 60, 20, 220 },
+ { 0, 0, 255 },
+ { 142, 0, 0 },
+ { 70, 0, 0 },
+ { 100, 60, 0 },
+ { 90, 0, 0 },
+ { 230, 0, 0 },
+ { 32, 11, 119 },
+ { 0, 74, 111 },
+ { 81, 0, 81 }
+};
+
+inline void showAvailableDevices() {
+ ov::Core core;
+ std::vector<std::string> devices = core.get_available_devices();
+
+ std::cout << "Available devices:";
+ for (const auto& device : devices) {
+ std::cout << ' ' << device;
+ }
+ std::cout << std::endl;
+}
+
+inline std::string fileNameNoExt(const std::string& filepath) {
+ auto pos = filepath.rfind('.');
+ if (pos == std::string::npos) return filepath;
+ return filepath.substr(0, pos);
+}
+
+inline void logCompiledModelInfo(
+ const ov::CompiledModel& compiledModel,
+ const std::string& modelName,
+ const std::string& deviceName,
+ const std::string& modelType = "") {
+ slog::info << "The " << modelType << (modelType.empty() ? "" : " ") << "model " << modelName << " is loaded to " << deviceName << slog::endl;
+ std::set<std::string> devices;
+ for (const std::string& device : parseDevices(deviceName)) {
+ devices.insert(device);
+ }
+
+ if (devices.find("AUTO") == devices.end()) { // do not print info for AUTO device
+ for (const auto& device : devices) {
+ try {
+ slog::info << "\tDevice: " << device << slog::endl;
+ int32_t nstreams = compiledModel.get_property(ov::streams::num);
+ slog::info << "\t\tNumber of streams: " << nstreams << slog::endl;
+ if (device == "CPU") {
+ int32_t nthreads = compiledModel.get_property(ov::inference_num_threads);
+ slog::info << "\t\tNumber of threads: " << (nthreads == 0 ? "AUTO" : std::to_string(nthreads)) << slog::endl;
+ }
+ }
+ catch (const ov::Exception&) {}
+ }
+ }
+}
+
+inline void logBasicModelInfo(const std::shared_ptr<ov::Model>& model) {
+ slog::info << "Model name: " << model->get_friendly_name() << slog::endl;
+
+ // Dump information about model inputs/outputs
+ ov::OutputVector inputs = model->inputs();
+ ov::OutputVector outputs = model->outputs();
+
+ slog::info << "\tInputs: " << slog::endl;
+ for (const ov::Output<ov::Node>& input : inputs) {
+ const std::string name = input.get_any_name();
+ const ov::element::Type type = input.get_element_type();
+ const ov::PartialShape shape = input.get_partial_shape();
+ const ov::Layout layout = ov::layout::get_layout(input);
+
+ slog::info << "\t\t" << name << ", " << type << ", " << shape << ", " << layout.to_string() << slog::endl;
+ }
+
+ slog::info << "\tOutputs: " << slog::endl;
+ for (const ov::Output<ov::Node>& output : outputs) {
+ const std::string name = output.get_any_name();
+ const ov::element::Type type = output.get_element_type();
+ const ov::PartialShape shape = output.get_partial_shape();
+ const ov::Layout layout = ov::layout::get_layout(output);
+
+ slog::info << "\t\t" << name << ", " << type << ", " << shape << ", " << layout.to_string() << slog::endl;
+ }
+
+ return;
+}
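+
+// A minimal usage sketch (assuming "model.xml" is a valid OpenVINO IR file):
+//
+//   ov::Core core;
+//   slog::info << ov::get_openvino_version() << slog::endl;
+//   showAvailableDevices();
+//   std::shared_ptr<ov::Model> model = core.read_model("model.xml");
+//   logBasicModelInfo(model);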
diff --git a/python/openvino/runtime/common/demo_utils/include/utils/config_factory.h b/python/openvino/runtime/common/demo_utils/include/utils/config_factory.h
new file mode 100644
index 0000000..c7440b5
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/include/utils/config_factory.h
@@ -0,0 +1,52 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <stdint.h>
+
+#include <map>
+#include <set>
+#include <string>
+
+#include <openvino/openvino.hpp>
+
+struct ModelConfig {
+ std::string deviceName;
+ std::string cpuExtensionsPath;
+ std::string clKernelsConfigPath;
+ std::string fpgaArchPath;
+ unsigned int maxAsyncRequests;
+ ov::AnyMap compiledModelConfig;
+
+ std::set<std::string> getDevices();
+ std::map<std::string, std::string> getLegacyConfig();
+
+protected:
+ std::set<std::string> devices;
+};
+
+class ConfigFactory {
+public:
+ static ModelConfig getUserConfig(const std::string& flags_d,
+ uint32_t flags_nireq,
+ const std::string& flags_nstreams,
+ uint32_t flags_nthreads,
+ const std::string &flags_arch);
+ static ModelConfig getMinLatencyConfig(const std::string& flags_d, uint32_t flags_nireq);
+
+protected:
+ static ModelConfig getCommonConfig(const std::string& flags_d, uint32_t flags_nireq);
+};
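+
+// A minimal usage sketch (hypothetical flag values; the resulting
+// compiledModelConfig is passed straight to ov::Core::compile_model):
+//
+//   ModelConfig config = ConfigFactory::getUserConfig("CPU", 4, "4", 0, "");
+//   ov::Core core;
+//   ov::CompiledModel compiledModel =
+//       core.compile_model(model, config.deviceName, config.compiledModelConfig);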
diff --git a/python/openvino/runtime/common/demo_utils/include/utils/default_flags.hpp b/python/openvino/runtime/common/demo_utils/include/utils/default_flags.hpp
new file mode 100644
index 0000000..83c32c2
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/include/utils/default_flags.hpp
@@ -0,0 +1,21 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <gflags/gflags.h>
+
+#define DEFINE_INPUT_FLAGS \
+DEFINE_string(i, "", input_message); \
+DEFINE_bool(loop, false, loop_message);
+
+#define DEFINE_OUTPUT_FLAGS \
+DEFINE_string(o, "", output_message); \
+DEFINE_int32(limit, 1000, limit_message);
+
+static const char input_message[] = "Required. An input to process. The input must be a single image, a folder of "
+ "images, a video file, or a camera id.";
+static const char loop_message[] = "Optional. Enable reading the input in a loop.";
+static const char output_message[] = "Optional. Name of the output file(s) to save.";
+static const char limit_message[] = "Optional. Number of frames to store in output. If 0 is set, all frames are stored.";
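+
+// A minimal usage sketch (in a demo's main translation unit):
+//
+//   DEFINE_INPUT_FLAGS
+//   DEFINE_OUTPUT_FLAGS
+//
+//   int main(int argc, char* argv[]) {
+//       gflags::ParseCommandLineFlags(&argc, &argv, true);
+//       // FLAGS_i, FLAGS_loop, FLAGS_o and FLAGS_limit are now populated
+//   }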
diff --git a/python/openvino/runtime/common/demo_utils/include/utils/grid_mat.hpp b/python/openvino/runtime/common/demo_utils/include/utils/grid_mat.hpp
new file mode 100644
index 0000000..7d46d2b
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/include/utils/grid_mat.hpp
@@ -0,0 +1,127 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <algorithm>
+#include <set>
+#include <string>
+#include <vector>
+
+#include <opencv2/core/core.hpp>
+
+class GridMat {
+public:
+ cv::Mat outimg;
+
+ explicit GridMat(const std::vector<cv::Size>& sizes, const cv::Size maxDisp = cv::Size{1920, 1080}) {
+ size_t maxWidth = 0;
+ size_t maxHeight = 0;
+ for (size_t i = 0; i < sizes.size(); i++) {
+ maxWidth = std::max(maxWidth, static_cast<size_t>(sizes[i].width));
+ maxHeight = std::max(maxHeight, static_cast<size_t>(sizes[i].height));
+ }
+ if (0 == maxWidth || 0 == maxHeight) {
+ throw std::invalid_argument("Input resolution must not be zero.");
+ }
+
+ size_t nGridCols = static_cast<size_t>(ceil(sqrt(static_cast<float>(sizes.size()))));
+ size_t nGridRows = (sizes.size() - 1) / nGridCols + 1;
+ size_t gridMaxWidth = static_cast<size_t>(maxDisp.width/nGridCols);
+ size_t gridMaxHeight = static_cast<size_t>(maxDisp.height/nGridRows);
+
+ float scaleWidth = static_cast<float>(gridMaxWidth) / maxWidth;
+ float scaleHeight = static_cast<float>(gridMaxHeight) / maxHeight;
+ float scaleFactor = std::min(1.f, std::min(scaleWidth, scaleHeight));
+
+ cellSize.width = static_cast<int>(maxWidth * scaleFactor);
+ cellSize.height = static_cast<int>(maxHeight * scaleFactor);
+
+ for (size_t i = 0; i < sizes.size(); i++) {
+ cv::Point p;
+ p.x = cellSize.width * (i % nGridCols);
+ p.y = cellSize.height * (i / nGridCols);
+ points.push_back(p);
+ }
+
+ outimg.create(cellSize.height * nGridRows, cellSize.width * nGridCols, CV_8UC3);
+ outimg.setTo(0);
+ clear();
+ }
+
+ cv::Size getCellSize() {
+ return cellSize;
+ }
+
+ void fill(std::vector<cv::Mat>& frames) {
+ if (frames.size() > points.size()) {
+ throw std::logic_error("Cannot display " + std::to_string(frames.size()) + " channels in a grid with " + std::to_string(points.size()) + " cells");
+ }
+
+ for (size_t i = 0; i < frames.size(); i++) {
+ cv::Mat cell = outimg(cv::Rect(points[i].x, points[i].y, cellSize.width, cellSize.height));
+
+ if ((cellSize.width == frames[i].cols) && (cellSize.height == frames[i].rows)) {
+ frames[i].copyTo(cell);
+ } else if ((cellSize.width > frames[i].cols) && (cellSize.height > frames[i].rows)) {
+ frames[i].copyTo(cell(cv::Rect(0, 0, frames[i].cols, frames[i].rows)));
+ } else {
+ cv::resize(frames[i], cell, cellSize);
+ }
+ }
+ unupdatedSourceIDs.clear();
+ }
+
+ void update(const cv::Mat& frame, const size_t sourceID) {
+ const cv::Mat& cell = outimg(cv::Rect(points[sourceID], cellSize));
+
+ if ((cellSize.width == frame.cols) && (cellSize.height == frame.rows)) {
+ frame.copyTo(cell);
+ } else if ((cellSize.width > frame.cols) && (cellSize.height > frame.rows)) {
+ frame.copyTo(cell(cv::Rect(0, 0, frame.cols, frame.rows)));
+ } else {
+ cv::resize(frame, cell, cellSize);
+ }
+ unupdatedSourceIDs.erase(unupdatedSourceIDs.find(sourceID));
+ }
+
+ bool isFilled() const noexcept {
+ return unupdatedSourceIDs.empty();
+ }
+ void clear() {
+ size_t counter = 0;
+ std::generate_n(std::inserter(unupdatedSourceIDs, unupdatedSourceIDs.end()), points.size(), [&counter]{return counter++;});
+ }
+ std::set<size_t> getUnupdatedSourceIDs() const noexcept {
+ return unupdatedSourceIDs;
+ }
+ cv::Mat getMat() const noexcept {
+ return outimg;
+ }
+
+private:
+ cv::Size cellSize;
+ std::set<size_t> unupdatedSourceIDs;
+ std::vector<cv::Point> points;
+};
+
+void fillROIColor(cv::Mat& displayImage, cv::Rect roi, cv::Scalar color, double opacity) {
+ if (opacity > 0) {
+ roi = roi & cv::Rect(0, 0, displayImage.cols, displayImage.rows);
+ cv::Mat textROI = displayImage(roi);
+ cv::addWeighted(color, opacity, textROI, 1.0 - opacity , 0.0, textROI);
+ }
+}
+
+void putTextOnImage(cv::Mat& displayImage, std::string str, cv::Point p,
+ cv::HersheyFonts font, double fontScale, cv::Scalar color,
+ int thickness = 1, cv::Scalar bgcolor = cv::Scalar(),
+ double opacity = 0) {
+ int baseline = 0;
+ cv::Size textSize = cv::getTextSize(str, font, 0.5, 1, &baseline);
+ fillROIColor(displayImage, cv::Rect(cv::Point(p.x, p.y + baseline),
+ cv::Point(p.x + textSize.width, p.y - textSize.height)),
+ bgcolor, opacity);
+ cv::putText(displayImage, str, p, font, fontScale, color, thickness);
+}
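+
+// A minimal usage sketch (two 640x480 sources tiled into one display image):
+//
+//   GridMat grid({cv::Size{640, 480}, cv::Size{640, 480}});
+//   grid.update(frame0, 0);
+//   grid.update(frame1, 1);
+//   if (grid.isFilled())
+//       cv::imshow("Grid", grid.getMat());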
diff --git a/python/openvino/runtime/common/demo_utils/include/utils/image_utils.h b/python/openvino/runtime/common/demo_utils/include/utils/image_utils.h
new file mode 100644
index 0000000..2731a9a
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/include/utils/image_utils.h
@@ -0,0 +1,29 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include <opencv2/opencv.hpp>
+
+enum RESIZE_MODE {
+ RESIZE_FILL,
+ RESIZE_KEEP_ASPECT,
+ RESIZE_KEEP_ASPECT_LETTERBOX
+};
+
+cv::Mat resizeImageExt(const cv::Mat& mat, int width, int height, RESIZE_MODE resizeMode = RESIZE_FILL,
+ cv::InterpolationFlags interpolationMode = cv::INTER_LINEAR, cv::Rect* roi = nullptr,
+ cv::Scalar BorderConstant = cv::Scalar(0, 0, 0));
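+
+// A minimal usage sketch (letterbox a frame into a 416x416 model input and
+// remember which region of the result holds real pixels):
+//
+//   cv::Rect roi;
+//   cv::Mat input = resizeImageExt(frame, 416, 416, RESIZE_KEEP_ASPECT_LETTERBOX,
+//                                  cv::INTER_LINEAR, &roi);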
diff --git a/python/openvino/runtime/common/demo_utils/include/utils/images_capture.h b/python/openvino/runtime/common/demo_utils/include/utils/images_capture.h
new file mode 100644
index 0000000..f2afdfc
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/include/utils/images_capture.h
@@ -0,0 +1,53 @@
+// Copyright (C) 2020-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+#include <stddef.h>
+
+#include <limits>
+#include <memory>
+#include <string>
+
+#include <opencv2/core.hpp>
+
+#include "utils/performance_metrics.hpp"
+
+enum class read_type { efficient, safe };
+
+class ImagesCapture {
+public:
+ const bool loop;
+
+ ImagesCapture(bool loop) : loop{loop} {}
+ virtual double fps() const = 0;
+ virtual cv::Mat read() = 0;
+ virtual std::string getType() const = 0;
+ const PerformanceMetrics& getMetrics() {
+ return readerMetrics;
+ }
+ virtual ~ImagesCapture() = default;
+
+protected:
+ PerformanceMetrics readerMetrics;
+};
+
+// An advanced version of
+// try {
+// return cv::VideoCapture(std::stoi(input));
+// } catch (const std::invalid_argument&) {
+// return cv::VideoCapture(input);
+// } catch (const std::out_of_range&) {
+// return cv::VideoCapture(input);
+// }
+// Some VideoCapture backends keep ownership of the video buffer behind the returned cv::Mat.
+// read_type::safe forces read() to return a copy
+// https://github.com/opencv/opencv/blob/46e1560678dba83d25d309d8fbce01c40f21b7be/modules/gapi/include/opencv2/gapi/streaming/cap.hpp#L72-L76
+std::unique_ptr<ImagesCapture> openImagesCapture(
+ const std::string& input,
+ bool loop,
+ read_type type = read_type::efficient,
+ size_t initialImageId = 0,
+ size_t readLengthLimit = std::numeric_limits<size_t>::max(), // General option
+ cv::Size cameraResolution = {1280, 720}
+ );
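+
+// A minimal usage sketch (reads "input.mp4" once; read() returns an empty
+// cv::Mat when the input is exhausted):
+//
+//   std::unique_ptr<ImagesCapture> cap = openImagesCapture("input.mp4", false);
+//   for (cv::Mat frame = cap->read(); !frame.empty(); frame = cap->read()) {
+//       process(frame);  // hypothetical per-frame work
+//   }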
diff --git a/python/openvino/runtime/common/demo_utils/include/utils/input_wrappers.hpp b/python/openvino/runtime/common/demo_utils/include/utils/input_wrappers.hpp
new file mode 100644
index 0000000..eff38a7
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/include/utils/input_wrappers.hpp
@@ -0,0 +1,149 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <list>
+#include <memory>
+#include <set>
+#include <thread>
+#include <vector>
+#include <queue>
+
+#include <opencv2/opencv.hpp>
+
+class InputChannel;
+
+class IInputSource {
+public:
+ virtual bool read(cv::Mat& mat, const std::shared_ptr<InputChannel>& caller) = 0;
+ virtual void addSubscriber(const std::weak_ptr<InputChannel>& inputChannel) = 0;
+ virtual cv::Size getSize() = 0;
+ virtual void lock() {
+ sourceLock.lock();
+ }
+ virtual void unlock() {
+ sourceLock.unlock();
+ }
+ virtual ~IInputSource() = default;
+private:
+ std::mutex sourceLock;
+};
+
+class InputChannel: public std::enable_shared_from_this<InputChannel> { // note: public inheritance
+public:
+ InputChannel(const InputChannel&) = delete;
+ InputChannel& operator=(const InputChannel&) = delete;
+ static std::shared_ptr<InputChannel> create(const std::shared_ptr<IInputSource>& source) {
+ auto tmp = std::shared_ptr<InputChannel>(new InputChannel(source));
+ source->addSubscriber(tmp);
+ return tmp;
+ }
+ bool read(cv::Mat& mat) {
+ readQueueMutex.lock();
+ if (readQueue.empty()) {
+ readQueueMutex.unlock();
+ source->lock();
+ readQueueMutex.lock();
+ if (readQueue.empty()) {
+ bool res = source->read(mat, shared_from_this());
+ readQueueMutex.unlock();
+ source->unlock();
+ return res;
+ } else {
+ source->unlock();
+ }
+ }
+ mat = readQueue.front().clone();
+ readQueue.pop();
+ readQueueMutex.unlock();
+ return true;
+ }
+ void push(const cv::Mat& mat) {
+ readQueueMutex.lock();
+ readQueue.push(mat);
+ readQueueMutex.unlock();
+ }
+ cv::Size getSize() {
+ return source->getSize();
+ }
+
+private:
+ explicit InputChannel(const std::shared_ptr<IInputSource>& source): source{source} {}
+ std::shared_ptr<IInputSource> source;
+ std::queue<cv::Mat, std::list<cv::Mat>> readQueue;
+ std::mutex readQueueMutex;
+};
+
+class VideoCaptureSource: public IInputSource {
+public:
+ VideoCaptureSource(const cv::VideoCapture& videoCapture, bool loop): videoCapture{videoCapture}, loop{loop},
+ imSize{static_cast<int>(videoCapture.get(cv::CAP_PROP_FRAME_WIDTH)), static_cast<int>(videoCapture.get(cv::CAP_PROP_FRAME_HEIGHT))} {}
+ bool read(cv::Mat& mat, const std::shared_ptr<InputChannel>& caller) override {
+ if (!videoCapture.read(mat)) {
+ if (loop) {
+ videoCapture.set(cv::CAP_PROP_POS_FRAMES, 0);
+ videoCapture.read(mat);
+ } else {
+ return false;
+ }
+ }
+ if (1 != subscribedInputChannels.size()) {
+ cv::Mat shared = mat.clone();
+ for (const std::weak_ptr<InputChannel>& weakInputChannel : subscribedInputChannels) {
+ try {
+ std::shared_ptr<InputChannel> sharedInputChannel = std::shared_ptr<InputChannel>(weakInputChannel);
+ if (caller != sharedInputChannel) {
+ sharedInputChannel->push(shared);
+ }
+ } catch (const std::bad_weak_ptr&) {}
+ }
+ }
+ return true;
+ }
+ void addSubscriber(const std::weak_ptr<InputChannel>& inputChannel) override {
+ subscribedInputChannels.push_back(inputChannel);
+ }
+ cv::Size getSize() override {
+ return imSize;
+ }
+
+private:
+ std::vector<std::weak_ptr<InputChannel>> subscribedInputChannels;
+ cv::VideoCapture videoCapture;
+ bool loop;
+ cv::Size imSize;
+};
+
+class ImageSource: public IInputSource {
+public:
+ ImageSource(const cv::Mat& im, bool loop): im{im.clone()}, loop{loop} {} // clone so later changes to the caller's image do not affect the source
+ bool read(cv::Mat& mat, const std::shared_ptr<InputChannel>& caller) override {
+ if (!loop) {
+ auto subscribedInputChannelsIt = subscribedInputChannels.find(caller);
+ if (subscribedInputChannels.end() == subscribedInputChannelsIt) {
+ return false;
+ } else {
+ subscribedInputChannels.erase(subscribedInputChannelsIt);
+ mat = im;
+ return true;
+ }
+ } else {
+ mat = im;
+ return true;
+ }
+ }
+ void addSubscriber(const std::weak_ptr<InputChannel>& inputChannel) override {
+ if (false == subscribedInputChannels.insert(inputChannel).second)
+ throw std::invalid_argument("The insertion did not take place");
+ }
+ cv::Size getSize() override {
+ return im.size();
+ }
+
+private:
+ std::set<std::weak_ptr<InputChannel>, std::owner_less<std::weak_ptr<InputChannel>>> subscribedInputChannels;
+ cv::Mat im;
+ bool loop;
+};
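+
+// A minimal usage sketch (two channels sharing one looping video source; each
+// channel sees every frame exactly once):
+//
+//   auto source = std::make_shared<VideoCaptureSource>(cv::VideoCapture("input.mp4"), true);
+//   std::shared_ptr<InputChannel> channel0 = InputChannel::create(source);
+//   std::shared_ptr<InputChannel> channel1 = InputChannel::create(source);
+//   cv::Mat frame;
+//   channel0->read(frame);  // channel1 will receive the same frame via push()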
diff --git a/python/openvino/runtime/common/demo_utils/include/utils/kuhn_munkres.hpp b/python/openvino/runtime/common/demo_utils/include/utils/kuhn_munkres.hpp
new file mode 100644
index 0000000..6e6ac51
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/include/utils/kuhn_munkres.hpp
@@ -0,0 +1,57 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "opencv2/core.hpp"
+
+#include <memory>
+#include <vector>
+
+
+///
+/// \brief The KuhnMunkres class
+///
+/// Solves the assignment problem.
+///
+class KuhnMunkres {
+public:
+ ///
+ /// \brief Initializes the class for assignment problem solving.
+ /// \param[in] greedy If a faster greedy matching algorithm should be used.
+ explicit KuhnMunkres(bool greedy = false);
+
+ ///
+ /// \brief Solves the assignment problem for given dissimilarity matrix.
+ /// It returns a vector where each element is the column index assigned to the
+ /// corresponding row (e.g. result[0] stores the optimal column index for the
+ /// first row of the dissimilarity matrix).
+ /// \param dissimilarity_matrix CV_32F dissimilarity matrix.
+ /// \return Optimal column index for each row. -1 means that there is no
+ /// column for that row.
+ ///
+ std::vector<size_t> Solve(const cv::Mat &dissimilarity_matrix);
+
+private:
+ static constexpr int kStar = 1;
+ static constexpr int kPrime = 2;
+
+ cv::Mat dm_;
+ cv::Mat marked_;
+ std::vector<cv::Point> points_;
+
+ std::vector<int> is_row_visited_;
+ std::vector<int> is_col_visited_;
+
+ int n_;
+ bool greedy_;
+
+ void TrySimpleCase();
+ bool CheckIfOptimumIsFound();
+ cv::Point FindUncoveredMinValPos();
+ void UpdateDissimilarityMatrix(float val);
+ int FindInRow(int row, int what);
+ int FindInCol(int col, int what);
+ void Run();
+};
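+
+// A minimal usage sketch (a 2x2 dissimilarity matrix; the optimal assignment
+// pairs row 0 with column 1 and row 1 with column 0, for a total cost of 2):
+//
+//   cv::Mat dissimilarity = (cv::Mat_<float>(2, 2) << 10.f, 1.f,
+//                                                     1.f, 10.f);
+//   std::vector<size_t> rowToCol = KuhnMunkres().Solve(dissimilarity);
+//   // rowToCol == {1, 0}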
diff --git a/python/openvino/runtime/common/demo_utils/include/utils/nms.hpp b/python/openvino/runtime/common/demo_utils/include/utils/nms.hpp
new file mode 100644
index 0000000..1fd475f
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/include/utils/nms.hpp
@@ -0,0 +1,81 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "opencv2/core.hpp"
+#include <numeric>
+#include <vector>
+
+struct Anchor {
+ float left;
+ float top;
+ float right;
+ float bottom;
+
+ float getWidth() const {
+ return (right - left) + 1.0f;
+ }
+ float getHeight() const {
+ return (bottom - top) + 1.0f;
+ }
+ float getXCenter() const {
+ return left + (getWidth() - 1.0f) / 2.0f;
+ }
+ float getYCenter() const {
+ return top + (getHeight() - 1.0f) / 2.0f;
+ }
+};
+
+template <typename Anchor>
+std::vector<int> nms(const std::vector<Anchor>& boxes, const std::vector<float>& scores,
+ const float thresh, bool includeBoundaries=false) {
+ std::vector<float> areas(boxes.size());
+ for (size_t i = 0; i < boxes.size(); ++i) {
+ areas[i] = (boxes[i].right - boxes[i].left + includeBoundaries) * (boxes[i].bottom - boxes[i].top + includeBoundaries);
+ }
+ std::vector<int> order(scores.size());
+ std::iota(order.begin(), order.end(), 0);
+ std::sort(order.begin(), order.end(), [&scores](int o1, int o2) { return scores[o1] > scores[o2]; });
+
+ size_t ordersNum = 0;
+ for (; ordersNum < order.size() && scores[order[ordersNum]] >= 0; ordersNum++);
+
+ std::vector<int> keep;
+ bool shouldContinue = true;
+ for (size_t i = 0; shouldContinue && i < ordersNum; ++i) {
+ auto idx1 = order[i];
+ if (idx1 >= 0) {
+ keep.push_back(idx1);
+ shouldContinue = false;
+ for (size_t j = i + 1; j < ordersNum; ++j) {
+ auto idx2 = order[j];
+ if (idx2 >= 0) {
+ shouldContinue = true;
+ auto overlappingWidth = std::fminf(boxes[idx1].right, boxes[idx2].right) - std::fmaxf(boxes[idx1].left, boxes[idx2].left);
+ auto overlappingHeight = std::fminf(boxes[idx1].bottom, boxes[idx2].bottom) - std::fmaxf(boxes[idx1].top, boxes[idx2].top);
+ auto intersection = overlappingWidth > 0 && overlappingHeight > 0 ? overlappingWidth * overlappingHeight : 0;
+ auto overlap = intersection / (areas[idx1] + areas[idx2] - intersection);
+
+ if (overlap >= thresh) {
+ order[j] = -1;
+ }
+ }
+ }
+ }
+ }
+ return keep;
+}
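+
+// A minimal usage sketch (box 1 overlaps box 0 with IoU above 0.5 and is
+// suppressed; box 2 is disjoint and survives):
+//
+//   std::vector<Anchor> boxes = {{0, 0, 10, 10}, {1, 1, 11, 11}, {50, 50, 60, 60}};
+//   std::vector<float> scores = {0.9f, 0.8f, 0.7f};
+//   std::vector<int> keep = nms(boxes, scores, 0.5f);  // keep == {0, 2}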
diff --git a/python/openvino/runtime/common/demo_utils/include/utils/ocv_common.hpp b/python/openvino/runtime/common/demo_utils/include/utils/ocv_common.hpp
new file mode 100644
index 0000000..ebb5e14
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/include/utils/ocv_common.hpp
@@ -0,0 +1,289 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief a header file with common samples functionality using OpenCV
+ * @file ocv_common.hpp
+ */
+
+#pragma once
+
+#include <opencv2/opencv.hpp>
+#include <openvino/openvino.hpp>
+
+#include "utils/common.hpp"
+#include "utils/shared_tensor_allocator.hpp"
+
+/**
+* @brief Get cv::Mat value in the correct format.
+*/
+template <typename T>
+const T getMatValue(const cv::Mat& mat, size_t h, size_t w, size_t c) {
+ switch (mat.type()) {
+ case CV_8UC1: return (T)mat.at<uchar>(h, w);
+ case CV_8UC3: return (T)mat.at<cv::Vec3b>(h, w)[c];
+ case CV_32FC1: return (T)mat.at<float>(h, w);
+ case CV_32FC3: return (T)mat.at<cv::Vec3f>(h, w)[c];
+ }
+ throw std::runtime_error("cv::Mat type is not recognized");
+};
+
+/**
+* @brief Resize and copy image data from cv::Mat object to a given Tensor object.
+* @param mat - given cv::Mat object with an image data.
+* @param tensor - Tensor object which to be filled by an image data.
+* @param batchIndex - batch index of an image inside of the blob.
+*/
+static UNUSED void matToTensor(const cv::Mat& mat, const ov::Tensor& tensor, int batchIndex = 0) {
+ ov::Shape tensorShape = tensor.get_shape();
+ static const ov::Layout layout("NCHW");
+ const size_t width = tensorShape[ov::layout::width_idx(layout)];
+ const size_t height = tensorShape[ov::layout::height_idx(layout)];
+ const size_t channels = tensorShape[ov::layout::channels_idx(layout)];
+ if (static_cast<size_t>(mat.channels()) != channels) {
+ throw std::runtime_error("The number of channels for model input and image must match");
+ }
+ if (channels != 1 && channels != 3) {
+ throw std::runtime_error("Unsupported number of channels");
+ }
+ int batchOffset = batchIndex * width * height * channels;
+
+ cv::Mat resizedMat;
+ if (static_cast<int>(width) != mat.size().width || static_cast<int>(height) != mat.size().height) {
+ cv::resize(mat, resizedMat, cv::Size(width, height));
+ } else {
+ resizedMat = mat;
+ }
+
+ if (tensor.get_element_type() == ov::element::f32) {
+ float_t* tensorData = tensor.data<float_t>();
+ for (size_t c = 0; c < channels; c++)
+ for (size_t h = 0; h < height; h++)
+ for (size_t w = 0; w < width; w++)
+ tensorData[batchOffset + c * width * height + h * width + w] =
+ getMatValue<float_t>(resizedMat, h, w, c);
+ } else {
+ uint8_t* tensorData = tensor.data<uint8_t>();
+ if (resizedMat.depth() == CV_32F) {
+ throw std::runtime_error("Conversion of cv::Mat from float_t to uint8_t is forbidden");
+ }
+ for (size_t c = 0; c < channels; c++)
+ for (size_t h = 0; h < height; h++)
+ for (size_t w = 0; w < width; w++)
+ tensorData[batchOffset + c * width * height + h * width + w] =
+ getMatValue<uint8_t>(resizedMat, h, w, c);
+ }
+}
+
+static UNUSED ov::Tensor wrapMat2Tensor(const cv::Mat& mat) {
+ auto matType = mat.type() & CV_MAT_DEPTH_MASK;
+ if (matType != CV_8U && matType != CV_32F) {
+ throw std::runtime_error("Unsupported mat type for wrapping");
+ }
+ bool isMatFloat = matType == CV_32F;
+
+ const size_t channels = mat.channels();
+ const size_t height = mat.rows;
+ const size_t width = mat.cols;
+
+ const size_t strideH = mat.step.buf[0];
+ const size_t strideW = mat.step.buf[1];
+
+ const bool isDense = !isMatFloat ? (strideW == channels && strideH == channels * width) :
+ (strideW == channels * sizeof(float) && strideH == channels * width * sizeof(float));
+ if (!isDense) {
+ throw std::runtime_error("Doesn't support conversion from not dense cv::Mat");
+ }
+ auto precision = isMatFloat ? ov::element::f32 : ov::element::u8;
+ auto allocator = std::make_shared<SharedTensorAllocator>(mat);
+ return ov::Tensor(precision, ov::Shape{ 1, height, width, channels }, ov::Allocator(allocator));
+}
+
+static inline void resize2tensor(const cv::Mat& mat, const ov::Tensor& tensor) {
+ static const ov::Layout layout{"NHWC"};
+ const ov::Shape& shape = tensor.get_shape();
+ cv::Size size{int(shape[ov::layout::width_idx(layout)]), int(shape[ov::layout::height_idx(layout)])};
+ assert(tensor.get_element_type() == ov::element::u8);
+ assert(shape.size() == 4);
+ assert(shape[ov::layout::batch_idx(layout)] == 1);
+ assert(shape[ov::layout::channels_idx(layout)] == 3);
+ cv::resize(mat, cv::Mat{size, CV_8UC3, tensor.data()}, size);
+}
+
+static inline ov::Layout getLayoutFromShape(const ov::Shape& shape) {
+ if (shape.size() == 2) {
+ return "NC";
+ }
+ else if (shape.size() == 3) {
+ return (shape[0] >= 1 && shape[0] <= 4) ? "CHW" :
+ "HWC";
+ }
+ else if (shape.size() == 4) {
+ return (shape[1] >= 1 && shape[1] <= 4) ? "NCHW" :
+ "NHWC";
+ }
+ else {
+ throw std::runtime_error("Usupported " + std::to_string(shape.size()) + "D shape");
+ }
+}
+
+/**
+ * @brief Puts text message on the frame, highlights the text with a white border to make it distinguishable from
+ * the background.
+ * @param frame - frame to put the text on.
+ * @param message - text of the message.
+ * @param position - bottom-left corner of the text string in the image.
+ * @param fontFace - font type.
+ * @param fontScale - font scale factor that is multiplied by the font-specific base size.
+ * @param color - text color.
+ * @param thickness - thickness of the lines used to draw a text.
+ */
+inline void putHighlightedText(const cv::Mat& frame,
+ const std::string& message,
+ cv::Point position,
+ int fontFace,
+ double fontScale,
+ cv::Scalar color,
+ int thickness) {
+ cv::putText(frame, message, position, fontFace, fontScale, cv::Scalar(255, 255, 255), thickness + 1);
+ cv::putText(frame, message, position, fontFace, fontScale, color, thickness);
+}
+
+// TODO: replace with Size::empty() after OpenCV3 is dropped
+static inline bool isSizeEmpty(const cv::Size& size) {
+ return size.width <= 0 || size.height <= 0;
+}
+
+// TODO: replace with Rect::empty() after OpenCV3 is dropped
+static inline bool isRectEmpty(const cv::Rect& rect) {
+ return rect.width <= 0 || rect.height <= 0;
+}
+
+class OutputTransform {
+public:
+ OutputTransform() : doResize(false), scaleFactor(1) {}
+
+ OutputTransform(cv::Size inputSize, cv::Size outputResolution) :
+ doResize(true), scaleFactor(1), inputSize(inputSize), outputResolution(outputResolution) {}
+
+ cv::Size computeResolution() {
+ float inputWidth = static_cast<float>(inputSize.width);
+ float inputHeight = static_cast<float>(inputSize.height);
+ scaleFactor = std::min(outputResolution.height / inputHeight, outputResolution.width / inputWidth);
+ newResolution = cv::Size{static_cast<int>(inputWidth * scaleFactor), static_cast<int>(inputHeight * scaleFactor)};
+ return newResolution;
+ }
+
+ void resize(cv::Mat& image) {
+ if (!doResize) { return; }
+ cv::Size currSize = image.size();
+ if (currSize != inputSize) {
+ inputSize = currSize;
+ computeResolution();
+ }
+ if (scaleFactor == 1) { return; }
+ cv::resize(image, image, newResolution);
+ }
+
+ template<typename T>
+ void scaleCoord(T& coord) {
+ if (!doResize || scaleFactor == 1) { return; }
+ coord.x = std::floor(coord.x * scaleFactor);
+ coord.y = std::floor(coord.y * scaleFactor);
+ }
+
+ template<typename T>
+ void scaleRect(T& rect) {
+ if (!doResize || scaleFactor == 1) { return; }
+ scaleCoord(rect);
+ rect.width = std::floor(rect.width * scaleFactor);
+ rect.height = std::floor(rect.height * scaleFactor);
+ }
+
+ bool doResize;
+
+private:
+ float scaleFactor;
+ cv::Size inputSize;
+ cv::Size outputResolution;
+ cv::Size newResolution;
+};
+
+class InputTransform {
+public:
+ InputTransform() : reverseInputChannels(false), isTrivial(true) {}
+
+ InputTransform(bool reverseInputChannels, const std::string& meanValues, const std::string& scaleValues) :
+ reverseInputChannels(reverseInputChannels),
+ isTrivial(!reverseInputChannels && meanValues.empty() && scaleValues.empty()),
+ means(meanValues.empty() ? cv::Scalar(0.0, 0.0, 0.0) : string2Vec(meanValues)),
+ stdScales(scaleValues.empty() ? cv::Scalar(1.0, 1.0, 1.0) : string2Vec(scaleValues)) {
+ }
+
+ cv::Scalar string2Vec(const std::string& string) {
+ const auto& strValues = split(string, ' ');
+ std::vector<float> values;
+ try {
+ for (auto& str : strValues)
+ values.push_back(std::stof(str));
+ }
+ catch (const std::invalid_argument&) {
+ throw std::runtime_error("Invalid parameter --mean_values or --scale_values is provided.");
+ }
+ if (values.size() != 3) {
+ throw std::runtime_error("InputTransform expects 3 values per channel, but get \"" + string + "\".");
+ }
+ return cv::Scalar(values[0], values[1], values[2]);
+ }
+
+ void setPrecision(ov::preprocess::PrePostProcessor& ppp, const std::string& tensorName) {
+ const auto precision = isTrivial ? ov::element::u8 : ov::element::f32;
+ ppp.input(tensorName).tensor().
+ set_element_type(precision);
+ }
+
+ cv::Mat operator()(const cv::Mat& inputs) {
+ if (isTrivial) { return inputs; }
+ cv::Mat result;
+ inputs.convertTo(result, CV_32F);
+ if (reverseInputChannels) {
+ cv::cvtColor(result, result, cv::COLOR_BGR2RGB);
+ }
+ // TODO: merge the two following lines after OpenCV3 is dropped
+ result -= means;
+ result /= cv::Mat{stdScales};
+ return result;
+ }
+
+private:
+ bool reverseInputChannels;
+ bool isTrivial;
+ cv::Scalar means;
+ cv::Scalar stdScales;
+};
+
+class LazyVideoWriter {
+ cv::VideoWriter writer;
+ unsigned nwritten;
+public:
+ const std::string filenames;
+ const double fps;
+ const unsigned lim;
+
+ LazyVideoWriter(const std::string& filenames, double fps, unsigned lim) :
+ nwritten{1}, filenames{filenames}, fps{fps}, lim{lim} {}
+ void write(const cv::Mat& im) {
+ if (writer.isOpened() && (nwritten < lim || 0 == lim)) {
+ writer.write(im);
+ ++nwritten;
+ return;
+ }
+ if (!writer.isOpened() && !filenames.empty()) {
+ if (!writer.open(filenames, cv::VideoWriter::fourcc('M', 'J', 'P', 'G'), fps, im.size())) {
+ throw std::runtime_error("Can't open video writer");
+ }
+ writer.write(im);
+ }
+ }
+};
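+
+// A minimal usage sketch (zero-copy wrap of a dense BGR frame as an NHWC u8
+// tensor; the cv::Mat must outlive the tensor, and inferRequest is an assumed
+// ov::InferRequest for a model with a single NHWC u8 input):
+//
+//   cv::Mat frame = cv::imread("input.bmp");
+//   ov::Tensor input = wrapMat2Tensor(frame);
+//   inferRequest.set_input_tensor(input);
+//   inferRequest.infer();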
diff --git a/python/openvino/runtime/common/demo_utils/include/utils/performance_metrics.hpp b/python/openvino/runtime/common/demo_utils/include/utils/performance_metrics.hpp
new file mode 100644
index 0000000..6c728b0
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/include/utils/performance_metrics.hpp
@@ -0,0 +1,92 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief a header file for performance metrics calculation class
+ * @file performance_metrics.hpp
+ */
+
+#pragma once
+
+#include <chrono>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+
+#include "utils/ocv_common.hpp"
+
+class PerformanceMetrics {
+public:
+ using Clock = std::chrono::steady_clock;
+ using TimePoint = std::chrono::time_point<Clock>;
+ using Duration = Clock::duration;
+ using Ms = std::chrono::duration<double, std::ratio<1, 1000>>;
+ using Sec = std::chrono::duration<double, std::ratio<1, 1>>;
+
+ struct Metrics {
+ double latency;
+ double fps;
+ };
+
+ enum MetricTypes {
+ ALL,
+ FPS,
+ LATENCY
+ };
+
+ PerformanceMetrics(Duration timeWindow = std::chrono::seconds(1));
+ void update(TimePoint lastRequestStartTime,
+ const cv::Mat& frame,
+ cv::Point position = {15, 30},
+ int fontFace = cv::FONT_HERSHEY_COMPLEX,
+ double fontScale = 0.75,
+ cv::Scalar color = {200, 10, 10},
+ int thickness = 2, MetricTypes metricType = ALL);
+ void update(TimePoint lastRequestStartTime);
+
+ /// Paints metrics over provided mat
+ /// @param frame frame to paint over
+ /// @param position left top corner of text block
+ /// @param fontScale font scale
+ /// @param color font color
+ /// @param thickness font thickness
+ void paintMetrics(const cv::Mat& frame,
+ cv::Point position = { 15, 30 },
+ int fontFace = cv::FONT_HERSHEY_COMPLEX,
+ double fontScale = 0.75,
+ cv::Scalar color = { 200, 10, 10 },
+ int thickness = 2, MetricTypes metricType = ALL) const;
+
+ Metrics getLast() const;
+ Metrics getTotal() const;
+ void logTotal() const;
+
+private:
+ struct Statistic {
+ Duration latency;
+ Duration period;
+ int frameCount;
+
+ Statistic() {
+ latency = Duration::zero();
+ period = Duration::zero();
+ frameCount = 0;
+ }
+
+ void combine(const Statistic& other) {
+ latency += other.latency;
+ period += other.period;
+ frameCount += other.frameCount;
+ }
+ };
+
+ Duration timeWindowSize;
+ Statistic lastMovingStatistic;
+ Statistic currentMovingStatistic;
+ Statistic totalStatistic;
+ TimePoint lastUpdateTime;
+ bool firstFrameProcessed;
+};
+
+void logLatencyPerStage(double readLat, double preprocLat, double inferLat, double postprocLat, double renderLat);
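+
+// A minimal usage sketch (per-frame timing; update() accumulates the moving
+// and total statistics and paints latency/FPS onto the frame; cap is an
+// assumed ImagesCapture from images_capture.h):
+//
+//   PerformanceMetrics metrics;
+//   auto startTime = PerformanceMetrics::Clock::now();
+//   cv::Mat frame = cap->read();
+//   // ... run inference on frame ...
+//   metrics.update(startTime, frame);
+//   metrics.logTotal();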
diff --git a/python/openvino/runtime/common/demo_utils/include/utils/shared_tensor_allocator.hpp b/python/openvino/runtime/common/demo_utils/include/utils/shared_tensor_allocator.hpp
new file mode 100644
index 0000000..f74e8d0
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/include/utils/shared_tensor_allocator.hpp
@@ -0,0 +1,47 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include <opencv2/core.hpp>
+#include <openvino/runtime/allocator.hpp>
+
+// To prevent false-positive clang compiler warning
+// (https://github.com/openvinotoolkit/openvino/pull/11092#issuecomment-1073846256):
+// warning: destructor called on non-final 'SharedTensorAllocator' that has virtual functions
+// but non-virtual destructor [-Wdelete-non-abstract-non-virtual-dtor]
+// SharedTensorAllocator class declared as final
+
+class SharedTensorAllocator final : public ov::AllocatorImpl {
+public:
+ SharedTensorAllocator(const cv::Mat& img) : img(img) {}
+
+ ~SharedTensorAllocator() = default;
+
+ void* allocate(const size_t bytes, const size_t) override {
+ return bytes <= img.rows * img.step[0] ? img.data : nullptr;
+ }
+
+ void deallocate(void* handle, const size_t bytes, const size_t) override {}
+
+ bool is_equal(const AllocatorImpl& other) const override {
+ auto other_tensor_allocator = dynamic_cast<const SharedTensorAllocator*>(&other);
+ return other_tensor_allocator != nullptr && other_tensor_allocator == this;
+ }
+
+private:
+ const cv::Mat img;
+};
diff --git a/python/openvino/runtime/common/demo_utils/include/utils/slog.hpp b/python/openvino/runtime/common/demo_utils/include/utils/slog.hpp
new file mode 100644
index 0000000..316b98d
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/include/utils/slog.hpp
@@ -0,0 +1,99 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief a header file with logging facility for common samples
+ * @file slog.hpp
+ */
+
+#pragma once
+
+#include <iostream>
+#include <string>
+
+namespace slog {
+
+/**
+ * @class LogStreamEndLine
+ * @brief The LogStreamEndLine class implements an end line marker for a log stream
+ */
+class LogStreamEndLine { };
+
+static constexpr LogStreamEndLine endl;
+
+
+/**
+ * @class LogStreamBoolAlpha
+ * @brief The LogStreamBoolAlpha class implements bool printing for a log stream
+ */
+class LogStreamBoolAlpha { };
+
+static constexpr LogStreamBoolAlpha boolalpha;
+
+
+/**
+ * @class LogStream
+ * @brief The LogStream class implements a stream for sample logging
+ */
+class LogStream {
+ std::string _prefix;
+ std::ostream* _log_stream;
+ bool _new_line;
+
+public:
+ /**
+ * @brief A constructor. Creates a LogStream object
+ * @param prefix The prefix to print
+ */
+ LogStream(const std::string &prefix, std::ostream& log_stream)
+ : _prefix(prefix), _new_line(true) {
+ _log_stream = &log_stream;
+ }
+
+ /**
+ * @brief A stream output operator to be used within the logger
+ * @param arg Object for serialization in the logger message
+ */
+ template<class T>
+ LogStream &operator<<(const T &arg) {
+ if (_new_line) {
+ (*_log_stream) << "[ " << _prefix << " ] ";
+ _new_line = false;
+ }
+
+ (*_log_stream) << arg;
+ return *this;
+ }
+
+ // Specializing for LogStreamEndLine to support slog::endl
+ LogStream& operator<< (const LogStreamEndLine &/*arg*/) {
+ _new_line = true;
+
+ (*_log_stream) << std::endl;
+ return *this;
+ }
+
+ // Specializing for LogStreamBoolAlpha to support slog::boolalpha
+ LogStream& operator<< (const LogStreamBoolAlpha &/*arg*/) {
+ (*_log_stream) << std::boolalpha;
+ return *this;
+ }
+
+ // Specializing for std::vector and std::list
+ template<template<class, class> class Container, class T>
+ LogStream& operator<< (const Container<T, std::allocator<T>>& container) {
+ for (const auto& el : container) {
+ *this << el << slog::endl;
+ }
+ return *this;
+ }
+};
+
+
+static LogStream info("INFO", std::cout);
+static LogStream debug("DEBUG", std::cout);
+static LogStream warn("WARNING", std::cout);
+static LogStream err("ERROR", std::cerr);
+
+} // namespace slog
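+
+// A minimal usage sketch (modelPath and nireq are assumed local variables):
+//
+//   slog::info << "Reading model " << modelPath << slog::endl;
+//   slog::warn << "Only " << nireq << " infer requests are available" << slog::endl;
+//   slog::err << "Failed to open input" << slog::endl;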
diff --git a/python/openvino/runtime/common/demo_utils/include/utils/threads_common.hpp b/python/openvino/runtime/common/demo_utils/include/utils/threads_common.hpp
new file mode 100644
index 0000000..f0e5cbf
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/include/utils/threads_common.hpp
@@ -0,0 +1,165 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <algorithm>
+#include <atomic>
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <utility>
+#include <set>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include <opencv2/core/core.hpp>
+#include "utils/performance_metrics.hpp"
+
+// A VideoFrame can represent not just a single image but a whole grid of images
+class VideoFrame {
+public:
+ typedef std::shared_ptr<VideoFrame> Ptr;
+
+ VideoFrame(unsigned sourceID, int64_t frameId, const cv::Mat& frame = cv::Mat()) :
+ sourceID{sourceID}, frameId{frameId}, frame{frame} {}
+    virtual ~VideoFrame() = default; // subclasses define how the frame is released or recycled
+
+ const unsigned sourceID;
+ const int64_t frameId;
+ cv::Mat frame;
+
+ PerformanceMetrics::TimePoint timestamp;
+};
+
+class Worker;
+
+class Task {
+public:
+ explicit Task(VideoFrame::Ptr sharedVideoFrame, float priority = 0):
+ sharedVideoFrame{sharedVideoFrame}, priority{priority} {}
+ virtual bool isReady() = 0;
+ virtual void process() = 0;
+ virtual ~Task() = default;
+
+ std::string name;
+ VideoFrame::Ptr sharedVideoFrame; // it is possible that two tasks try to draw on the same cvMat
+ const float priority;
+};
+
+struct HigherPriority {
+ bool operator()(const std::shared_ptr<Task>& lhs, const std::shared_ptr<Task>& rhs) const {
+ return lhs->priority > rhs->priority
+ || (lhs->priority == rhs->priority && lhs->sharedVideoFrame->frameId < rhs->sharedVideoFrame->frameId)
+ || (lhs->priority == rhs->priority && lhs->sharedVideoFrame->frameId == rhs->sharedVideoFrame->frameId && lhs < rhs);
+ }
+};
+
+class Worker {
+public:
+ explicit Worker(unsigned threadNum):
+ threadPool(threadNum), running{false} {}
+    ~Worker() {
+        stop();
+        // Joining here prevents std::terminate if the user forgot to call join()
+        for (std::thread& t : threadPool)
+            if (t.joinable()) t.join();
+    }
+ void runThreads() {
+ running = true;
+ for (std::thread& t : threadPool) {
+ t = std::thread(&Worker::threadFunc, this);
+ }
+ }
+    void push(std::shared_ptr<Task> task) {
+        {
+            std::lock_guard<std::mutex> lock{tasksMutex};
+            tasks.insert(std::move(task));
+        }
+        tasksCondVar.notify_one();
+    }
+ void threadFunc() {
+ while (running) {
+ std::unique_lock<std::mutex> lk(tasksMutex);
+ while (running && tasks.empty()) {
+ tasksCondVar.wait(lk);
+ }
+ try {
+ auto it = std::find_if(tasks.begin(), tasks.end(), [](const std::shared_ptr<Task>& task){return task->isReady();});
+ if (tasks.end() != it) {
+ const std::shared_ptr<Task> task = std::move(*it);
+ tasks.erase(it);
+ lk.unlock();
+ task->process();
+ }
+ } catch (...) {
+ std::lock_guard<std::mutex> lock{exceptionMutex};
+ if (nullptr == currentException) {
+ currentException = std::current_exception();
+ stop();
+ }
+ }
+ }
+ }
+    void stop() {
+        {
+            std::lock_guard<std::mutex> lock{tasksMutex}; // avoid a missed wakeup between predicate check and wait()
+            running = false;
+        }
+        tasksCondVar.notify_all();
+    }
+ void join() {
+ for (auto& t : threadPool) {
+ t.join();
+ }
+ if (nullptr != currentException) {
+ std::rethrow_exception(currentException);
+ }
+ }
+
+private:
+ std::condition_variable tasksCondVar;
+ std::set<std::shared_ptr<Task>, HigherPriority> tasks;
+ std::mutex tasksMutex;
+ std::vector<std::thread> threadPool;
+ std::atomic<bool> running;
+ std::exception_ptr currentException;
+ std::mutex exceptionMutex;
+};
+
+// inline: this function lives in a header and would otherwise violate the ODR
+inline void tryPush(const std::weak_ptr<Worker>& worker, std::shared_ptr<Task>&& task) {
+    try {
+        std::shared_ptr<Worker>(worker)->push(std::move(task));
+    } catch (const std::bad_weak_ptr&) {}
+}
+
+template <class C> class ConcurrentContainer {
+public:
+ C container;
+ mutable std::mutex mutex;
+
+ bool lockedEmpty() const noexcept {
+ std::lock_guard<std::mutex> lock{mutex};
+ return container.empty();
+ }
+ typename C::size_type lockedSize() const noexcept {
+ std::lock_guard<std::mutex> lock{mutex};
+ return container.size();
+ }
+ void lockedPushBack(const typename C::value_type& value) {
+ std::lock_guard<std::mutex> lock{mutex};
+ container.push_back(value);
+ }
+    bool lockedTryPop(typename C::value_type& value) {
+        std::lock_guard<std::mutex> lock{mutex};
+        if (container.empty()) {
+            return false;
+        }
+        value = container.back();
+        container.pop_back();
+        return true;
+    }
+
+ operator C() const {
+ std::lock_guard<std::mutex> lock{mutex};
+ return container;
+ }
+};
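+
+// Usage sketch (MyTask is hypothetical): a pipeline derives a concrete Task,
+// runs a Worker pool, and joins it before shutdown so that an exception caught
+// in threadFunc() is rethrown on the caller's thread.
+//
+//   auto worker = std::make_shared<Worker>(std::thread::hardware_concurrency());
+//   worker->runThreads();
+//   tryPush(worker, std::make_shared<MyTask>(sharedFrame)); // MyTask overrides isReady()/process()
+//   ...
+//   worker->stop();
+//   worker->join(); // rethrows the first captured exception, if any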
diff --git a/python/openvino/runtime/common/demo_utils/src/args_helper.cpp b/python/openvino/runtime/common/demo_utils/src/args_helper.cpp
new file mode 100644
index 0000000..8f4bc35
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/src/args_helper.cpp
@@ -0,0 +1,155 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "utils/args_helper.hpp"
+#include "utils/slog.hpp"
+
+#ifdef _WIN32
+#include "w_dirent.hpp"
+#else
+#include <dirent.h>
+#endif
+
+#include <gflags/gflags.h>
+
+#include <sys/stat.h>
+#include <map>
+
+#include <algorithm>
+#include <cctype>
+#include <sstream>
+
+void readInputFilesArguments(std::vector<std::string>& files, const std::string& arg) {
+    struct stat sb;
+    if (stat(arg.c_str(), &sb) != 0) {
+        if (arg.compare(0, 5, "rtsp:") != 0) {
+            slog::warn << "File " << arg << " cannot be opened!" << slog::endl;
+            return;
+        }
+        // rtsp sources cannot be stat'ed: accept them as-is rather than
+        // reading the uninitialized stat buffer below
+        files.push_back(arg);
+        return;
+    }
+ if (S_ISDIR(sb.st_mode)) {
+ DIR *dp;
+ dp = opendir(arg.c_str());
+ if (dp == nullptr) {
+ slog::warn << "Directory " << arg << " cannot be opened!" << slog::endl;
+ return;
+ }
+
+ struct dirent *ep;
+ while (nullptr != (ep = readdir(dp))) {
+ std::string fileName = ep->d_name;
+ if (fileName == "." || fileName == "..") continue;
+ files.push_back(arg + "/" + ep->d_name);
+ }
+ closedir(dp);
+ } else {
+ files.push_back(arg);
+ }
+}
+
+void parseInputFilesArguments(std::vector<std::string>& files) {
+ std::vector<std::string> args = gflags::GetArgvs();
+ bool readArguments = false;
+ for (size_t i = 0; i < args.size(); i++) {
+ if (args.at(i) == "-i" || args.at(i) == "--i") {
+ readArguments = true;
+ continue;
+ }
+ if (!readArguments) {
+ continue;
+ }
+ if (args.at(i).c_str()[0] == '-') {
+ break;
+ }
+ readInputFilesArguments(files, args.at(i));
+ }
+}
+
+std::vector<std::string> split(const std::string& s, char delim) {
+ std::vector<std::string> result;
+ std::stringstream ss(s);
+ std::string item;
+
+ while (getline(ss, item, delim)) {
+ result.push_back(item);
+ }
+ return result;
+}
+
+std::vector<std::string> parseDevices(const std::string& device_string) {
+ const std::string::size_type colon_position = device_string.find(":");
+ if (colon_position != std::string::npos) {
+ std::string device_type = device_string.substr(0, colon_position);
+ if (device_type == "HETERO" || device_type == "MULTI") {
+ std::string comma_separated_devices = device_string.substr(colon_position + 1);
+ std::vector<std::string> devices = split(comma_separated_devices, ',');
+ for (auto& device : devices)
+ device = device.substr(0, device.find("("));
+ return devices;
+ }
+ }
+ return {device_string};
+}
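+
+// For example:
+//   parseDevices("MULTI:CPU,GPU(4)") yields {"CPU", "GPU"};
+//   parseDevices("HETERO:FPGA,CPU") yields {"FPGA", "CPU"};
+//   parseDevices("CPU") yields {"CPU"}.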
+
+// Format: <device1>:<value1>,<device2>:<value2> or just <value>
+std::map<std::string, int32_t> parseValuePerDevice(const std::set<std::string>& devices,
+ const std::string& values_string) {
+ auto values_string_upper = values_string;
+ std::transform(values_string_upper.begin(),
+ values_string_upper.end(),
+ values_string_upper.begin(),
+ [](unsigned char c){ return std::toupper(c); });
+ std::map<std::string, int32_t> result;
+ auto device_value_strings = split(values_string_upper, ',');
+ for (auto& device_value_string : device_value_strings) {
+ auto device_value_vec = split(device_value_string, ':');
+ if (device_value_vec.size() == 2) {
+ auto it = std::find(devices.begin(), devices.end(), device_value_vec.at(0));
+ if (it != devices.end()) {
+ result[device_value_vec.at(0)] = std::stoi(device_value_vec.at(1));
+ }
+ } else if (device_value_vec.size() == 1) {
+            const int32_t value = std::stoi(device_value_vec.at(0));
+ for (const auto& device : devices) {
+ result[device] = value;
+ }
+ } else if (device_value_vec.size() != 0) {
+ throw std::runtime_error("Unknown string format: " + values_string);
+ }
+ }
+ return result;
+}
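+
+// For example, with devices = {"CPU", "GPU"}:
+//   parseValuePerDevice(devices, "CPU:6,GPU:4") yields {{"CPU", 6}, {"GPU", 4}};
+//   parseValuePerDevice(devices, "8") yields {{"CPU", 8}, {"GPU", 8}}.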
+
+cv::Size stringToSize(const std::string& str) {
+ std::vector<std::string> strings = split(str, 'x');
+ if (strings.size() != 2) {
+ throw std::invalid_argument("Can't convert std::string to cv::Size. The string must contain exactly one x");
+ }
+ return {std::stoi(strings[0]), std::stoi(strings[1])};
+}
+
+std::map<std::string, ov::Layout> parseLayoutString(const std::string& layout_string) {
+ // Parse parameter string like "input0:NCHW,input1:NC" or "NCHW" (applied to all
+ // inputs)
+ std::map<std::string, ov::Layout> layouts;
+ std::string searchStr = (layout_string.find_last_of(':') == std::string::npos && !layout_string.empty() ?
+ ":" : "") + layout_string;
+ auto colonPos = searchStr.find_last_of(':');
+ while (colonPos != std::string::npos) {
+ auto startPos = searchStr.find_last_of(',');
+ auto inputName = searchStr.substr(startPos + 1, colonPos - startPos - 1);
+ auto inputLayout = searchStr.substr(colonPos + 1);
+ layouts[inputName] = ov::Layout(inputLayout);
+ searchStr = searchStr.substr(0, startPos + 1);
+ if (searchStr.empty() || searchStr.back() != ',') {
+ break;
+ }
+ searchStr.pop_back();
+ colonPos = searchStr.find_last_of(':');
+ }
+ if (!searchStr.empty()) {
+ throw std::invalid_argument("Can't parse input layout string: " + layout_string);
+ }
+ return layouts;
+}
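+
+// For example:
+//   parseLayoutString("NCHW") yields {{"", ov::Layout("NCHW")}}, i.e. the
+//   layout is applied to every input;
+//   parseLayoutString("input0:NCHW,input1:NC") yields
+//   {{"input0", ov::Layout("NCHW")}, {"input1", ov::Layout("NC")}}.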
diff --git a/python/openvino/runtime/common/demo_utils/src/config_factory.cpp b/python/openvino/runtime/common/demo_utils/src/config_factory.cpp
new file mode 100644
index 0000000..2e9a442
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/src/config_factory.cpp
@@ -0,0 +1,111 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "utils/config_factory.h"
+
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <openvino/runtime/intel_gpu/properties.hpp>
+#include "dla_plugin_config.hpp"
+#include "utils/args_helper.hpp"
+#include <sys/stat.h>
+
+std::set<std::string> ModelConfig::getDevices() {
+ if (devices.empty()) {
+ for (const std::string& device : parseDevices(deviceName)) {
+ devices.insert(device);
+ }
+ }
+
+ return devices;
+}
+
+ModelConfig ConfigFactory::getUserConfig(const std::string& flags_d,
+ uint32_t flags_nireq,
+ const std::string& flags_nstreams,
+ uint32_t flags_nthreads,
+ const std::string &flags_arch) {
+ auto config = getCommonConfig(flags_d, flags_nireq);
+
+ std::map<std::string, int> deviceNstreams = parseValuePerDevice(config.getDevices(), flags_nstreams);
+ for (const auto& device : config.getDevices()) {
+ if (flags_arch != "" && device == "FPGA") {
+ struct stat buffer;
+ if (stat(flags_arch.c_str(), &buffer) != 0) {
+ std::cout << "Error: architecture file: " << flags_arch << " doesn't exist. Please provide a valid path." << std::endl;
+ throw std::logic_error("architecture file path does not exist.");
+ }
+ config.compiledModelConfig.emplace(DLIAPlugin::properties::arch_path.name(), flags_arch);
+ } else if (device == "CPU") { // CPU supports a few special performance-oriented keys
+ // limit threading for CPU portion of inference
+ if (flags_nthreads != 0)
+ config.compiledModelConfig.emplace(ov::inference_num_threads.name(), flags_nthreads);
+
+ config.compiledModelConfig.emplace(ov::affinity.name(), ov::Affinity::NONE);
+
+ ov::streams::Num nstreams =
+ deviceNstreams.count(device) > 0 ? ov::streams::Num(deviceNstreams[device]) : ov::streams::AUTO;
+ config.compiledModelConfig.emplace(ov::streams::num.name(), nstreams);
+ } else if (device == "GPU") {
+ ov::streams::Num nstreams =
+ deviceNstreams.count(device) > 0 ? ov::streams::Num(deviceNstreams[device]) : ov::streams::AUTO;
+ config.compiledModelConfig.emplace(ov::streams::num.name(), nstreams);
+ if (flags_d.find("MULTI") != std::string::npos &&
+ config.getDevices().find("CPU") != config.getDevices().end()) {
+ // multi-device execution with the CPU + GPU performs best with GPU throttling hint,
+ // which releases another CPU thread (that is otherwise used by the GPU driver for active polling)
+ config.compiledModelConfig.emplace(ov::intel_gpu::hint::queue_throttle.name(),
+ ov::intel_gpu::hint::ThrottleLevel(1));
+ }
+ }
+ }
+ return config;
+}
+
+ModelConfig ConfigFactory::getMinLatencyConfig(const std::string& flags_d, uint32_t flags_nireq) {
+ auto config = getCommonConfig(flags_d, flags_nireq);
+ for (const auto& device : config.getDevices()) {
+ if (device == "CPU") { // CPU supports a few special performance-oriented keys
+ config.compiledModelConfig.emplace(ov::streams::num.name(), 1);
+ } else if (device == "GPU") {
+ config.compiledModelConfig.emplace(ov::streams::num.name(), 1);
+ }
+ }
+ return config;
+}
+
+ModelConfig ConfigFactory::getCommonConfig(const std::string& flags_d, uint32_t flags_nireq) {
+ ModelConfig config;
+
+ if (!flags_d.empty()) {
+ config.deviceName = flags_d;
+ }
+
+ config.maxAsyncRequests = flags_nireq;
+
+ return config;
+}
+
+std::map<std::string, std::string> ModelConfig::getLegacyConfig() {
+ std::map<std::string, std::string> config;
+ for (const auto& item : compiledModelConfig) {
+ config[item.first] = item.second.as<std::string>();
+ }
+ return config;
+}
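+
+// Usage sketch (FLAGS_* stand for the sample's gflags-defined options; core and
+// model are assumed to exist in the calling demo):
+//
+//   ModelConfig cfg = ConfigFactory::getUserConfig(FLAGS_d, FLAGS_nireq,
+//                                                  FLAGS_nstreams, FLAGS_nthreads,
+//                                                  FLAGS_arch);
+//   ov::CompiledModel compiled =
+//       core.compile_model(model, cfg.deviceName, cfg.compiledModelConfig);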
diff --git a/python/openvino/runtime/common/demo_utils/src/image_utils.cpp b/python/openvino/runtime/common/demo_utils/src/image_utils.cpp
new file mode 100644
index 0000000..039dd66
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/src/image_utils.cpp
@@ -0,0 +1,55 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "utils/image_utils.h"
+
+cv::Mat resizeImageExt(const cv::Mat& mat, int width, int height, RESIZE_MODE resizeMode,
+ cv::InterpolationFlags interpolationMode, cv::Rect* roi, cv::Scalar BorderConstant) {
+ if (width == mat.cols && height == mat.rows) {
+ return mat;
+ }
+
+ cv::Mat dst;
+
+ switch (resizeMode) {
+ case RESIZE_FILL:
+ {
+        cv::resize(mat, dst, cv::Size(width, height), 0, 0, interpolationMode);
+ if (roi) {
+ *roi = cv::Rect(0, 0, width, height);
+ }
+ break;
+ }
+ case RESIZE_KEEP_ASPECT:
+ case RESIZE_KEEP_ASPECT_LETTERBOX:
+ {
+ double scale = std::min(static_cast<double>(width) / mat.cols, static_cast<double>(height) / mat.rows);
+ cv::Mat resizedImage;
+ cv::resize(mat, resizedImage, cv::Size(0, 0), scale, scale, interpolationMode);
+
+ int dx = resizeMode == RESIZE_KEEP_ASPECT ? 0 : (width - resizedImage.cols) / 2;
+ int dy = resizeMode == RESIZE_KEEP_ASPECT ? 0 : (height - resizedImage.rows) / 2;
+
+ cv::copyMakeBorder(resizedImage, dst, dy, height - resizedImage.rows - dy,
+ dx, width - resizedImage.cols - dx, cv::BORDER_CONSTANT, BorderConstant);
+ if (roi) {
+ *roi = cv::Rect(dx, dy, resizedImage.cols, resizedImage.rows);
+ }
+ break;
+ }
+ }
+ return dst;
+}
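+
+// Usage sketch: letterbox a frame into a 640x640 square while recording where
+// the valid pixels landed, e.g. to map detections back to source coordinates
+// (frame is a caller-provided cv::Mat):
+//
+//   cv::Rect roi;
+//   cv::Mat input = resizeImageExt(frame, 640, 640, RESIZE_KEEP_ASPECT_LETTERBOX,
+//                                  cv::INTER_LINEAR, &roi);
+//   // roi.x and roi.y hold the padding offsets, roi.width and roi.height the scaled size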
diff --git a/python/openvino/runtime/common/demo_utils/src/images_capture.cpp b/python/openvino/runtime/common/demo_utils/src/images_capture.cpp
new file mode 100644
index 0000000..febcdd7
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/src/images_capture.cpp
@@ -0,0 +1,327 @@
+// Copyright (C) 2020-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include "utils/images_capture.h"
+
+#include <string.h>
+
+#ifdef _WIN32
+# include "w_dirent.hpp"
+#else
+# include <dirent.h> // for closedir, dirent, opendir, readdir, DIR
+#endif
+
+#include <algorithm>
+#include <chrono>
+#include <fstream>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/videoio.hpp>
+
+class InvalidInput : public std::runtime_error {
+public:
+ explicit InvalidInput(const std::string& message) noexcept : std::runtime_error(message) {}
+};
+
+class OpenError : public std::runtime_error {
+public:
+ explicit OpenError(const std::string& message) noexcept : std::runtime_error(message) {}
+};
+
+class ImreadWrapper : public ImagesCapture {
+ cv::Mat img;
+ bool canRead;
+
+public:
+ ImreadWrapper(const std::string& input, bool loop) : ImagesCapture{loop}, canRead{true} {
+ auto startTime = std::chrono::steady_clock::now();
+
+ std::ifstream file(input.c_str());
+ if (!file.good())
+ throw InvalidInput("Can't find the image by " + input);
+
+ img = cv::imread(input);
+ if (!img.data)
+ throw OpenError("Can't open the image from " + input);
+ else
+ readerMetrics.update(startTime);
+ }
+
+ double fps() const override {
+ return 1.0;
+ }
+
+ std::string getType() const override {
+ return "IMAGE";
+ }
+
+ cv::Mat read() override {
+ if (loop)
+ return img.clone();
+ if (canRead) {
+ canRead = false;
+ return img.clone();
+ }
+ return cv::Mat{};
+ }
+};
+
+class DirReader : public ImagesCapture {
+ std::vector<std::string> names;
+ size_t fileId;
+ size_t nextImgId;
+ const size_t initialImageId;
+ const size_t readLengthLimit;
+ const std::string input;
+
+public:
+ DirReader(const std::string& input, bool loop, size_t initialImageId, size_t readLengthLimit)
+ : ImagesCapture{loop},
+ fileId{0},
+ nextImgId{0},
+ initialImageId{initialImageId},
+ readLengthLimit{readLengthLimit},
+ input{input} {
+ DIR* dir = opendir(input.c_str());
+ if (!dir)
+ throw InvalidInput("Can't find the dir by " + input);
+ while (struct dirent* ent = readdir(dir))
+ if (strcmp(ent->d_name, ".") && strcmp(ent->d_name, ".."))
+ names.emplace_back(ent->d_name);
+ closedir(dir);
+ if (names.empty())
+ throw OpenError("The dir " + input + " is empty");
+ sort(names.begin(), names.end());
+ size_t readImgs = 0;
+ while (fileId < names.size()) {
+ cv::Mat img = cv::imread(input + '/' + names[fileId]);
+ if (img.data) {
+ ++readImgs;
+ if (readImgs - 1 >= initialImageId)
+ return;
+ }
+ ++fileId;
+ }
+ throw OpenError("Can't read the first image from " + input);
+ }
+
+ double fps() const override {
+ return 1.0;
+ }
+
+ std::string getType() const override {
+ return "DIR";
+ }
+
+ cv::Mat read() override {
+ auto startTime = std::chrono::steady_clock::now();
+
+ while (fileId < names.size() && nextImgId < readLengthLimit) {
+ cv::Mat img = cv::imread(input + '/' + names[fileId]);
+ ++fileId;
+ if (img.data) {
+ ++nextImgId;
+ readerMetrics.update(startTime);
+ return img;
+ }
+ }
+
+ if (loop) {
+ fileId = 0;
+ size_t readImgs = 0;
+ while (fileId < names.size()) {
+ cv::Mat img = cv::imread(input + '/' + names[fileId]);
+ ++fileId;
+ if (img.data) {
+ ++readImgs;
+ if (readImgs - 1 >= initialImageId) {
+ nextImgId = 1;
+ readerMetrics.update(startTime);
+ return img;
+ }
+ }
+ }
+ }
+ return cv::Mat{};
+ }
+};
+
+class VideoCapWrapper : public ImagesCapture {
+ cv::VideoCapture cap;
+ bool first_read;
+ const read_type type;
+ size_t nextImgId;
+ const double initialImageId;
+ size_t readLengthLimit;
+
+public:
+ VideoCapWrapper(const std::string& input, bool loop, read_type type, size_t initialImageId, size_t readLengthLimit)
+ : ImagesCapture{loop},
+ first_read{true},
+ type{type},
+ nextImgId{0},
+ initialImageId{static_cast<double>(initialImageId)} {
+ if (0 == readLengthLimit) {
+ throw std::runtime_error("readLengthLimit must be positive");
+ }
+ if (cap.open(input)) {
+ this->readLengthLimit = readLengthLimit;
+ if (!cap.set(cv::CAP_PROP_POS_FRAMES, this->initialImageId))
+ throw OpenError("Can't set the frame to begin with");
+ return;
+ }
+ throw InvalidInput("Can't open the video from " + input);
+ }
+
+ double fps() const override {
+ return cap.get(cv::CAP_PROP_FPS);
+ }
+
+ std::string getType() const override {
+ return "VIDEO";
+ }
+
+ cv::Mat read() override {
+ auto startTime = std::chrono::steady_clock::now();
+
+ if (nextImgId >= readLengthLimit) {
+ if (loop && cap.set(cv::CAP_PROP_POS_FRAMES, initialImageId)) {
+ nextImgId = 1;
+ cv::Mat img;
+ cap.read(img);
+ if (type == read_type::safe) {
+ img = img.clone();
+ }
+ readerMetrics.update(startTime);
+ return img;
+ }
+ return cv::Mat{};
+ }
+ cv::Mat img;
+ bool success = cap.read(img);
+ if (!success && first_read) {
+ throw std::runtime_error("The first image can't be read");
+ }
+ first_read = false;
+ if (!success && loop && cap.set(cv::CAP_PROP_POS_FRAMES, initialImageId)) {
+ nextImgId = 1;
+ cap.read(img);
+ } else {
+ ++nextImgId;
+ }
+ if (type == read_type::safe) {
+ img = img.clone();
+ }
+ readerMetrics.update(startTime);
+ return img;
+ }
+};
+
+class CameraCapWrapper : public ImagesCapture {
+ cv::VideoCapture cap;
+ const read_type type;
+ size_t nextImgId;
+ size_t readLengthLimit;
+
+public:
+ CameraCapWrapper(const std::string& input,
+ bool loop,
+ read_type type,
+ size_t readLengthLimit,
+ cv::Size cameraResolution)
+ : ImagesCapture{loop},
+ type{type},
+ nextImgId{0} {
+ if (0 == readLengthLimit) {
+ throw std::runtime_error("readLengthLimit must be positive");
+ }
+ try {
+ if (cap.open(std::stoi(input))) {
+ this->readLengthLimit = loop ? std::numeric_limits<size_t>::max() : readLengthLimit;
+ cap.set(cv::CAP_PROP_BUFFERSIZE, 1);
+ cap.set(cv::CAP_PROP_FRAME_WIDTH, cameraResolution.width);
+ cap.set(cv::CAP_PROP_FRAME_HEIGHT, cameraResolution.height);
+ cap.set(cv::CAP_PROP_AUTOFOCUS, true);
+ cap.set(cv::CAP_PROP_FOURCC, cv::VideoWriter::fourcc('M', 'J', 'P', 'G'));
+ return;
+ }
+ throw OpenError("Can't open the camera from " + input);
+ } catch (const std::invalid_argument&) {
+ throw InvalidInput("Can't find the camera " + input);
+ } catch (const std::out_of_range&) { throw InvalidInput("Can't find the camera " + input); }
+ }
+
+ double fps() const override {
+ return cap.get(cv::CAP_PROP_FPS) > 0 ? cap.get(cv::CAP_PROP_FPS) : 30;
+ }
+
+ std::string getType() const override {
+ return "CAMERA";
+ }
+
+ cv::Mat read() override {
+ auto startTime = std::chrono::steady_clock::now();
+
+ if (nextImgId >= readLengthLimit) {
+ return cv::Mat{};
+ }
+ cv::Mat img;
+ if (!cap.read(img)) {
+ throw std::runtime_error("The image can't be captured from the camera");
+ }
+ if (type == read_type::safe) {
+ img = img.clone();
+ }
+ ++nextImgId;
+
+ readerMetrics.update(startTime);
+ return img;
+ }
+};
+
+std::unique_ptr<ImagesCapture> openImagesCapture(const std::string& input,
+ bool loop,
+ read_type type,
+ size_t initialImageId,
+ size_t readLengthLimit,
+ cv::Size cameraResolution
+ ) {
+ if (readLengthLimit == 0)
+ throw std::runtime_error{"Read length limit must be positive"};
+ std::vector<std::string> invalidInputs, openErrors;
+ try {
+ return std::unique_ptr<ImagesCapture>(new ImreadWrapper{input, loop});
+ } catch (const InvalidInput& e) { invalidInputs.push_back(e.what()); } catch (const OpenError& e) {
+ openErrors.push_back(e.what());
+ }
+
+ try {
+ return std::unique_ptr<ImagesCapture>(new DirReader{input, loop, initialImageId, readLengthLimit});
+ } catch (const InvalidInput& e) { invalidInputs.push_back(e.what()); } catch (const OpenError& e) {
+ openErrors.push_back(e.what());
+ }
+
+ try {
+ return std::unique_ptr<ImagesCapture>(new VideoCapWrapper{input, loop, type, initialImageId, readLengthLimit});
+ } catch (const InvalidInput& e) { invalidInputs.push_back(e.what()); } catch (const OpenError& e) {
+ openErrors.push_back(e.what());
+ }
+
+ try {
+ return std::unique_ptr<ImagesCapture>(
+ new CameraCapWrapper{input, loop, type, readLengthLimit, cameraResolution});
+ } catch (const InvalidInput& e) { invalidInputs.push_back(e.what()); } catch (const OpenError& e) {
+ openErrors.push_back(e.what());
+ }
+
+ std::vector<std::string> errorMessages = openErrors.empty() ? invalidInputs : openErrors;
+ std::string errorsInfo;
+ for (const auto& message : errorMessages) {
+ errorsInfo.append(message + "\n");
+ }
+ throw std::runtime_error(errorsInfo);
+}
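+
+// Usage sketch: the factory tries each wrapper in turn (single image, image
+// directory, video file, camera index) and returns the first one that accepts
+// the input; read() then yields frames until an empty cv::Mat. read_type's
+// non-cloning enumerator is assumed to be named `efficient` in images_capture.h.
+//
+//   auto cap = openImagesCapture("input.mp4", false, read_type::efficient,
+//                                0, std::numeric_limits<size_t>::max(),
+//                                cv::Size(1280, 720));
+//   for (cv::Mat frame = cap->read(); !frame.empty(); frame = cap->read()) {
+//       // ... per-frame processing ...
+//   }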
diff --git a/python/openvino/runtime/common/demo_utils/src/kuhn_munkres.cpp b/python/openvino/runtime/common/demo_utils/src/kuhn_munkres.cpp
new file mode 100644
index 0000000..7d612c1
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/src/kuhn_munkres.cpp
@@ -0,0 +1,169 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <algorithm>
+#include <limits>
+#include <vector>
+
+#include <utils/kuhn_munkres.hpp>
+
+KuhnMunkres::KuhnMunkres(bool greedy) : n_(), greedy_(greedy) {}
+
+std::vector<size_t> KuhnMunkres::Solve(const cv::Mat& dissimilarity_matrix) {
+ CV_Assert(dissimilarity_matrix.type() == CV_32F);
+
+ n_ = std::max(dissimilarity_matrix.rows, dissimilarity_matrix.cols);
+ dm_ = cv::Mat(n_, n_, CV_32F, cv::Scalar(0));
+ marked_ = cv::Mat(n_, n_, CV_8S, cv::Scalar(0));
+ points_ = std::vector<cv::Point>(n_ * 2);
+
+ dissimilarity_matrix.copyTo(dm_(
+ cv::Rect(0, 0, dissimilarity_matrix.cols, dissimilarity_matrix.rows)));
+
+ is_row_visited_ = std::vector<int>(n_, 0);
+ is_col_visited_ = std::vector<int>(n_, 0);
+
+ Run();
+
+ std::vector<size_t> results(dissimilarity_matrix.rows, -1);
+ for (int i = 0; i < dissimilarity_matrix.rows; i++) {
+ const auto ptr = marked_.ptr<char>(i);
+ for (int j = 0; j < dissimilarity_matrix.cols; j++) {
+ if (ptr[j] == kStar) {
+ results[i] = (size_t)j;
+ }
+ }
+ }
+ return results;
+}
+
+void KuhnMunkres::TrySimpleCase() {
+ auto is_row_visited = std::vector<int>(n_, 0);
+ auto is_col_visited = std::vector<int>(n_, 0);
+
+ for (int row = 0; row < n_; row++) {
+ auto ptr = dm_.ptr<float>(row);
+ auto marked_ptr = marked_.ptr<char>(row);
+ auto min_val = *std::min_element(ptr, ptr + n_);
+ for (int col = 0; col < n_; col++) {
+ ptr[col] -= min_val;
+ if (ptr[col] == 0 && !is_col_visited[col] && !is_row_visited[row]) {
+ marked_ptr[col] = kStar;
+ is_col_visited[col] = 1;
+ is_row_visited[row] = 1;
+ }
+ }
+ }
+}
+
+bool KuhnMunkres::CheckIfOptimumIsFound() {
+ int count = 0;
+ for (int i = 0; i < n_; i++) {
+ const auto marked_ptr = marked_.ptr<char>(i);
+ for (int j = 0; j < n_; j++) {
+ if (marked_ptr[j] == kStar) {
+ is_col_visited_[j] = 1;
+ count++;
+ }
+ }
+ }
+
+ return count >= n_;
+}
+
+cv::Point KuhnMunkres::FindUncoveredMinValPos() {
+ auto min_val = std::numeric_limits<float>::max();
+ cv::Point min_val_pos(-1, -1);
+ for (int i = 0; i < n_; i++) {
+ if (!is_row_visited_[i]) {
+ auto dm_ptr = dm_.ptr<float>(i);
+ for (int j = 0; j < n_; j++) {
+ if (!is_col_visited_[j] && dm_ptr[j] < min_val) {
+ min_val = dm_ptr[j];
+ min_val_pos = cv::Point(j, i);
+ }
+ }
+ }
+ }
+ return min_val_pos;
+}
+
+void KuhnMunkres::UpdateDissimilarityMatrix(float val) {
+ for (int i = 0; i < n_; i++) {
+ auto dm_ptr = dm_.ptr<float>(i);
+ for (int j = 0; j < n_; j++) {
+ if (is_row_visited_[i]) dm_ptr[j] += val;
+ if (!is_col_visited_[j]) dm_ptr[j] -= val;
+ }
+ }
+}
+
+int KuhnMunkres::FindInRow(int row, int what) {
+ for (int j = 0; j < n_; j++) {
+ if (marked_.at<char>(row, j) == what) {
+ return j;
+ }
+ }
+ return -1;
+}
+
+int KuhnMunkres::FindInCol(int col, int what) {
+ for (int i = 0; i < n_; i++) {
+ if (marked_.at<char>(i, col) == what) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+void KuhnMunkres::Run() {
+ TrySimpleCase();
+ if (greedy_)
+ return;
+ while (!CheckIfOptimumIsFound()) {
+ while (true) {
+ auto point = FindUncoveredMinValPos();
+ auto min_val = dm_.at<float>(point.y, point.x);
+ if (min_val > 0) {
+ UpdateDissimilarityMatrix(min_val);
+ } else {
+ marked_.at<char>(point.y, point.x) = kPrime;
+ int col = FindInRow(point.y, kStar);
+ if (col >= 0) {
+ is_row_visited_[point.y] = 1;
+ is_col_visited_[col] = 0;
+ } else {
+ int count = 0;
+ points_[count] = point;
+
+ while (true) {
+ int row = FindInCol(points_[count].x, kStar);
+ if (row >= 0) {
+ count++;
+ points_[count] = cv::Point(points_[count - 1].x, row);
+ int col = FindInRow(points_[count].y, kPrime);
+ count++;
+ points_[count] = cv::Point(col, points_[count - 1].y);
+ } else {
+ break;
+ }
+ }
+
+ for (int i = 0; i < count + 1; i++) {
+ auto& mark = marked_.at<char>(points_[i].y, points_[i].x);
+ mark = mark == kStar ? 0 : kStar;
+ }
+
+ is_row_visited_ = std::vector<int>(n_, 0);
+ is_col_visited_ = std::vector<int>(n_, 0);
+
+ marked_.setTo(0, marked_ == kPrime);
+ break;
+ }
+ }
+ }
+ }
+}
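+
+// Usage sketch: Solve() takes a CV_32F dissimilarity matrix (rows vs. columns,
+// e.g. tracks vs. detections) and returns, for each row, the index of the
+// assigned column.
+//
+//   cv::Mat cost = (cv::Mat_<float>(2, 2) << 1.f, 2.f,
+//                                            4.f, 3.f);
+//   std::vector<size_t> assignment = KuhnMunkres().Solve(cost);
+//   // assignment == {0, 1}: pairing (0,0) and (1,1) gives the minimal total cost 4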
diff --git a/python/openvino/runtime/common/demo_utils/src/performance_metrics.cpp b/python/openvino/runtime/common/demo_utils/src/performance_metrics.cpp
new file mode 100644
index 0000000..d1e494e
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/src/performance_metrics.cpp
@@ -0,0 +1,114 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <iomanip>
+#include <limits>
+#include <sstream>
+
+#include "utils/performance_metrics.hpp"
+#include "utils/slog.hpp"
+
+// timeWindow defines the length of the timespan over which the 'current fps' value is calculated
+PerformanceMetrics::PerformanceMetrics(Duration timeWindow)
+ : timeWindowSize(timeWindow)
+ , firstFrameProcessed(false)
+{}
+
+void PerformanceMetrics::update(TimePoint lastRequestStartTime,
+ const cv::Mat& frame,
+ cv::Point position,
+ int fontFace,
+ double fontScale,
+ cv::Scalar color,
+ int thickness,
+ MetricTypes metricType) {
+ update(lastRequestStartTime);
+ paintMetrics(frame, position, fontFace, fontScale, color, thickness, metricType);
+}
+
+void PerformanceMetrics::update(TimePoint lastRequestStartTime) {
+ TimePoint currentTime = Clock::now();
+
+ if (!firstFrameProcessed) {
+ lastUpdateTime = lastRequestStartTime;
+ firstFrameProcessed = true;
+ }
+
+ currentMovingStatistic.latency += currentTime - lastRequestStartTime;
+ currentMovingStatistic.period = currentTime - lastUpdateTime;
+ currentMovingStatistic.frameCount++;
+
+ if (currentTime - lastUpdateTime > timeWindowSize) {
+ lastMovingStatistic = currentMovingStatistic;
+ totalStatistic.combine(lastMovingStatistic);
+ currentMovingStatistic = Statistic();
+
+ lastUpdateTime = currentTime;
+ }
+}
+
+void PerformanceMetrics::paintMetrics(const cv::Mat& frame, cv::Point position, int fontFace,
+ double fontScale, cv::Scalar color, int thickness, MetricTypes metricType) const {
+ // Draw performance stats over frame
+ Metrics metrics = getLast();
+
+ std::ostringstream out;
+ if (!std::isnan(metrics.latency) &&
+ (metricType == PerformanceMetrics::MetricTypes::LATENCY || metricType == PerformanceMetrics::MetricTypes::ALL)) {
+ out << "Latency: " << std::fixed << std::setprecision(1) << metrics.latency << " ms";
+ putHighlightedText(frame, out.str(), position, fontFace, fontScale, color, thickness);
+ }
+ if (!std::isnan(metrics.fps) &&
+ (metricType == PerformanceMetrics::MetricTypes::FPS || metricType == PerformanceMetrics::MetricTypes::ALL)) {
+ out.str("");
+ out << "FPS: " << std::fixed << std::setprecision(1) << metrics.fps;
+ int offset = metricType == PerformanceMetrics::MetricTypes::ALL ? 30 : 0;
+ putHighlightedText(frame, out.str(), {position.x, position.y + offset}, fontFace, fontScale, color, thickness);
+ }
+}
+
+PerformanceMetrics::Metrics PerformanceMetrics::getLast() const {
+ Metrics metrics;
+
+ metrics.latency = lastMovingStatistic.frameCount != 0
+ ? std::chrono::duration_cast<Ms>(lastMovingStatistic.latency).count()
+ / lastMovingStatistic.frameCount
+ : std::numeric_limits<double>::signaling_NaN();
+ metrics.fps = lastMovingStatistic.period != Duration::zero()
+ ? lastMovingStatistic.frameCount
+ / std::chrono::duration_cast<Sec>(lastMovingStatistic.period).count()
+ : std::numeric_limits<double>::signaling_NaN();
+
+ return metrics;
+}
+
+PerformanceMetrics::Metrics PerformanceMetrics::getTotal() const {
+ Metrics metrics;
+
+ int frameCount = totalStatistic.frameCount + currentMovingStatistic.frameCount;
+ if (frameCount != 0) {
+ metrics.latency = std::chrono::duration_cast<Ms>(
+ totalStatistic.latency + currentMovingStatistic.latency).count() / frameCount;
+ metrics.fps = frameCount / std::chrono::duration_cast<Sec>(
+ totalStatistic.period + currentMovingStatistic.period).count();
+ } else {
+ metrics.latency = std::numeric_limits<double>::signaling_NaN();
+ metrics.fps = std::numeric_limits<double>::signaling_NaN();
+ }
+
+ return metrics;
+}
+
+void PerformanceMetrics::logTotal() const {
+ Metrics metrics = getTotal();
+
+ slog::info << "\tLatency: " << std::fixed << std::setprecision(1) << metrics.latency << " ms" << slog::endl;
+ slog::info << "\tFPS: " << metrics.fps << slog::endl;
+}
+
+void logLatencyPerStage(double readLat, double preprocLat, double inferLat, double postprocLat, double renderLat) {
+ slog::info << "\tDecoding:\t" << std::fixed << std::setprecision(1) <<
+ readLat << " ms" << slog::endl;
+ slog::info << "\tPreprocessing:\t" << preprocLat << " ms" << slog::endl;
+ slog::info << "\tInference:\t" << inferLat << " ms" << slog::endl;
+ slog::info << "\tPostprocessing:\t" << postprocLat << " ms" << slog::endl;
+ slog::info << "\tRendering:\t" << renderLat << " ms" << slog::endl;
+}
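+
+// Usage sketch: call update() once per processed frame with the time the
+// request was started; the cv::Mat overload also paints latency/FPS onto the
+// frame (the font and color parameters are assumed to have defaults in the
+// header).
+//
+//   PerformanceMetrics metrics;
+//   auto startTime = std::chrono::steady_clock::now();
+//   // ... run inference and render results into `frame` ...
+//   metrics.update(startTime, frame, cv::Point(10, 22));
+//   // once the input is exhausted:
+//   metrics.logTotal();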
diff --git a/python/openvino/runtime/common/demo_utils/src/w_dirent.hpp b/python/openvino/runtime/common/demo_utils/src/w_dirent.hpp
new file mode 100644
index 0000000..0df8636
--- /dev/null
+++ b/python/openvino/runtime/common/demo_utils/src/w_dirent.hpp
@@ -0,0 +1,114 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#if defined(_WIN32)
+
+#ifndef NOMINMAX
+# define NOMINMAX
+#endif
+
+#include <WinSock2.h>
+#include <Windows.h>
+#include <stdlib.h>
+
+#else
+
+#include <unistd.h>
+#include <cstdlib>
+#include <string.h>
+
+#endif
+
+#include <string>
+
+#include <sys/stat.h>
+
+#if defined(WIN32)
+ // Copied from linux libc sys/stat.h:
+ #define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+ #define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+#endif
+
+struct dirent {
+ char *d_name;
+
+ explicit dirent(const wchar_t *wsFilePath) {
+ size_t i;
+ auto slen = wcslen(wsFilePath);
+ d_name = static_cast<char*>(malloc(slen + 1));
+ wcstombs_s(&i, d_name, slen + 1, wsFilePath, slen);
+ }
+
+ ~dirent() {
+ free(d_name);
+ }
+};
+
+class DIR {
+ WIN32_FIND_DATAA FindFileData;
+ HANDLE hFind;
+ dirent *next;
+
+ static inline bool endsWith(const std::string &src, const char *with) {
+ int wl = static_cast<int>(strlen(with));
+ int so = static_cast<int>(src.length()) - wl;
+ if (so < 0) return false;
+ return 0 == strncmp(with, &src[so], wl);
+ }
+
+public:
+ explicit DIR(const char *dirPath) : next(nullptr) {
+ std::string ws = dirPath;
+ if (endsWith(ws, "\\"))
+ ws += "*";
+ else
+ ws += "\\*";
+ hFind = FindFirstFileA(ws.c_str(), &FindFileData);
+ FindFileData.dwReserved0 = hFind != INVALID_HANDLE_VALUE;
+ }
+
+    ~DIR() {
+        delete next; // delete on a null pointer is a safe no-op
+        FindClose(hFind);
+    }
+
+ bool isValid() const {
+ return (hFind != INVALID_HANDLE_VALUE && FindFileData.dwReserved0);
+ }
+
+ dirent* nextEnt() {
+ if (next != nullptr) delete next;
+ next = nullptr;
+
+ if (!FindFileData.dwReserved0) return nullptr;
+
+ wchar_t wbuf[4096];
+
+ size_t outSize;
+ mbstowcs_s(&outSize, wbuf, 4094, FindFileData.cFileName, 4094);
+ next = new dirent(wbuf);
+ FindFileData.dwReserved0 = FindNextFileA(hFind, &FindFileData);
+ return next;
+ }
+};
+
+
+static DIR *opendir(const char* dirPath) {
+ auto dp = new DIR(dirPath);
+ if (!dp->isValid()) {
+ delete dp;
+ return nullptr;
+ }
+ return dp;
+}
+
+static struct dirent *readdir(DIR *dp) {
+ return dp->nextEnt();
+}
+
+static void closedir(DIR *dp) {
+ delete dp;
+}
diff --git a/python/openvino/runtime/common/format_reader/CMakeLists.txt b/python/openvino/runtime/common/format_reader/CMakeLists.txt
new file mode 100644
index 0000000..3daab96
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/CMakeLists.txt
@@ -0,0 +1,55 @@
+# Copyright (C) 2018-2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set (TARGET_NAME "format_reader")
+
+file (GLOB MAIN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
+file (GLOB LIBRARY_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.h)
+
+# Create named folders for the sources within the .vcproj
+# Empty name lists them directly under the .vcproj
+source_group("src" FILES ${LIBRARY_SRC})
+source_group("include" FILES ${LIBRARY_HEADERS})
+
+# Create library file from sources.
+add_library(${TARGET_NAME} SHARED ${MAIN_SRC} ${LIBRARY_HEADERS})
+
+# Find OpenCV components if exist
+find_package(OpenCV QUIET COMPONENTS core imgproc imgcodecs)
+if(NOT OpenCV_FOUND)
+ message(WARNING "OpenCV is disabled or not found, ${TARGET_NAME} will be built without OpenCV support")
+else()
+ target_link_libraries(${TARGET_NAME} PRIVATE ${OpenCV_LIBRARIES} ie_samples_utils)
+ if(UNIX AND NOT APPLE)
+ # Workaround issue that rpath-link is missing for PRIVATE dependencies
+ # Fixed in cmake 3.16.0 https://gitlab.kitware.com/cmake/cmake/issues/19556
+ target_link_libraries(${TARGET_NAME} INTERFACE "-Wl,-rpath-link,${OpenCV_INSTALL_PATH}/lib")
+ endif()
+ # Make this definition public so that it's also seen by dla benchmark. As dla benchmark
+ # uses this macro to identify which image extensions are supported by the image reader
+ target_compile_definitions(${TARGET_NAME} PUBLIC USE_OPENCV)
+endif()
+
+target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_FORMAT_READER)
+
+target_include_directories(${TARGET_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}"
+ "${CMAKE_CURRENT_SOURCE_DIR}/..")
+
+set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}
+ FOLDER cpp_samples)
+
+if(COMMAND add_clang_format_target)
+ add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME})
+endif()
+
+install(
+ TARGETS ${TARGET_NAME}
+ RUNTIME DESTINATION samples_bin/ COMPONENT samples_bin EXCLUDE_FROM_ALL
+ LIBRARY DESTINATION samples_bin/ COMPONENT samples_bin EXCLUDE_FROM_ALL
+)
+
+install(TARGETS ${TARGET_NAME}
+ RUNTIME DESTINATION "dla/bin" COMPONENT EMUTEST
+ LIBRARY DESTINATION "dla/lib" COMPONENT EMUTEST
+ ARCHIVE DESTINATION "dla/lib" COMPONENT EMUTEST)
diff --git a/python/openvino/runtime/common/format_reader/MnistUbyte.cpp b/python/openvino/runtime/common/format_reader/MnistUbyte.cpp
new file mode 100644
index 0000000..182ef99
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/MnistUbyte.cpp
@@ -0,0 +1,66 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// clang-format off
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#include "MnistUbyte.h"
+// clang-format on
+
+using namespace FormatReader;
+
+int MnistUbyte::reverseInt(int i) {
+ unsigned char ch1, ch2, ch3, ch4;
+ ch1 = (unsigned char)(i & 255);
+ ch2 = (unsigned char)((i >> 8) & 255);
+ ch3 = (unsigned char)((i >> 16) & 255);
+ ch4 = (unsigned char)((i >> 24) & 255);
+ return (static_cast<int>(ch1) << 24) + (static_cast<int>(ch2) << 16) + (static_cast<int>(ch3) << 8) + ch4;
+}
+
+MnistUbyte::MnistUbyte(const std::string& filename) {
+ std::ifstream file(filename, std::ios::binary);
+ if (!file.is_open()) {
+ return;
+ }
+ int magic_number = 0;
+ int number_of_images = 0;
+ int n_rows = 0;
+ int n_cols = 0;
+ file.read(reinterpret_cast<char*>(&magic_number), sizeof(magic_number));
+ magic_number = reverseInt(magic_number);
+ if (magic_number != 2051) {
+ return;
+ }
+ file.read(reinterpret_cast<char*>(&number_of_images), sizeof(number_of_images));
+ number_of_images = reverseInt(number_of_images);
+ file.read(reinterpret_cast<char*>(&n_rows), sizeof(n_rows));
+ n_rows = reverseInt(n_rows);
+ _height = (size_t)n_rows;
+ file.read(reinterpret_cast<char*>(&n_cols), sizeof(n_cols));
+ n_cols = reverseInt(n_cols);
+ _width = (size_t)n_cols;
+ if (number_of_images > 1) {
+ std::cout << "[MNIST] Warning: number_of_images in mnist file equals " << number_of_images
+ << ". Only a first image will be read." << std::endl;
+ }
+
+ size_t size = _width * _height * 1;
+
+ _data.reset(new unsigned char[size], std::default_delete<unsigned char[]>());
+ size_t count = 0;
+ if (0 < number_of_images) {
+ for (int r = 0; r < n_rows; ++r) {
+ for (int c = 0; c < n_cols; ++c) {
+ unsigned char temp = 0;
+ file.read(reinterpret_cast<char*>(&temp), sizeof(temp));
+ _data.get()[count++] = temp;
+ }
+ }
+ }
+
+ file.close();
+}
diff --git a/python/openvino/runtime/common/format_reader/MnistUbyte.h b/python/openvino/runtime/common/format_reader/MnistUbyte.h
new file mode 100644
index 0000000..8991166
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/MnistUbyte.h
@@ -0,0 +1,58 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * \brief Mnist reader
+ * \file MnistUbyte.h
+ */
+#pragma once
+
+#include <memory>
+#include <string>
+
+// clang-format off
+#include "format_reader.h"
+#include "register.h"
+// clang-format on
+
+namespace FormatReader {
+/**
+ * \class MnistUbyte
+ * \brief Reader for mnist db files
+ */
+class MnistUbyte : public Reader {
+private:
+ int reverseInt(int i);
+
+ static Register<MnistUbyte> reg;
+
+public:
+ /**
+ * \brief Constructor of Mnist reader
+ * @param filename - path to input data
+ * @return MnistUbyte reader object
+ */
+ explicit MnistUbyte(const std::string& filename);
+ virtual ~MnistUbyte() {}
+
+ /**
+ * \brief Get size
+ * @return size
+ */
+ size_t size() const override {
+ return _width * _height * 1;
+ }
+
+    // langsu: ResizeType was added by us to support custom resizing functions (only in opencv_wrapper).
+    // format_reader is copied from openvino samples/cpp/common/format_reader/
+    // and might need special care when doing an OV uplift
+ std::shared_ptr<unsigned char> getData(size_t width, size_t height, ResizeType resize_type) override {
+ if ((width * height != 0) && (_width * _height != width * height)) {
+ std::cout << "[ WARNING ] Image won't be resized! Please use OpenCV.\n";
+ return nullptr;
+ }
+ return _data;
+ }
+};
+} // namespace FormatReader
diff --git a/python/openvino/runtime/common/format_reader/bmp.cpp b/python/openvino/runtime/common/format_reader/bmp.cpp
new file mode 100644
index 0000000..240d13f
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/bmp.cpp
@@ -0,0 +1,64 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// clang-format off
+#include <fstream>
+#include <iostream>
+
+#include "bmp.h"
+// clang-format on
+
+using namespace std;
+using namespace FormatReader;
+
+BitMap::BitMap(const string& filename) {
+ BmpHeader header;
+ BmpInfoHeader infoHeader;
+
+ ifstream input(filename, ios::binary);
+ if (!input) {
+ return;
+ }
+
+ input.read(reinterpret_cast<char*>(&header.type), 2);
+
+ if (header.type != 'M' * 256 + 'B') {
+ std::cerr << "[BMP] file is not bmp type\n";
+ return;
+ }
+
+ input.read(reinterpret_cast<char*>(&header.size), 4);
+ input.read(reinterpret_cast<char*>(&header.reserved), 4);
+ input.read(reinterpret_cast<char*>(&header.offset), 4);
+
+ input.read(reinterpret_cast<char*>(&infoHeader), sizeof(BmpInfoHeader));
+
+ bool rowsReversed = infoHeader.height < 0;
+ _width = infoHeader.width;
+ _height = abs(infoHeader.height);
+
+ if (infoHeader.bits != 24) {
+ cerr << "[BMP] 24bpp only supported. But input has:" << infoHeader.bits << "\n";
+ return;
+ }
+
+ if (infoHeader.compression != 0) {
+ cerr << "[BMP] compression not supported\n";
+ }
+
+ int padSize = _width & 3;
+ char pad[3];
+ size_t size = _width * _height * 3;
+
+ _data.reset(new unsigned char[size], std::default_delete<unsigned char[]>());
+
+ input.seekg(header.offset, ios::beg);
+
+    // BMP rows are stored bottom-up unless the height is negative, so flip vertically while reading
+ for (uint32_t i = 0; i < _height; i++) {
+ uint32_t storeAt = rowsReversed ? i : (uint32_t)_height - 1 - i;
+ input.read(reinterpret_cast<char*>(_data.get()) + _width * 3 * storeAt, _width * 3);
+ input.read(pad, padSize);
+ }
+}
diff --git a/python/openvino/runtime/common/format_reader/bmp.h b/python/openvino/runtime/common/format_reader/bmp.h
new file mode 100644
index 0000000..ac3ff31
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/bmp.h
@@ -0,0 +1,75 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * \brief BMP reader
+ * \file bmp.h
+ */
+#pragma once
+
+#include <memory>
+#include <string>
+
+// clang-format off
+#include "format_reader.h"
+#include "register.h"
+// clang-format on
+
+namespace FormatReader {
+/**
+ * \class BitMap
+ * \brief Reader for bmp files
+ */
+class BitMap : public Reader {
+private:
+ static Register<BitMap> reg;
+
+ typedef struct BmpHeaderType {
+ unsigned short type = 0u; /* Magic identifier */
+ unsigned int size = 0u; /* File size in bytes */
+ unsigned int reserved = 0u;
+ unsigned int offset = 0u; /* Offset to image data, bytes */
+ } BmpHeader;
+
+ typedef struct BmpInfoHeaderType {
+ unsigned int size = 0u; /* Header size in bytes */
+ int width = 0, height = 0; /* Width and height of image */
+ unsigned short planes = 0u; /* Number of colour planes */
+ unsigned short bits = 0u; /* Bits per pixel */
+ unsigned int compression = 0u; /* Compression type */
+ unsigned int imagesize = 0u; /* Image size in bytes */
+ int xresolution = 0, yresolution = 0; /* Pixels per meter */
+ unsigned int ncolours = 0u; /* Number of colours */
+ unsigned int importantcolours = 0u; /* Important colours */
+ } BmpInfoHeader;
+
+public:
+ /**
+ * \brief Constructor of BMP reader
+ * @param filename - path to input data
+ * @return BitMap reader object
+ */
+ explicit BitMap(const std::string& filename);
+ virtual ~BitMap() {}
+
+ /**
+ * \brief Get size
+ * @return size
+ */
+ size_t size() const override {
+ return _width * _height * 3;
+ }
+
+    // langsu: ResizeType was added by us to support custom resizing functions (only in opencv_wrapper).
+    // format_reader is copied from openvino samples/cpp/common/format_reader/
+    // and might need special care when doing an OV uplift
+ std::shared_ptr<unsigned char> getData(size_t width, size_t height, ResizeType resize_type) override {
+ if ((width * height != 0) && (_width * _height != width * height)) {
+ std::cout << "[ WARNING ] Image won't be resized! Please use OpenCV.\n";
+ return nullptr;
+ }
+ return _data;
+ }
+};
+} // namespace FormatReader
diff --git a/python/openvino/runtime/common/format_reader/format_reader.cpp b/python/openvino/runtime/common/format_reader/format_reader.cpp
new file mode 100644
index 0000000..94a8441
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/format_reader.cpp
@@ -0,0 +1,44 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <iostream>
+
+// clang-format off
+#include "bmp.h"
+#include "MnistUbyte.h"
+#include "yuv_nv12.h"
+#include "opencv_wrapper.h"
+#include "format_reader.h"
+// clang-format on
+
+using namespace FormatReader;
+
+std::vector<Registry::CreatorFunction> Registry::_data;
+
+Register<MnistUbyte> MnistUbyte::reg;
+Register<YUV_NV12> YUV_NV12::reg;
+#ifdef USE_OPENCV
+Register<OCVReader> OCVReader::reg;
+#else
+Register<BitMap> BitMap::reg;
+#endif
+
+Reader* Registry::CreateReader(const char* filename) {
+ for (const auto &maker : _data) {
+ Reader* ol = maker(filename);
+ if (ol != nullptr && ol->size() != 0)
+ return ol;
+ if (ol != nullptr)
+ delete ol;
+ }
+ return nullptr;
+}
+
+void Registry::RegisterReader(CreatorFunction f) {
+ _data.push_back(f);
+}
+
+FORMAT_READER_API(Reader*) CreateFormatReader(const char* filename) {
+ return Registry::CreateReader(filename);
+}
diff --git a/python/openvino/runtime/common/format_reader/format_reader.h b/python/openvino/runtime/common/format_reader/format_reader.h
new file mode 100644
index 0000000..99fc573
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/format_reader.h
@@ -0,0 +1,95 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * \brief Format reader abstract class implementation
+ * \file format_reader.h
+ */
+#pragma once
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#if defined(_WIN32)
+# ifdef IMPLEMENT_FORMAT_READER
+# define FORMAT_READER_API(type) extern "C" __declspec(dllexport) type
+# else
+# define FORMAT_READER_API(type) extern "C" type
+# endif
+#elif (__GNUC__ >= 4)
+# ifdef IMPLEMENT_FORMAT_READER
+# define FORMAT_READER_API(type) extern "C" __attribute__((visibility("default"))) type
+# else
+# define FORMAT_READER_API(type) extern "C" type
+# endif
+#else
+# define FORMAT_READER_API(TYPE) extern "C" TYPE
+#endif
+
+namespace FormatReader {
+/**
+ * \class FormatReader
+ * \brief This is an abstract class for reading input data
+ */
+class Reader {
+protected:
+ /// \brief height
+ size_t _height = 0;
+ /// \brief width
+ size_t _width = 0;
+ /// \brief data
+ std::shared_ptr<unsigned char> _data;
+
+public:
+ virtual ~Reader() = default;
+
+    // langsu: ResizeType was added by us to support custom resizing functions (only in opencv_wrapper).
+    // format_reader is copied from openvino samples/cpp/common/format_reader/
+    // and might need special care when doing an OV uplift
+ enum ResizeType {
+ RESIZE, // resize the image to target (height, width)
+ PAD_RESIZE, // pad the image into a squared image and then resize the image to target (height, width)
+ };
+
+ /**
+ * \brief Get width
+ * @return width
+ */
+ size_t width() const {
+ return _width;
+ }
+
+ /**
+ * \brief Get height
+ * @return height
+ */
+ size_t height() const {
+ return _height;
+ }
+
+    /**
+     * \brief Get input data ptr
+     * @return shared pointer with input data
+     * @note In case of using OpenCV, parameters width and height will be used for image resizing
+     */
+    // langsu: ResizeType was added by us to support custom resizing functions (only in opencv_wrapper).
+    // Needs special care when doing an OV uplift
+ virtual std::shared_ptr<unsigned char> getData(size_t width = 0, size_t height = 0,
+ ResizeType resize_type = ResizeType::RESIZE) = 0;
+
+ /**
+ * \brief Get size
+ * @return size
+ */
+ virtual size_t size() const = 0;
+};
+} // namespace FormatReader
+
+/**
+ * \brief Function for create reader
+ * @return FormatReader pointer
+ */
+FORMAT_READER_API(FormatReader::Reader*) CreateFormatReader(const char* filename);
diff --git a/python/openvino/runtime/common/format_reader/format_reader_ptr.h b/python/openvino/runtime/common/format_reader/format_reader_ptr.h
new file mode 100644
index 0000000..eb9bf8e
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/format_reader_ptr.h
@@ -0,0 +1,43 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * \brief Implementation of smart pointer for Reader class
+ * \file format_reader_ptr.h
+ */
+#pragma once
+
+#include <functional>
+#include <memory>
+
+#include "format_reader.h"
+
+namespace FormatReader {
+class ReaderPtr {
+public:
+ explicit ReaderPtr(const char* imageName) : reader(CreateFormatReader(imageName)) {}
+ /**
+ * @brief dereference operator overload
+ * @return Reader
+ */
+ Reader* operator->() const noexcept {
+ return reader.get();
+ }
+
+ /**
+ * @brief dereference operator overload
+ * @return Reader
+ */
+ Reader* operator*() const noexcept {
+ return reader.get();
+ }
+
+ Reader* get() {
+ return reader.get();
+ }
+
+protected:
+ std::unique_ptr<Reader> reader;
+};
+} // namespace FormatReader
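+
+// Usage sketch: ReaderPtr selects whichever registered reader accepts the file
+// (inputWidth and inputHeight are the model's expected dimensions, defined by
+// the caller):
+//
+//   FormatReader::ReaderPtr reader("cat.bmp");
+//   if (reader.get() == nullptr)
+//       throw std::logic_error("Image cannot be read");
+//   std::shared_ptr<unsigned char> data = reader->getData(inputWidth, inputHeight);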
diff --git a/python/openvino/runtime/common/format_reader/opencv_wrapper.cpp b/python/openvino/runtime/common/format_reader/opencv_wrapper.cpp
new file mode 100644
index 0000000..b8ebeef
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/opencv_wrapper.cpp
@@ -0,0 +1,83 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifdef USE_OPENCV
+# include <cstdlib>
+# include <cstring>
+# include <fstream>
+# include <iostream>
+
+// clang-format off
+# include <opencv2/opencv.hpp>
+
+# include "samples/slog.hpp"
+# include "opencv_wrapper.h"
+// clang-format on
+
+using namespace std;
+using namespace FormatReader;
+
+OCVReader::OCVReader(const string& filename) {
+ img = cv::imread(filename);
+ _size = 0;
+
+ if (img.empty()) {
+ return;
+ }
+
+ _size = img.size().width * img.size().height * img.channels();
+ _width = img.size().width;
+ _height = img.size().height;
+}
+
+// Set the maximum number of printed warnings; large image directories can otherwise be overwhelming
+static size_t resize_warning_count = 0;
+const size_t max_resize_warnings = 5;
+
+std::shared_ptr<unsigned char> OCVReader::getData(size_t width = 0, size_t height = 0, ResizeType resize_type = ResizeType::RESIZE) {
+ if (width == 0)
+ width = img.cols;
+
+ if (height == 0)
+ height = img.rows;
+
+ size_t size = width * height * img.channels();
+ _data.reset(new unsigned char[size], std::default_delete<unsigned char[]>());
+
+ if (width != static_cast<size_t>(img.cols) || height != static_cast<size_t>(img.rows)) {
+ if (resize_warning_count < max_resize_warnings) {
+ slog::warn << "Image is resized from (" << img.cols << ", " << img.rows << ") to (" << width << ", " << height
+ << ")" << slog::endl;
+ resize_warning_count++;
+ } else if (resize_warning_count == max_resize_warnings) {
+ slog::warn << "Additional image resizing messages have been suppressed." << slog::endl;
+ resize_warning_count++;
+ }
+ }
+
+ cv::Mat resized;
+ if (resize_type == ResizeType::RESIZE) {
+ resized = cv::Mat(cv::Size(width, height), img.type(), _data.get());
+ // cv::resize() just copy data to output image if sizes are the same
+ cv::resize(img, resized, cv::Size(width, height));
+ } else if (resize_type == ResizeType::PAD_RESIZE)
+ {
+ cv::Mat padded;
+ // Find the larger side out of width and height of the image
+ int max_dim = std::max(img.rows, img.cols);
+ // Calculate padding for shorter dimension
+ int top = (max_dim - img.rows) / 2;
+ int bottom = (max_dim - img.rows + 1) / 2;
+ int left = (max_dim - img.cols) / 2;
+ int right = (max_dim - img.cols + 1) / 2;
+ // Add padding (0, i.e., black) to make the image a square
+ cv::copyMakeBorder(img, padded, top, bottom, left, right, cv::BORDER_CONSTANT, cv::Scalar());
+ cv::resize(padded, resized, cv::Size(width, height));
+ std::memcpy(_data.get(), resized.data, resized.total() * resized.elemSize());
+ } else {
+ slog::err << "Specified resize type is not implemented." << slog::endl;
+ std::exit(1);
+ }
+
+ return _data;
+}
+#endif
diff --git a/python/openvino/runtime/common/format_reader/opencv_wrapper.h b/python/openvino/runtime/common/format_reader/opencv_wrapper.h
new file mode 100644
index 0000000..c402e8d
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/opencv_wrapper.h
@@ -0,0 +1,58 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * \brief Image reader
+ * \file opencv_wrapper.h
+ */
+#pragma once
+
+#ifdef USE_OPENCV
+# include <memory>
+# include <string>
+
+// clang-format off
+# include <opencv2/opencv.hpp>
+
+# include "format_reader.h"
+# include "register.h"
+// clang-format on
+
+namespace FormatReader {
+/**
+ * \class OCVReader
+ * \brief OpenCV Wrapper
+ */
+class OCVReader : public Reader {
+private:
+ cv::Mat img;
+ size_t _size;
+ static Register<OCVReader> reg;
+
+public:
+    /**
+     * \brief Constructor of the OpenCV image reader
+     * @param filename - path to input data
+     * @return OCVReader object
+     */
+ explicit OCVReader(const std::string& filename);
+ virtual ~OCVReader() {}
+
+ /**
+ * \brief Get size
+ * @return size
+ */
+ size_t size() const override {
+ return _size;
+ }
+
+    // langsu: ResizeType was added by us to support custom resizing functions (only in opencv_wrapper).
+    // format_reader is copied from openvino samples/cpp/common/format_reader/
+    // and might need special care when doing an OV uplift
+ std::shared_ptr<unsigned char> getData(size_t width,
+ size_t height,
+ ResizeType resize_type) override;
+};
+} // namespace FormatReader
+#endif
diff --git a/python/openvino/runtime/common/format_reader/register.h b/python/openvino/runtime/common/format_reader/register.h
new file mode 100644
index 0000000..781eca3
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/register.h
@@ -0,0 +1,58 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+/**
+ * \brief Register for readers
+ * \file register.h
+ */
+#pragma once
+
+#include <functional>
+#include <string>
+#include <vector>
+
+#include "format_reader.h"
+
+namespace FormatReader {
+/**
+ * \class Registry
+ * \brief Factory that creates readers
+ */
+class Registry {
+private:
+ typedef std::function<Reader*(const std::string& filename)> CreatorFunction;
+ static std::vector<CreatorFunction> _data;
+
+public:
+ /**
+ * \brief Create reader
+ * @param filename - path to input data
+ * @return Reader for input data or nullptr
+ */
+ static Reader* CreateReader(const char* filename);
+
+ /**
+ * \brief Registers a reader in the factory
+ * @param f - a creation function
+ */
+ static void RegisterReader(CreatorFunction f);
+};
+
+/**
+ * \class Register
+ * \brief Registers a reader in the factory
+ */
+template <typename To>
+class Register {
+public:
+ /**
+ * \brief Constructor registers a creation function with the factory
+ * @return Register object
+ */
+ Register() {
+ Registry::RegisterReader([](const std::string& filename) -> Reader* {
+ return new To(filename);
+ });
+ }
+};
+} // namespace FormatReader
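Registry and Register<To> form a self-registration factory: defining a reader's static reg member runs the Register constructor during static initialization, which hands the factory a lambda that constructs that reader. A sketch of the idiom with a hypothetical TGAReader (the name and format are illustrative only):

    #include <memory>
    #include <string>

    #include "format_reader.h"
    #include "register.h"

    namespace FormatReader {
    class TGAReader : public Reader {
        size_t _size = 0;
        static Register<TGAReader> reg;  // constructing this registers a creator lambda

    public:
        explicit TGAReader(const std::string& filename) {
            // Parse the file here; on unsupported input, return early and
            // leave _size == 0, as the other readers do.
        }
        size_t size() const override {
            return _size;
        }
        std::shared_ptr<unsigned char> getData(size_t width, size_t height, ResizeType resize_type) override {
            return _data;  // _data is inherited from Reader, as in yuv_nv12.h
        }
    };

    // A single definition in a .cpp file runs Registry::RegisterReader() before main().
    Register<TGAReader> TGAReader::reg;
    }  // namespace FormatReader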
diff --git a/python/openvino/runtime/common/format_reader/yuv_nv12.cpp b/python/openvino/runtime/common/format_reader/yuv_nv12.cpp
new file mode 100644
index 0000000..f25c5cb
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/yuv_nv12.cpp
@@ -0,0 +1,36 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// clang-format off
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#include "yuv_nv12.h"
+// clang-format on
+
+using namespace FormatReader;
+
+YUV_NV12::YUV_NV12(const std::string& filename) {
+ auto pos = filename.rfind('.');
+ if (pos == std::string::npos)
+ return;
+ if (filename.substr(pos + 1) != "yuv")
+ return;
+
+ std::ifstream file(filename, std::ios::binary);
+ if (!file.is_open()) {
+ return;
+ }
+
+ file.seekg(0, file.end);
+ _size = file.tellg();
+ file.seekg(0, file.beg);
+
+ _data.reset(new unsigned char[_size], std::default_delete<unsigned char[]>());
+
+ file.read(reinterpret_cast<char*>(_data.get()), _size);
+
+ file.close();
+}
diff --git a/python/openvino/runtime/common/format_reader/yuv_nv12.h b/python/openvino/runtime/common/format_reader/yuv_nv12.h
new file mode 100644
index 0000000..dd74c7b
--- /dev/null
+++ b/python/openvino/runtime/common/format_reader/yuv_nv12.h
@@ -0,0 +1,57 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * \brief YUV NV12 reader
+ * \file yuv_nv12.h
+ */
+#pragma once
+
+#include <memory>
+#include <string>
+
+// clang-format off
+#include "format_reader.h"
+#include "register.h"
+// clang-format on
+
+namespace FormatReader {
+/**
+ * \class YUV_NV12
+ * \brief Reader for YUV NV12 files
+ */
+class YUV_NV12 : public Reader {
+private:
+ static Register<YUV_NV12> reg;
+ size_t _size = 0;
+
+public:
+ /**
+ * \brief Constructor of YUV NV12 reader
+ * @param filename - path to input data
+ * @return YUV_NV12 reader object
+ */
+ explicit YUV_NV12(const std::string& filename);
+ virtual ~YUV_NV12() {}
+
+ /**
+ * \brief Get size
+ * @return size
+ */
+ size_t size() const override {
+ return _size;
+ }
+
+ // langsu: ResizeType was added by us to support custom resizing functions (only in opencv_wrapper).
+ // format_reader is copied from openvino samples/cpp/common/format_reader/;
+ // this might need special care when doing an OV uplift.
+ std::shared_ptr<unsigned char> getData(size_t width, size_t height, Reader::ResizeType resize_type) override {
+ if (width * height * 3 / 2 != size()) {
+ std::cout << "Image dimensions do not match the NV12 file size\n";
+ return nullptr;
+ }
+ return _data;
+ }
+};
+} // namespace FormatReader
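The size check in getData() encodes the NV12 layout: a full-resolution 8-bit Y plane followed by an interleaved UV plane subsampled 2x2, for width * height * 3 / 2 bytes per frame. A quick compile-time check of that arithmetic:

    #include <cstddef>

    // NV12: Y plane (width * height bytes) + interleaved UV plane
    // subsampled 2x2 (width * height / 2 bytes).
    constexpr std::size_t nv12FrameSize(std::size_t width, std::size_t height) {
        return width * height + width * height / 2;  // == width * height * 3 / 2
    }

    static_assert(nv12FrameSize(1920, 1080) == 1920 * 1080 * 3 / 2, "NV12 size formula");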
diff --git a/python/openvino/runtime/common/models/CMakeLists.txt b/python/openvino/runtime/common/models/CMakeLists.txt
new file mode 100644
index 0000000..07c8da3
--- /dev/null
+++ b/python/openvino/runtime/common/models/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+file(GLOB SOURCES ./src/*.cpp)
+file(GLOB HEADERS ./include/models/*.h)
+
+# Create named folders for the sources within the .vcproj
+# Empty name lists them directly under the .vcproj
+source_group("src" FILES ${SOURCES})
+source_group("include" FILES ${HEADERS})
+
+add_library(models STATIC ${SOURCES} ${HEADERS})
+target_include_directories(models PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
+
+target_link_libraries(models PRIVATE openvino::runtime utils opencv_core opencv_imgproc)
diff --git a/python/openvino/runtime/common/models/include/models/associative_embedding_decoder.h b/python/openvino/runtime/common/models/include/models/associative_embedding_decoder.h
new file mode 100644
index 0000000..94afbda
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/associative_embedding_decoder.h
@@ -0,0 +1,94 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <stddef.h>
+
+#include <memory>
+#include <vector>
+
+#include <opencv2/core.hpp>
+
+struct Peak {
+ explicit Peak(const cv::Point2f& keypoint = cv::Point2f(-1, -1), const float score = 0.0f, const float tag = 0.0f)
+ : keypoint(keypoint),
+ score(score),
+ tag(tag) {}
+
+ cv::Point2f keypoint;
+ float score;
+ float tag;
+};
+
+class Pose {
+public:
+ explicit Pose(size_t numJoints) : peaks(numJoints) {}
+
+ void add(size_t index, Peak peak) {
+ peaks[index] = peak;
+ sum += peak.score;
+ poseTag = poseTag * static_cast<float>(validPointsNum) + peak.tag;
+ poseCenter = poseCenter * static_cast<float>(validPointsNum) + peak.keypoint;
+ validPointsNum += 1;
+ poseTag = poseTag / static_cast<float>(validPointsNum);
+ poseCenter = poseCenter / static_cast<float>(validPointsNum);
+ }
+
+ float getPoseTag() const {
+ return poseTag;
+ }
+
+ float getMeanScore() const {
+ return sum / static_cast<float>(size());
+ }
+
+ Peak& getPeak(size_t index) {
+ return peaks[index];
+ }
+
+ cv::Point2f& getPoseCenter() {
+ return poseCenter;
+ }
+
+ size_t size() const {
+ return peaks.size();
+ }
+
+private:
+ std::vector<Peak> peaks;
+ cv::Point2f poseCenter = cv::Point2f(0.f, 0.f);
+ int validPointsNum = 0;
+ float poseTag = 0;
+ float sum = 0;
+};
+
+void findPeaks(const std::vector<cv::Mat>& nmsHeatMaps,
+ const std::vector<cv::Mat>& aembdsMaps,
+ std::vector<std::vector<Peak>>& allPeaks,
+ size_t jointId,
+ size_t maxNumPeople,
+ float detectionThreshold);
+
+std::vector<Pose> matchByTag(std::vector<std::vector<Peak>>& allPeaks,
+ size_t maxNumPeople,
+ size_t numJoints,
+ float tagThreshold);
+
+void adjustAndRefine(std::vector<Pose>& allPoses,
+ const std::vector<cv::Mat>& heatMaps,
+ const std::vector<cv::Mat>& aembdsMaps,
+ int poseId,
+ float delta);
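Pose::add() maintains poseTag and poseCenter as incremental means: the current mean is rescaled by the old sample count, the new sample is added, and the result is divided by the new count. A minimal check of that recurrence in plain C++:

    #include <cassert>
    #include <cmath>
    #include <initializer_list>

    // Incremental mean as in Pose::add(): mean_{n+1} = (mean_n * n + x) / (n + 1).
    int main() {
        float mean = 0.0f;
        int n = 0;
        for (float x : {2.0f, 4.0f, 6.0f}) {
            mean = (mean * static_cast<float>(n) + x) / static_cast<float>(n + 1);
            ++n;
        }
        assert(std::fabs(mean - 4.0f) < 1e-6f);  // (2 + 4 + 6) / 3
        return 0;
    }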
diff --git a/python/openvino/runtime/common/models/include/models/classification_model.h b/python/openvino/runtime/common/models/include/models/classification_model.h
new file mode 100644
index 0000000..6d32e44
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/classification_model.h
@@ -0,0 +1,57 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <stddef.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "models/image_model.h"
+
+namespace ov {
+class Model;
+} // namespace ov
+struct InferenceResult;
+struct ResultBase;
+
+class ClassificationModel : public ImageModel {
+public:
+ /// Constructor
+ /// @param modelFileName name of model to load.
+ /// @param nTop - number of top results to report.
+ /// @param useAutoResize - if true, image will be resized by openvino.
+ /// Otherwise, image will be preprocessed and resized using OpenCV routines.
+ /// @param labels - array of labels for every class.
+ /// @param layout - model input layout
+ ClassificationModel(const std::string& modelFileName,
+ size_t nTop,
+ bool useAutoResize,
+ const std::vector<std::string>& labels,
+ const std::string& layout = "");
+
+ std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+ static std::vector<std::string> loadLabels(const std::string& labelFilename);
+
+protected:
+ size_t nTop;
+ std::vector<std::string> labels;
+
+ void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+};
diff --git a/python/openvino/runtime/common/models/include/models/deblurring_model.h b/python/openvino/runtime/common/models/include/models/deblurring_model.h
new file mode 100644
index 0000000..33f5542
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/deblurring_model.h
@@ -0,0 +1,52 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <stddef.h>
+
+#include <memory>
+#include <string>
+
+#include <opencv2/core/types.hpp>
+
+#include "models/image_model.h"
+
+namespace ov {
+class InferRequest;
+class Model;
+} // namespace ov
+struct InferenceResult;
+struct InputData;
+struct InternalModelData;
+struct ResultBase;
+
+class DeblurringModel : public ImageModel {
+public:
+ /// Constructor
+ /// @param modelFileName name of model to load
+ /// @param inputImgSize size of image to set model input shape
+ /// @param layout - model input layout
+ DeblurringModel(const std::string& modelFileName, const cv::Size& inputImgSize, const std::string& layout = "");
+
+ std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
+ std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+protected:
+ void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+ void changeInputSize(std::shared_ptr<ov::Model>& model);
+
+ static const size_t stride = 32;
+};
diff --git a/python/openvino/runtime/common/models/include/models/detection_model.h b/python/openvino/runtime/common/models/include/models/detection_model.h
new file mode 100644
index 0000000..4d57a61
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/detection_model.h
@@ -0,0 +1,51 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <stddef.h>
+
+#include <string>
+#include <vector>
+
+#include "models/image_model.h"
+
+class DetectionModel : public ImageModel {
+public:
+ /// Constructor
+ /// @param modelFileName name of model to load
+ /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
+ /// Any detected object with confidence lower than this threshold will be ignored.
+ /// @param useAutoResize - if true, image will be resized by openvino.
+ /// Otherwise, image will be preprocessed and resized using OpenCV routines.
+ /// @param labels - array of labels for every class. If this array is empty or contains fewer elements
+ /// than the actual number of classes, a default "Label #N" will be shown for missing items.
+ /// @param layout - model input layout
+ DetectionModel(const std::string& modelFileName,
+ float confidenceThreshold,
+ bool useAutoResize,
+ const std::vector<std::string>& labels,
+ const std::string& layout = "");
+
+ static std::vector<std::string> loadLabels(const std::string& labelFilename);
+
+protected:
+ float confidenceThreshold;
+ std::vector<std::string> labels;
+
+ std::string getLabelName(int labelID) {
+ return (size_t)labelID < labels.size() ? labels[labelID] : std::string("Label #") + std::to_string(labelID);
+ }
+};
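getLabelName() above degrades gracefully when the label list is shorter than the class count. A free-function restatement with hypothetical values:

    #include <string>
    #include <vector>

    // Mirrors DetectionModel::getLabelName(): fall back to "Label #N" when
    // the labels vector has no entry for the requested class id.
    std::string getLabelName(const std::vector<std::string>& labels, int labelID) {
        return static_cast<size_t>(labelID) < labels.size()
                   ? labels[labelID]
                   : std::string("Label #") + std::to_string(labelID);
    }

    // getLabelName({"person", "car"}, 1) -> "car"
    // getLabelName({"person", "car"}, 7) -> "Label #7"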
diff --git a/python/openvino/runtime/common/models/include/models/detection_model_centernet.h b/python/openvino/runtime/common/models/include/models/detection_model_centernet.h
new file mode 100644
index 0000000..db9ebdb
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/detection_model_centernet.h
@@ -0,0 +1,59 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "models/detection_model.h"
+
+namespace ov {
+class InferRequest;
+class Model;
+} // namespace ov
+struct InferenceResult;
+struct InputData;
+struct InternalModelData;
+struct ResultBase;
+
+class ModelCenterNet : public DetectionModel {
+public:
+ struct BBox {
+ float left;
+ float top;
+ float right;
+ float bottom;
+
+ float getWidth() const {
+ return (right - left) + 1.0f;
+ }
+ float getHeight() const {
+ return (bottom - top) + 1.0f;
+ }
+ };
+ static const int INIT_VECTOR_SIZE = 200;
+
+ ModelCenterNet(const std::string& modelFileName,
+ float confidenceThreshold,
+ const std::vector<std::string>& labels = std::vector<std::string>(),
+ const std::string& layout = "");
+ std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
+ std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+protected:
+ void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+};
diff --git a/python/openvino/runtime/common/models/include/models/detection_model_faceboxes.h b/python/openvino/runtime/common/models/include/models/detection_model_faceboxes.h
new file mode 100644
index 0000000..8ec2b21
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/detection_model_faceboxes.h
@@ -0,0 +1,55 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <stddef.h>
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <utils/nms.hpp>
+
+#include "models/detection_model.h"
+
+namespace ov {
+class Model;
+} // namespace ov
+struct InferenceResult;
+struct ResultBase;
+
+class ModelFaceBoxes : public DetectionModel {
+public:
+ static const int INIT_VECTOR_SIZE = 200;
+
+ ModelFaceBoxes(const std::string& modelFileName,
+ float confidenceThreshold,
+ bool useAutoResize,
+ float boxIOUThreshold,
+ const std::string& layout = "");
+ std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+protected:
+ size_t maxProposalsCount;
+ const float boxIOUThreshold;
+ const std::vector<float> variance;
+ const std::vector<int> steps;
+ const std::vector<std::vector<int>> minSizes;
+ std::vector<Anchor> anchors;
+ void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+ void priorBoxes(const std::vector<std::pair<size_t, size_t>>& featureMaps);
+};
diff --git a/python/openvino/runtime/common/models/include/models/detection_model_retinaface.h b/python/openvino/runtime/common/models/include/models/detection_model_retinaface.h
new file mode 100644
index 0000000..ac2c235
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/detection_model_retinaface.h
@@ -0,0 +1,74 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <utils/nms.hpp>
+
+#include "models/detection_model.h"
+
+namespace ov {
+class Model;
+} // namespace ov
+struct InferenceResult;
+struct ResultBase;
+
+class ModelRetinaFace : public DetectionModel {
+public:
+ static const int LANDMARKS_NUM = 5;
+ static const int INIT_VECTOR_SIZE = 200;
+ /// Loads model and performs required initialization
+ /// @param model_name name of model to load
+ /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
+ /// Any detected object with confidence lower than this threshold will be ignored.
+ /// @param useAutoResize - if true, image will be resized by openvino.
+ /// @param boxIOUThreshold - threshold for NMS box filtering; must lie in the [0.0, 1.0] range.
+ /// @param layout - model input layout
+ ModelRetinaFace(const std::string& model_name,
+ float confidenceThreshold,
+ bool useAutoResize,
+ float boxIOUThreshold,
+ const std::string& layout = "");
+ std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+protected:
+ struct AnchorCfgLine {
+ int stride;
+ std::vector<int> scales;
+ int baseSize;
+ std::vector<int> ratios;
+ };
+
+ bool shouldDetectMasks;
+ bool shouldDetectLandmarks;
+ const float boxIOUThreshold;
+ const float maskThreshold;
+ float landmarkStd;
+
+ enum OutputType { OUT_BOXES, OUT_SCORES, OUT_LANDMARKS, OUT_MASKSCORES, OUT_MAX };
+
+ std::vector<std::string> separateOutputsNames[OUT_MAX];
+ const std::vector<AnchorCfgLine> anchorCfg;
+ std::map<int, std::vector<Anchor>> anchorsFpn;
+ std::vector<std::vector<Anchor>> anchors;
+
+ void generateAnchorsFpn();
+ void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+};
diff --git a/python/openvino/runtime/common/models/include/models/detection_model_retinaface_pt.h b/python/openvino/runtime/common/models/include/models/detection_model_retinaface_pt.h
new file mode 100644
index 0000000..68cc907
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/detection_model_retinaface_pt.h
@@ -0,0 +1,81 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <stddef.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <opencv2/core/types.hpp>
+#include <utils/nms.hpp>
+
+#include "models/detection_model.h"
+
+namespace ov {
+class Model;
+class Tensor;
+} // namespace ov
+struct InferenceResult;
+struct ResultBase;
+
+class ModelRetinaFacePT : public DetectionModel {
+public:
+ struct Box {
+ float cX;
+ float cY;
+ float width;
+ float height;
+ };
+
+ /// Loads model and performs required initialization
+ /// @param modelFileName name of model to load
+ /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
+ /// Any detected object with confidence lower than this threshold will be ignored.
+ /// @param useAutoResize - if true, image will be resized by openvino.
+ /// @param boxIOUThreshold - threshold for NMS box filtering; must lie in the [0.0, 1.0] range.
+ /// @param layout - model input layout
+ ModelRetinaFacePT(const std::string& modelFileName,
+ float confidenceThreshold,
+ bool useAutoResize,
+ float boxIOUThreshold,
+ const std::string& layout = "");
+ std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+protected:
+ size_t landmarksNum;
+ const float boxIOUThreshold;
+ float variance[2] = {0.1f, 0.2f};
+
+ enum OutputType { OUT_BOXES, OUT_SCORES, OUT_LANDMARKS, OUT_MAX };
+
+ std::vector<ModelRetinaFacePT::Box> priors;
+
+ std::vector<size_t> filterByScore(const ov::Tensor& scoresTensor, const float confidenceThreshold);
+ std::vector<float> getFilteredScores(const ov::Tensor& scoresTensor, const std::vector<size_t>& indices);
+ std::vector<cv::Point2f> getFilteredLandmarks(const ov::Tensor& landmarksTensor,
+ const std::vector<size_t>& indices,
+ int imgWidth,
+ int imgHeight);
+ std::vector<ModelRetinaFacePT::Box> generatePriorData();
+ std::vector<Anchor> getFilteredProposals(const ov::Tensor& boxesTensor,
+ const std::vector<size_t>& indices,
+ int imgWidth,
+ int imgHeight);
+
+ void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+};
diff --git a/python/openvino/runtime/common/models/include/models/detection_model_ssd.h b/python/openvino/runtime/common/models/include/models/detection_model_ssd.h
new file mode 100644
index 0000000..646d7b0
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/detection_model_ssd.h
@@ -0,0 +1,63 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <stddef.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "models/detection_model.h"
+
+namespace ov {
+class InferRequest;
+class Model;
+} // namespace ov
+struct InferenceResult;
+struct InputData;
+struct InternalModelData;
+struct ResultBase;
+
+class ModelSSD : public DetectionModel {
+public:
+ /// Constructor
+ /// @param modelFileName name of model to load
+ /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
+ /// Any detected object with confidence lower than this threshold will be ignored.
+ /// @param useAutoResize - if true, image will be resized by openvino.
+ /// Otherwise, image will be preprocessed and resized using OpenCV routines.
+ /// @param labels - array of labels for every class. If this array is empty or contains fewer elements
+ /// than the actual number of classes, a default "Label #N" will be shown for missing items.
+ /// @param layout - model input layout
+ ModelSSD(const std::string& modelFileName,
+ float confidenceThreshold,
+ bool useAutoResize,
+ const std::vector<std::string>& labels = std::vector<std::string>(),
+ const std::string& layout = "");
+
+ std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
+ std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+protected:
+ std::unique_ptr<ResultBase> postprocessSingleOutput(InferenceResult& infResult);
+ std::unique_ptr<ResultBase> postprocessMultipleOutputs(InferenceResult& infResult);
+ void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+ void prepareSingleOutput(std::shared_ptr<ov::Model>& model);
+ void prepareMultipleOutputs(std::shared_ptr<ov::Model>& model);
+ size_t objectSize = 0;
+ size_t detectionsNumId = 0;
+};
diff --git a/python/openvino/runtime/common/models/include/models/detection_model_yolo.h b/python/openvino/runtime/common/models/include/models/detection_model_yolo.h
new file mode 100644
index 0000000..38b0b64
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/detection_model_yolo.h
@@ -0,0 +1,107 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <stddef.h>
+#include <stdint.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <openvino/op/region_yolo.hpp>
+#include <openvino/openvino.hpp>
+
+#include "models/detection_model.h"
+
+struct DetectedObject;
+struct InferenceResult;
+struct ResultBase;
+
+class ModelYolo : public DetectionModel {
+protected:
+ class Region {
+ public:
+ int num = 0;
+ size_t classes = 0;
+ int coords = 0;
+ std::vector<float> anchors;
+ size_t outputWidth = 0;
+ size_t outputHeight = 0;
+
+ Region(const std::shared_ptr<ov::op::v0::RegionYolo>& regionYolo);
+ Region(size_t classes,
+ int coords,
+ const std::vector<float>& anchors,
+ const std::vector<int64_t>& masks,
+ size_t outputWidth,
+ size_t outputHeight);
+ };
+
+public:
+ enum YoloVersion { YOLO_V1V2, YOLO_V3, YOLO_V4, YOLO_V4_TINY, YOLOF };
+
+ /// Constructor.
+ /// @param modelFileName name of model to load
+ /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
+ /// Any detected object with confidence lower than this threshold will be ignored.
+ /// @param useAutoResize - if true, image will be resized by openvino.
+ /// Otherwise, image will be preprocessed and resized using OpenCV routines.
+ /// @param useAdvancedPostprocessing - if true, an advanced algorithm for filtering/postprocessing will be used
+ /// (with better processing of multiple crossing objects). Otherwise, classic algorithm will be used.
+ /// @param boxIOUThreshold - threshold to treat separate output regions as one object for filtering
+ /// during postprocessing (only one of them should stay). The default value is 0.5
+ /// @param labels - array of labels for every class. If this array is empty or contains fewer elements
+ /// than the actual number of classes, a default "Label #N" will be shown for missing items.
+ /// @param anchors - vector of anchors coordinates. Required for YOLOv4, for other versions it may be omitted.
+ /// @param masks - vector of masks values. Required for YOLOv4, for other versions it may be omitted.
+ /// @param layout - model input layout
+ ModelYolo(const std::string& modelFileName,
+ float confidenceThreshold,
+ bool useAutoResize,
+ bool useAdvancedPostprocessing = true,
+ float boxIOUThreshold = 0.5,
+ const std::vector<std::string>& labels = std::vector<std::string>(),
+ const std::vector<float>& anchors = std::vector<float>(),
+ const std::vector<int64_t>& masks = std::vector<int64_t>(),
+ const std::string& layout = "");
+
+ std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+protected:
+ void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+
+ void parseYOLOOutput(const std::string& output_name,
+ const ov::Tensor& tensor,
+ const unsigned long resized_im_h,
+ const unsigned long resized_im_w,
+ const unsigned long original_im_h,
+ const unsigned long original_im_w,
+ std::vector<DetectedObject>& objects);
+
+ static int calculateEntryIndex(int entriesNum, int lcoords, size_t lclasses, int location, int entry);
+ static double intersectionOverUnion(const DetectedObject& o1, const DetectedObject& o2);
+
+ std::map<std::string, Region> regions;
+ double boxIOUThreshold;
+ bool useAdvancedPostprocessing;
+ bool isObjConf = true;
+ YoloVersion yoloVersion;
+ const std::vector<float> presetAnchors;
+ const std::vector<int64_t> presetMasks;
+ ov::Layout yoloRegionLayout = "NCHW";
+};
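intersectionOverUnion() is only declared here, but since DetectedObject derives from cv::Rect2f (see results.h below), one plausible implementation is the standard box IoU; this is a sketch, not necessarily what the accompanying .cpp does:

    #include <opencv2/core/types.hpp>

    // IoU of two axis-aligned boxes; cv::Rect2f provides the intersection
    // operator '&' and area() directly.
    double intersectionOverUnion(const cv::Rect2f& o1, const cv::Rect2f& o2) {
        const double intersection = (o1 & o2).area();
        const double unionArea = o1.area() + o2.area() - intersection;
        return unionArea > 0.0 ? intersection / unionArea : 0.0;
    }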
diff --git a/python/openvino/runtime/common/models/include/models/detection_model_yolov3_onnx.h b/python/openvino/runtime/common/models/include/models/detection_model_yolov3_onnx.h
new file mode 100644
index 0000000..66c4f03
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/detection_model_yolov3_onnx.h
@@ -0,0 +1,50 @@
+/*
+// Copyright (C) 2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include "models/detection_model.h"
+
+class ModelYoloV3ONNX: public DetectionModel {
+public:
+ /// Constructor.
+ /// @param modelFileName name of model to load
+ /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
+ /// Any detected object with confidence lower than this threshold will be ignored.
+ /// @param labels - array of labels for every class. If this array is empty or contains fewer elements
+ /// than the actual number of classes, a default "Label #N" will be shown for missing items.
+ /// @param layout - model input layout
+ ModelYoloV3ONNX(const std::string& modelFileName,
+ float confidenceThreshold,
+ const std::vector<std::string>& labels = std::vector<std::string>(),
+ const std::string& layout = "");
+
+ std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+ std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
+
+protected:
+ void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+
+ std::string boxesOutputName;
+ std::string scoresOutputName;
+ std::string indicesOutputName;
+ static const int numberOfClasses = 80;
+};
diff --git a/python/openvino/runtime/common/models/include/models/detection_model_yolox.h b/python/openvino/runtime/common/models/include/models/detection_model_yolox.h
new file mode 100644
index 0000000..d7e4ea3
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/detection_model_yolox.h
@@ -0,0 +1,54 @@
+/*
+// Copyright (C) 2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include "models/detection_model.h"
+
+class ModelYoloX: public DetectionModel {
+public:
+ /// Constructor.
+ /// @param modelFileName name of model to load
+ /// @param confidenceThreshold - threshold to eliminate low-confidence detections.
+ /// Any detected object with confidence lower than this threshold will be ignored.
+ /// @param boxIOUThreshold - threshold to treat separate output regions as one object for filtering
+ /// during postprocessing (only one of them should stay). The default value is 0.5
+ /// @param labels - array of labels for every class. If this array is empty or contains fewer elements
+ /// than the actual number of classes, a default "Label #N" will be shown for missing items.
+ /// @param layout - model input layout
+ ModelYoloX(const std::string& modelFileName,
+ float confidenceThreshold,
+ float boxIOUThreshold = 0.5,
+ const std::vector<std::string>& labels = std::vector<std::string>(),
+ const std::string& layout = "");
+
+ std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+ std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
+
+protected:
+ void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+ void setStridesGrids();
+
+ double boxIOUThreshold;
+ std::vector<std::pair<size_t, size_t>> grids;
+ std::vector<size_t> expandedStrides;
+ static const size_t numberOfClasses = 80;
+};
diff --git a/python/openvino/runtime/common/models/include/models/hpe_model_associative_embedding.h b/python/openvino/runtime/common/models/include/models/hpe_model_associative_embedding.h
new file mode 100644
index 0000000..66e217e
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/hpe_model_associative_embedding.h
@@ -0,0 +1,89 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <opencv2/core.hpp>
+
+#include <utils/image_utils.h>
+
+#include "models/image_model.h"
+
+namespace ov {
+class InferRequest;
+class Model;
+class Shape;
+} // namespace ov
+struct HumanPose;
+struct InferenceResult;
+struct InputData;
+struct InternalModelData;
+struct ResultBase;
+
+class HpeAssociativeEmbedding : public ImageModel {
+public:
+ /// Constructor
+ /// @param modelFileName name of model to load
+ /// @param aspectRatio - the ratio of input width to its height.
+ /// @param targetSize - the length of a short image side used for model reshaping.
+ /// @param confidenceThreshold - threshold to eliminate low-confidence poses.
+ /// Any pose with confidence lower than this threshold will be ignored.
+ /// @param layout - model input layout
+ HpeAssociativeEmbedding(const std::string& modelFileName,
+ double aspectRatio,
+ int targetSize,
+ float confidenceThreshold,
+ const std::string& layout = "",
+ float delta = 0.0,
+ RESIZE_MODE resizeMode = RESIZE_KEEP_ASPECT);
+
+ std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+ std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
+
+protected:
+ void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+
+ cv::Size inputLayerSize;
+ double aspectRatio;
+ int targetSize;
+ float confidenceThreshold;
+ float delta;
+
+ std::string embeddingsTensorName;
+ std::string heatmapsTensorName;
+ std::string nmsHeatmapsTensorName;
+
+ static const int numJoints = 17;
+ static const int stride = 32;
+ static const int maxNumPeople = 30;
+ static const cv::Vec3f meanPixel;
+ static const float detectionThreshold;
+ static const float tagThreshold;
+
+ void changeInputSize(std::shared_ptr<ov::Model>& model);
+
+ std::string findTensorByName(const std::string& tensorName, const std::vector<std::string>& outputsNames);
+
+ std::vector<cv::Mat> split(float* data, const ov::Shape& shape);
+
+ std::vector<HumanPose> extractPoses(std::vector<cv::Mat>& heatMaps,
+ const std::vector<cv::Mat>& aembdsMaps,
+ const std::vector<cv::Mat>& nmsHeatMaps) const;
+};
diff --git a/python/openvino/runtime/common/models/include/models/hpe_model_openpose.h b/python/openvino/runtime/common/models/include/models/hpe_model_openpose.h
new file mode 100644
index 0000000..d5e1ce7
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/hpe_model_openpose.h
@@ -0,0 +1,78 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <stddef.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <opencv2/core.hpp>
+
+#include "models/image_model.h"
+
+namespace ov {
+class InferRequest;
+class Model;
+} // namespace ov
+struct HumanPose;
+struct InferenceResult;
+struct InputData;
+struct InternalModelData;
+struct ResultBase;
+
+class HPEOpenPose : public ImageModel {
+public:
+ /// Constructor
+ /// @param modelFileName name of model to load
+ /// @param aspectRatio - the ratio of input width to its height.
+ /// @param targetSize - the height used for model reshaping.
+ /// @param confidenceThreshold - threshold to eliminate low-confidence keypoints.
+ /// @param layout - model input layout
+ HPEOpenPose(const std::string& modelFileName,
+ double aspectRatio,
+ int targetSize,
+ float confidenceThreshold,
+ const std::string& layout = "");
+
+ std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+ std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
+
+ static const size_t keypointsNumber = 18;
+
+protected:
+ void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+
+ static const int minJointsNumber = 3;
+ static const int stride = 8;
+ static const int upsampleRatio = 4;
+ static const cv::Vec3f meanPixel;
+ static const float minPeaksDistance;
+ static const float midPointsScoreThreshold;
+ static const float foundMidPointsRatioThreshold;
+ static const float minSubsetScore;
+ cv::Size inputLayerSize;
+ double aspectRatio;
+ int targetSize;
+ float confidenceThreshold;
+
+ std::vector<HumanPose> extractPoses(const std::vector<cv::Mat>& heatMaps, const std::vector<cv::Mat>& pafs) const;
+ void resizeFeatureMaps(std::vector<cv::Mat>& featureMaps) const;
+
+ void changeInputSize(std::shared_ptr<ov::Model>& model);
+};
diff --git a/python/openvino/runtime/common/models/include/models/image_model.h b/python/openvino/runtime/common/models/include/models/image_model.h
new file mode 100644
index 0000000..b18daa1
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/image_model.h
@@ -0,0 +1,49 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <stddef.h>
+
+#include <memory>
+#include <string>
+
+#include "models/model_base.h"
+#include "utils/image_utils.h"
+
+namespace ov {
+class InferRequest;
+} // namespace ov
+struct InputData;
+struct InternalModelData;
+
+class ImageModel : public ModelBase {
+public:
+ /// Constructor
+ /// @param modelFileName name of model to load
+ /// @param useAutoResize - if true, image is resized by openvino
+ /// @param layout - model input layout
+ ImageModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout = "");
+
+ std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
+
+protected:
+ bool useAutoResize;
+
+ size_t netInputHeight = 0;
+ size_t netInputWidth = 0;
+ cv::InterpolationFlags interpolationMode = cv::INTER_LINEAR;
+ RESIZE_MODE resizeMode = RESIZE_FILL;
+};
diff --git a/python/openvino/runtime/common/models/include/models/input_data.h b/python/openvino/runtime/common/models/include/models/input_data.h
new file mode 100644
index 0000000..bff9fa5
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/input_data.h
@@ -0,0 +1,41 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <opencv2/opencv.hpp>
+
+struct InputData {
+ virtual ~InputData() {}
+
+ template <class T>
+ T& asRef() {
+ return dynamic_cast<T&>(*this);
+ }
+
+ template <class T>
+ const T& asRef() const {
+ return dynamic_cast<const T&>(*this);
+ }
+};
+
+struct ImageInputData : public InputData {
+ cv::Mat inputImage;
+
+ ImageInputData() {}
+ ImageInputData(const cv::Mat& img) {
+ inputImage = img;
+ }
+};
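asRef() is a checked downcast: dynamic_cast applied to a reference throws std::bad_cast instead of returning null, so a type mismatch fails loudly. A sketch of how a model's preprocess() might recover the image payload (the function name is hypothetical):

    #include <opencv2/core.hpp>
    // assumes "models/input_data.h" from above

    void preprocessSketch(const InputData& inputData) {
        // Throws std::bad_cast if the payload is not an ImageInputData.
        const cv::Mat& img = inputData.asRef<ImageInputData>().inputImage;
        (void)img;  // hand off to the infer request here
    }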
diff --git a/python/openvino/runtime/common/models/include/models/internal_model_data.h b/python/openvino/runtime/common/models/include/models/internal_model_data.h
new file mode 100644
index 0000000..61d7744
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/internal_model_data.h
@@ -0,0 +1,48 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+struct InternalModelData {
+ virtual ~InternalModelData() {}
+
+ template <class T>
+ T& asRef() {
+ return dynamic_cast<T&>(*this);
+ }
+
+ template <class T>
+ const T& asRef() const {
+ return dynamic_cast<const T&>(*this);
+ }
+};
+
+struct InternalImageModelData : public InternalModelData {
+ InternalImageModelData(int width, int height) : inputImgWidth(width), inputImgHeight(height) {}
+
+ int inputImgWidth;
+ int inputImgHeight;
+};
+
+struct InternalScaleData : public InternalImageModelData {
+ InternalScaleData(int width, int height, float scaleX, float scaleY)
+ : InternalImageModelData(width, height),
+ scaleX(scaleX),
+ scaleY(scaleY) {}
+
+ float scaleX;
+ float scaleY;
+};
diff --git a/python/openvino/runtime/common/models/include/models/jpeg_restoration_model.h b/python/openvino/runtime/common/models/include/models/jpeg_restoration_model.h
new file mode 100644
index 0000000..8b22ac2
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/jpeg_restoration_model.h
@@ -0,0 +1,55 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include <opencv2/core/types.hpp>
+
+#include "models/image_model.h"
+
+namespace ov {
+class InferRequest;
+class Model;
+} // namespace ov
+struct InferenceResult;
+struct InputData;
+struct InternalModelData;
+struct ResultBase;
+class JPEGRestorationModel : public ImageModel {
+public:
+ /// Constructor
+ /// @param modelFileName name of model to load
+ /// @param inputImgSize size of image to set model input shape
+ /// @param jpegCompression - if true, JPEG compression is applied to the input before inference
+ /// @param layout - model input layout
+ JPEGRestorationModel(const std::string& modelFileName,
+ const cv::Size& inputImgSize,
+ bool jpegCompression,
+ const std::string& layout = "");
+
+ std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
+ std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+protected:
+ void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+ void changeInputSize(std::shared_ptr<ov::Model>& model);
+
+ static const size_t stride = 8;
+ bool jpegCompression = false;
+};
diff --git a/python/openvino/runtime/common/models/include/models/model_base.h b/python/openvino/runtime/common/models/include/models/model_base.h
new file mode 100644
index 0000000..c6d9cc1
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/model_base.h
@@ -0,0 +1,77 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include <utils/args_helper.hpp>
+#include <utils/config_factory.h>
+#include <utils/ocv_common.hpp>
+
+struct InferenceResult;
+struct InputData;
+struct InternalModelData;
+struct ResultBase;
+
+class ModelBase {
+public:
+ ModelBase(const std::string& modelFileName, const std::string& layout = "")
+ : modelFileName(modelFileName),
+ inputsLayouts(parseLayoutString(layout)) {}
+
+ virtual ~ModelBase() {}
+
+ virtual std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) = 0;
+ virtual ov::CompiledModel compileModel(const ModelConfig& config, ov::Core& core);
+ virtual void onLoadCompleted(const std::vector<ov::InferRequest>& requests) {}
+ virtual std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) = 0;
+
+ const std::vector<std::string>& getOutputsNames() const {
+ return outputsNames;
+ }
+ const std::vector<std::string>& getInputsNames() const {
+ return inputsNames;
+ }
+
+ std::string getModelFileName() {
+ return modelFileName;
+ }
+
+ void setInputsPreprocessing(bool reverseInputChannels,
+ const std::string& meanValues,
+ const std::string& scaleValues) {
+ this->inputTransform = InputTransform(reverseInputChannels, meanValues, scaleValues);
+ }
+
+protected:
+ virtual void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) = 0;
+
+ std::shared_ptr<ov::Model> prepareModel(ov::Core& core);
+
+ InputTransform inputTransform = InputTransform();
+ std::vector<std::string> inputsNames;
+ std::vector<std::string> outputsNames;
+ ov::CompiledModel compiledModel;
+ std::string modelFileName;
+ ModelConfig config = {};
+ std::map<std::string, ov::Layout> inputsLayouts;
+ ov::Layout getInputLayout(const ov::Output<ov::Node>& input);
+};
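ModelBase fixes the pipeline contract: compileModel() loads and prepares the network, preprocess() fills an infer request from the InputData, and postprocess() converts an InferenceResult into a concrete result type. A rough synchronous usage sketch, assuming the headers above (InferenceResult is defined in results.h below):

    #include <openvino/openvino.hpp>
    // assumes "models/model_base.h", "models/input_data.h", "models/results.h"

    void runOnce(ModelBase& model, const InputData& input, const ModelConfig& config) {
        ov::Core core;
        ov::CompiledModel compiled = model.compileModel(config, core);
        ov::InferRequest request = compiled.create_infer_request();

        auto internalData = model.preprocess(input, request);  // fills the input tensors
        request.infer();

        InferenceResult result;
        result.internalModelData = internalData;
        for (const auto& name : model.getOutputsNames())
            result.outputsData[name] = request.get_tensor(name);

        auto finalResult = model.postprocess(result);  // concrete ResultBase subclass
        (void)finalResult;
    }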
diff --git a/python/openvino/runtime/common/models/include/models/openpose_decoder.h b/python/openvino/runtime/common/models/include/models/openpose_decoder.h
new file mode 100644
index 0000000..d40e56e
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/openpose_decoder.h
@@ -0,0 +1,62 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <stddef.h>
+
+#include <vector>
+
+#include <opencv2/core.hpp>
+
+struct HumanPose;
+
+struct Peak {
+ Peak(const int id = -1, const cv::Point2f& pos = cv::Point2f(), const float score = 0.0f);
+
+ int id;
+ cv::Point2f pos;
+ float score;
+};
+
+struct HumanPoseByPeaksIndices {
+ explicit HumanPoseByPeaksIndices(const int keypointsNumber);
+
+ std::vector<int> peaksIndices;
+ int nJoints;
+ float score;
+};
+
+struct TwoJointsConnection {
+ TwoJointsConnection(const int firstJointIdx, const int secondJointIdx, const float score);
+
+ int firstJointIdx;
+ int secondJointIdx;
+ float score;
+};
+
+void findPeaks(const std::vector<cv::Mat>& heatMaps,
+ const float minPeaksDistance,
+ std::vector<std::vector<Peak>>& allPeaks,
+ int heatMapId,
+ float confidenceThreshold);
+
+std::vector<HumanPose> groupPeaksToPoses(const std::vector<std::vector<Peak>>& allPeaks,
+ const std::vector<cv::Mat>& pafs,
+ const size_t keypointsNumber,
+ const float midPointsScoreThreshold,
+ const float foundMidPointsRatioThreshold,
+ const int minJointsNumber,
+ const float minSubsetScore);
diff --git a/python/openvino/runtime/common/models/include/models/results.h b/python/openvino/runtime/common/models/include/models/results.h
new file mode 100644
index 0000000..6b3a89d
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/results.h
@@ -0,0 +1,122 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <opencv2/core.hpp>
+#include <openvino/openvino.hpp>
+
+#include "internal_model_data.h"
+
+struct MetaData;
+struct ResultBase {
+ ResultBase(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr)
+ : frameId(frameId),
+ metaData(metaData) {}
+ virtual ~ResultBase() {}
+
+ int64_t frameId;
+
+ std::shared_ptr<MetaData> metaData;
+ bool IsEmpty() {
+ return frameId < 0;
+ }
+
+ template <class T>
+ T& asRef() {
+ return dynamic_cast<T&>(*this);
+ }
+
+ template <class T>
+ const T& asRef() const {
+ return dynamic_cast<const T&>(*this);
+ }
+};
+
+struct InferenceResult : public ResultBase {
+ std::shared_ptr<InternalModelData> internalModelData;
+ std::map<std::string, ov::Tensor> outputsData;
+
+ /// Returns the first output tensor
+ /// This function is a useful addition to direct access to outputs list as many models have only one output
+ /// @returns first output tensor
+ ov::Tensor getFirstOutputTensor() {
+ if (outputsData.empty()) {
+ throw std::out_of_range("Outputs map is empty.");
+ }
+ return outputsData.begin()->second;
+ }
+
+ /// Returns true if object contains no valid data
+ /// @returns true if object contains no valid data
+ bool IsEmpty() {
+ return outputsData.empty();
+ }
+};
+
+struct ClassificationResult : public ResultBase {
+ ClassificationResult(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr)
+ : ResultBase(frameId, metaData) {}
+
+ struct Classification {
+ unsigned int id;
+ std::string label;
+ float score;
+
+ Classification(unsigned int id, const std::string& label, float score) : id(id), label(label), score(score) {}
+ };
+
+ std::vector<Classification> topLabels;
+};
+
+struct DetectedObject : public cv::Rect2f {
+ unsigned int labelID;
+ std::string label;
+ float confidence;
+};
+
+struct DetectionResult : public ResultBase {
+ DetectionResult(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr)
+ : ResultBase(frameId, metaData) {}
+ std::vector<DetectedObject> objects;
+};
+
+struct RetinaFaceDetectionResult : public DetectionResult {
+ RetinaFaceDetectionResult(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr)
+ : DetectionResult(frameId, metaData) {}
+ std::vector<cv::Point2f> landmarks;
+};
+
+struct ImageResult : public ResultBase {
+ ImageResult(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr)
+ : ResultBase(frameId, metaData) {}
+ cv::Mat resultImage;
+};
+
+struct HumanPose {
+ std::vector<cv::Point2f> keypoints;
+ float score;
+};
+
+struct HumanPoseResult : public ResultBase {
+ HumanPoseResult(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr)
+ : ResultBase(frameId, metaData) {}
+ std::vector<HumanPose> poses;
+};
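Callers receive these results through ResultBase and narrow them with asRef(); for a detection model, for example (sketch):

    #include <iostream>
    #include <memory>
    // assumes "models/results.h" from above

    void printDetections(const std::unique_ptr<ResultBase>& result) {
        const DetectionResult& detection = result->asRef<DetectionResult>();
        for (const DetectedObject& obj : detection.objects) {
            // DetectedObject inherits x, y, width, height from cv::Rect2f.
            std::cout << obj.label << " (" << obj.confidence << "): " << obj.x << ", " << obj.y << " "
                      << obj.width << "x" << obj.height << "\n";
        }
    }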
diff --git a/python/openvino/runtime/common/models/include/models/segmentation_model.h b/python/openvino/runtime/common/models/include/models/segmentation_model.h
new file mode 100644
index 0000000..9d4d2cb
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/segmentation_model.h
@@ -0,0 +1,50 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "models/image_model.h"
+
+namespace ov {
+class Model;
+} // namespace ov
+struct InferenceResult;
+struct ResultBase;
+
+class SegmentationModel : public ImageModel {
+public:
+ /// Constructor
+ /// @param modelFileName name of model to load
+    /// @param useAutoResize - if true, image will be resized by OpenVINO.
+ /// Otherwise, image will be preprocessed and resized using OpenCV routines.
+ /// @param layout - model input layout
+ SegmentationModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout = "");
+
+ static std::vector<std::string> loadLabels(const std::string& labelFilename);
+
+ std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+protected:
+ void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+
+ int outHeight = 0;
+ int outWidth = 0;
+ int outChannels = 0;
+};
diff --git a/python/openvino/runtime/common/models/include/models/style_transfer_model.h b/python/openvino/runtime/common/models/include/models/style_transfer_model.h
new file mode 100644
index 0000000..9bcc541
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/style_transfer_model.h
@@ -0,0 +1,43 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <memory>
+#include <string>
+
+#include "models/image_model.h"
+
+namespace ov {
+class InferRequest;
+class Model;
+} // namespace ov
+struct InferenceResult;
+struct InputData;
+struct InternalModelData;
+struct ResultBase;
+
+class StyleTransferModel : public ImageModel {
+public:
+ /// Constructor
+ /// @param modelFileName name of model to load
+ /// @param layout - model input layout
+ StyleTransferModel(const std::string& modelFileName, const std::string& layout = "");
+
+ std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+protected:
+ void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+};
diff --git a/python/openvino/runtime/common/models/include/models/super_resolution_model.h b/python/openvino/runtime/common/models/include/models/super_resolution_model.h
new file mode 100644
index 0000000..773b5c3
--- /dev/null
+++ b/python/openvino/runtime/common/models/include/models/super_resolution_model.h
@@ -0,0 +1,49 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <memory>
+#include <string>
+
+#include <opencv2/core/types.hpp>
+
+#include "models/image_model.h"
+
+namespace ov {
+class InferRequest;
+class Model;
+} // namespace ov
+struct InferenceResult;
+struct InputData;
+struct InternalModelData;
+struct ResultBase;
+
+class SuperResolutionModel : public ImageModel {
+public:
+ /// Constructor
+ /// @param modelFileName name of model to load
+ /// @param layout - model input layout
+ SuperResolutionModel(const std::string& modelFileName,
+ const cv::Size& inputImgSize,
+ const std::string& layout = "");
+
+ std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
+ std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
+
+protected:
+ void changeInputSize(std::shared_ptr<ov::Model>& model, int coeff);
+ void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
+};
diff --git a/python/openvino/runtime/common/models/src/associative_embedding_decoder.cpp b/python/openvino/runtime/common/models/src/associative_embedding_decoder.cpp
new file mode 100644
index 0000000..b1e8285
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/associative_embedding_decoder.cpp
@@ -0,0 +1,201 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/associative_embedding_decoder.h"
+
+#include <algorithm>
+#include <iterator>
+#include <limits>
+#include <numeric>
+#include <vector>
+
+#include <utils/kuhn_munkres.hpp>
+
+void findPeaks(const std::vector<cv::Mat>& nmsHeatMaps,
+ const std::vector<cv::Mat>& aembdsMaps,
+ std::vector<std::vector<Peak>>& allPeaks,
+ size_t jointId,
+ size_t maxNumPeople,
+ float detectionThreshold) {
+ const cv::Mat& nmsHeatMap = nmsHeatMaps[jointId];
+ const float* heatMapData = nmsHeatMap.ptr<float>();
+ cv::Size outputSize = nmsHeatMap.size();
+
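+    // Rank the heat map cells by score and keep only the top maxNumPeople
+    // candidates; partial_sort avoids ordering the whole map.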
+ std::vector<int> indices(outputSize.area());
+ std::iota(std::begin(indices), std::end(indices), 0);
+ std::partial_sort(std::begin(indices),
+ std::begin(indices) + maxNumPeople,
+ std::end(indices),
+ [heatMapData](int l, int r) {
+ return heatMapData[l] > heatMapData[r];
+ });
+
+    allPeaks[jointId].reserve(maxNumPeople);
+    for (size_t personId = 0; personId < maxNumPeople; personId++) {
+ int index = indices[personId];
+        // The heat map is stored row-major, so the column is index % width and
+        // the row is index / width; cv::Mat::at takes (row, col).
+        int x = index % outputSize.width;
+        int y = index / outputSize.width;
+        float tag = aembdsMaps[jointId].at<float>(y, x);
+ float score = heatMapData[index];
+ if (score > detectionThreshold) {
+ allPeaks[jointId].emplace_back(Peak{cv::Point2f(static_cast<float>(x), static_cast<float>(y)), score, tag});
+ }
+ }
+}
+
+std::vector<Pose> matchByTag(std::vector<std::vector<Peak>>& allPeaks,
+ size_t maxNumPeople,
+ size_t numJoints,
+ float tagThreshold) {
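+    // Joints are matched in a fixed traversal order (presumably the 17 COCO
+    // keypoints), seeding poses from the more stable head/shoulder/hip joints.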
+ size_t jointOrder[]{0, 1, 2, 3, 4, 5, 6, 11, 12, 7, 8, 9, 10, 13, 14, 15, 16};
+ std::vector<Pose> allPoses;
+ for (size_t jointId : jointOrder) {
+ std::vector<Peak>& jointPeaks = allPeaks[jointId];
+ std::vector<float> tags;
+ for (auto& peak : jointPeaks) {
+ tags.push_back(peak.tag);
+ }
+ if (allPoses.empty()) {
+ for (size_t personId = 0; personId < jointPeaks.size(); personId++) {
+ Peak peak = jointPeaks[personId];
+ Pose pose = Pose(numJoints);
+ pose.add(jointId, peak);
+ allPoses.push_back(pose);
+ }
+ continue;
+ }
+ if (jointPeaks.empty() || (allPoses.size() == maxNumPeople)) {
+ continue;
+ }
+ std::vector<float> posesTags;
+ std::vector<cv::Point2f> posesCenters;
+ for (auto& pose : allPoses) {
+ posesTags.push_back(pose.getPoseTag());
+ posesCenters.push_back(pose.getPoseCenter());
+ }
+ size_t numAdded = tags.size();
+ size_t numGrouped = posesTags.size();
+ cv::Mat tagsDiff(numAdded, numGrouped, CV_32F);
+ cv::Mat matchingCost(numAdded, numGrouped, CV_32F);
+ std::vector<float> dists(numAdded);
+ for (size_t j = 0; j < numGrouped; j++) {
+ float minDist = std::numeric_limits<float>::max();
+ // Compute euclidean distance (in spatial space) between the pose center and all joints.
+ const cv::Point2f center = posesCenters.at(j);
+ for (size_t i = 0; i < numAdded; i++) {
+ cv::Point2f v = jointPeaks.at(i).keypoint - center;
+ float dist = std::sqrt(v.x * v.x + v.y * v.y);
+ dists[i] = dist;
+ minDist = std::min(dist, minDist);
+ }
+ // Compute semantic distance (in embedding space) between the pose tag and all joints
+ // and corresponding matching costs.
+ auto poseTag = posesTags[j];
+ for (size_t i = 0; i < numAdded; i++) {
+ float diff = static_cast<float>(cv::norm(tags[i] - poseTag));
+ tagsDiff.at<float>(i, j) = diff;
+ if (diff < tagThreshold) {
+ diff *= dists[i] / (minDist + 1e-10f);
+ }
+ matchingCost.at<float>(i, j) = std::round(diff) * 100 - jointPeaks[i].score;
+ }
+ }
+
+ if (numAdded > numGrouped) {
+ cv::copyMakeBorder(matchingCost,
+ matchingCost,
+ 0,
+ 0,
+ 0,
+ numAdded - numGrouped,
+ cv::BORDER_CONSTANT,
+ 10000000);
+ }
+        // Pair new joint peaks with existing poses by solving the assignment
+        // problem (Kuhn-Munkres / Hungarian algorithm) on the matching cost.
+ auto res = KuhnMunkres().Solve(matchingCost);
+ for (size_t row = 0; row < res.size(); row++) {
+ size_t col = res[row];
+ if (row < numAdded && col < numGrouped && tagsDiff.at<float>(row, col) < tagThreshold) {
+ allPoses[col].add(jointId, jointPeaks[row]);
+ } else {
+ Pose pose = Pose(numJoints);
+ pose.add(jointId, jointPeaks[row]);
+ allPoses.push_back(pose);
+ }
+ }
+ }
+ return allPoses;
+}
+
+namespace {
+cv::Point2f adjustLocation(const int x, const int y, const cv::Mat& heatMap) {
+ cv::Point2f delta(0.f, 0.f);
+ int width = heatMap.cols;
+ int height = heatMap.rows;
+ if ((1 < x) && (x < width - 1) && (1 < y) && (y < height - 1)) {
+ auto diffX = heatMap.at<float>(y, x + 1) - heatMap.at<float>(y, x - 1);
+ auto diffY = heatMap.at<float>(y + 1, x) - heatMap.at<float>(y - 1, x);
+ delta.x = diffX > 0 ? 0.25f : -0.25f;
+ delta.y = diffY > 0 ? 0.25f : -0.25f;
+ }
+ return delta;
+}
+} // namespace
+
+void adjustAndRefine(std::vector<Pose>& allPoses,
+ const std::vector<cv::Mat>& heatMaps,
+ const std::vector<cv::Mat>& aembdsMaps,
+ int poseId,
+ const float delta) {
+ Pose& pose = allPoses[poseId];
+ float poseTag = pose.getPoseTag();
+ for (size_t jointId = 0; jointId < pose.size(); jointId++) {
+ Peak& peak = pose.getPeak(jointId);
+ const cv::Mat& heatMap = heatMaps[jointId];
+ const cv::Mat& aembds = aembdsMaps[jointId];
+
+ if (peak.score > 0) {
+ // Adjust
+ int x = static_cast<int>(peak.keypoint.x);
+ int y = static_cast<int>(peak.keypoint.y);
+ peak.keypoint += adjustLocation(x, y, heatMap);
+ if (delta) {
+ peak.keypoint.x += delta;
+ peak.keypoint.y += delta;
+ }
+ } else {
+ // Refine
+ // Get position with the closest tag value to the pose tag
+ cv::Mat diff = cv::abs(aembds - poseTag);
+ diff.convertTo(diff, CV_32S, 1.0, 0.0);
+ diff.convertTo(diff, CV_32F);
+ diff -= heatMap;
+ double min;
+ cv::Point2i minLoc;
+ cv::minMaxLoc(diff, &min, 0, &minLoc);
+ int x = minLoc.x;
+ int y = minLoc.y;
+ float val = heatMap.at<float>(y, x);
+ if (val > 0) {
+ peak.keypoint.x = static_cast<float>(x);
+ peak.keypoint.y = static_cast<float>(y);
+ peak.keypoint += adjustLocation(x, y, heatMap);
+ // Peak score is assigned directly, so it does not affect the pose score.
+ peak.score = val;
+ }
+ }
+ }
+}
diff --git a/python/openvino/runtime/common/models/src/classification_model.cpp b/python/openvino/runtime/common/models/src/classification_model.cpp
new file mode 100644
index 0000000..90bc0d5
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/classification_model.cpp
@@ -0,0 +1,196 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/classification_model.h"
+
+#include <algorithm>
+#include <fstream>
+#include <iterator>
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <openvino/op/softmax.hpp>
+#include <openvino/op/topk.hpp>
+#include <openvino/openvino.hpp>
+
+#include <utils/slog.hpp>
+
+#include "models/results.h"
+
+ClassificationModel::ClassificationModel(const std::string& modelFileName,
+ size_t nTop,
+ bool useAutoResize,
+ const std::vector<std::string>& labels,
+ const std::string& layout)
+ : ImageModel(modelFileName, useAutoResize, layout),
+ nTop(nTop),
+ labels(labels) {}
+
+std::unique_ptr<ResultBase> ClassificationModel::postprocess(InferenceResult& infResult) {
+ const ov::Tensor& indicesTensor = infResult.outputsData.find(outputsNames[0])->second;
+ const int* indicesPtr = indicesTensor.data<int>();
+ const ov::Tensor& scoresTensor = infResult.outputsData.find(outputsNames[1])->second;
+ const float* scoresPtr = scoresTensor.data<float>();
+
+ ClassificationResult* result = new ClassificationResult(infResult.frameId, infResult.metaData);
+ auto retVal = std::unique_ptr<ResultBase>(result);
+
+ result->topLabels.reserve(scoresTensor.get_size());
+ for (size_t i = 0; i < scoresTensor.get_size(); ++i) {
+ int ind = indicesPtr[i];
+ if (ind < 0 || ind >= static_cast<int>(labels.size())) {
+ throw std::runtime_error("Invalid index for the class label is found during postprocessing");
+ }
+ result->topLabels.emplace_back(ind, labels[ind], scoresPtr[i]);
+ }
+
+ return retVal;
+}
+
+std::vector<std::string> ClassificationModel::loadLabels(const std::string& labelFilename) {
+ std::vector<std::string> labels;
+
+ /* Read labels */
+ std::ifstream inputFile(labelFilename);
+ if (!inputFile.is_open())
+ throw std::runtime_error("Can't open the labels file: " + labelFilename);
+ std::string labelsLine;
+ while (std::getline(inputFile, labelsLine)) {
+ size_t labelBeginIdx = labelsLine.find(' ');
+ size_t labelEndIdx = labelsLine.find(','); // can be npos when class has only one label
+ if (labelBeginIdx == std::string::npos) {
+ throw std::runtime_error("The labels file has incorrect format.");
+ }
+ labels.push_back(labelsLine.substr(labelBeginIdx + 1, labelEndIdx - (labelBeginIdx + 1)));
+ }
+ if (labels.empty())
+ throw std::logic_error("File is empty: " + labelFilename);
+
+ return labels;
+}
+
+void ClassificationModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare input ------------------------------------------------------
+ if (model->inputs().size() != 1) {
+ throw std::logic_error("Classification model wrapper supports topologies with only 1 input");
+ }
+ const auto& input = model->input();
+ inputsNames.push_back(input.get_any_name());
+
+ const ov::Shape& inputShape = input.get_shape();
+ const ov::Layout& inputLayout = getInputLayout(input);
+
+ if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
+ throw std::logic_error("3-channel 4-dimensional model's input is expected");
+ }
+
+ const auto width = inputShape[ov::layout::width_idx(inputLayout)];
+ const auto height = inputShape[ov::layout::height_idx(inputLayout)];
+ if (height != width) {
+ throw std::logic_error("Model input has incorrect image shape. Must be NxN square."
+ " Got " +
+ std::to_string(height) + "x" + std::to_string(width) + ".");
+ }
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
+
+ if (useAutoResize) {
+ ppp.input().tensor().set_spatial_dynamic_shape();
+
+ ppp.input()
+ .preprocess()
+ .convert_element_type(ov::element::f32)
+ .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
+ }
+
+ ppp.input().model().set_layout(inputLayout);
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ if (model->outputs().size() != 1) {
+ throw std::logic_error("Classification model wrapper supports topologies with only 1 output");
+ }
+
+ const ov::Shape& outputShape = model->output().get_shape();
+ if (outputShape.size() != 2 && outputShape.size() != 4) {
+ throw std::logic_error("Classification model wrapper supports topologies only with"
+ " 2-dimensional or 4-dimensional output");
+ }
+
+ const ov::Layout outputLayout("NCHW");
+ if (outputShape.size() == 4 && (outputShape[ov::layout::height_idx(outputLayout)] != 1 ||
+ outputShape[ov::layout::width_idx(outputLayout)] != 1)) {
+ throw std::logic_error("Classification model wrapper supports topologies only"
+ " with 4-dimensional output which has last two dimensions of size 1");
+ }
+
+ size_t classesNum = outputShape[ov::layout::channels_idx(outputLayout)];
+ if (nTop > classesNum) {
+ throw std::logic_error("The model provides " + std::to_string(classesNum) + " classes, but " +
+ std::to_string(nTop) + " labels are requested to be predicted");
+ }
+ if (classesNum == labels.size() + 1) {
+ labels.insert(labels.begin(), "other");
+ slog::warn << "Inserted 'other' label as first." << slog::endl;
+ } else if (classesNum != labels.size()) {
+ throw std::logic_error("Model's number of classes and parsed labels must match (" +
+ std::to_string(outputShape[1]) + " and " + std::to_string(labels.size()) + ')');
+ }
+
+ ppp.output().tensor().set_element_type(ov::element::f32);
+ model = ppp.build();
+
+ // --------------------------- Adding softmax and topK output ---------------------------
+ auto nodes = model->get_ops();
+ auto softmaxNodeIt = std::find_if(std::begin(nodes), std::end(nodes), [](const std::shared_ptr<ov::Node>& op) {
+ return std::string(op->get_type_name()) == "Softmax";
+ });
+
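+    // Reuse the model's own Softmax if present; otherwise insert one after the
+    // logits node that feeds the original output.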
+ std::shared_ptr<ov::Node> softmaxNode;
+ if (softmaxNodeIt == nodes.end()) {
+ auto logitsNode = model->get_output_op(0)->input(0).get_source_output().get_node();
+ softmaxNode = std::make_shared<ov::op::v1::Softmax>(logitsNode->output(0), 1);
+ } else {
+ softmaxNode = *softmaxNodeIt;
+ }
+ const auto k = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{}, std::vector<size_t>{nTop});
+ std::shared_ptr<ov::Node> topkNode = std::make_shared<ov::op::v3::TopK>(softmaxNode,
+ k,
+ 1,
+ ov::op::v3::TopK::Mode::MAX,
+ ov::op::v3::TopK::SortType::SORT_VALUES);
+
+    // TopK output 0 holds the values (scores) and output 1 the indices.
+    auto scores = std::make_shared<ov::op::v0::Result>(topkNode->output(0));
+    auto indices = std::make_shared<ov::op::v0::Result>(topkNode->output(1));
+    ov::ResultVector res({indices, scores});
+ model = std::make_shared<ov::Model>(res, model->get_parameters(), "classification");
+
+ // manually set output tensors name for created topK node
+ model->outputs()[0].set_names({"indices"});
+ outputsNames.push_back("indices");
+ model->outputs()[1].set_names({"scores"});
+ outputsNames.push_back("scores");
+
+ // set output precisions
+ ppp = ov::preprocess::PrePostProcessor(model);
+ ppp.output("indices").tensor().set_element_type(ov::element::i32);
+ ppp.output("scores").tensor().set_element_type(ov::element::f32);
+ model = ppp.build();
+}
diff --git a/python/openvino/runtime/common/models/src/deblurring_model.cpp b/python/openvino/runtime/common/models/src/deblurring_model.cpp
new file mode 100644
index 0000000..261efb3
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/deblurring_model.cpp
@@ -0,0 +1,158 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/deblurring_model.h"
+
+#include <algorithm>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <opencv2/imgproc.hpp>
+#include <openvino/openvino.hpp>
+
+#include <utils/ocv_common.hpp>
+#include <utils/slog.hpp>
+
+#include "models/input_data.h"
+#include "models/internal_model_data.h"
+#include "models/results.h"
+
+DeblurringModel::DeblurringModel(const std::string& modelFileName,
+ const cv::Size& inputImgSize,
+ const std::string& layout)
+ : ImageModel(modelFileName, false, layout) {
+ netInputHeight = inputImgSize.height;
+ netInputWidth = inputImgSize.width;
+}
+
+void DeblurringModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare input ------------------------------------------------------
+ if (model->inputs().size() != 1) {
+ throw std::logic_error("Deblurring model wrapper supports topologies with only 1 input");
+ }
+
+ inputsNames.push_back(model->input().get_any_name());
+
+ const ov::Shape& inputShape = model->input().get_shape();
+ const ov::Layout& inputLayout = getInputLayout(model->input());
+
+ if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
+ inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
+ throw std::logic_error("3-channel 4-dimensional model's input is expected");
+ }
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC");
+
+ ppp.input().model().set_layout(inputLayout);
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ if (model->outputs().size() != 1) {
+ throw std::logic_error("Deblurring model wrapper supports topologies with only 1 output");
+ }
+
+ outputsNames.push_back(model->output().get_any_name());
+
+ const ov::Shape& outputShape = model->output().get_shape();
+ const ov::Layout outputLayout("NCHW");
+ if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 ||
+ outputShape[ov::layout::channels_idx(outputLayout)] != 3) {
+ throw std::logic_error("3-channel 4-dimensional model's output is expected");
+ }
+
+ ppp.output().tensor().set_element_type(ov::element::f32);
+ model = ppp.build();
+
+ changeInputSize(model);
+}
+
+void DeblurringModel::changeInputSize(std::shared_ptr<ov::Model>& model) {
+ const ov::Layout& layout = ov::layout::get_layout(model->input());
+ ov::Shape inputShape = model->input().get_shape();
+
+ const auto batchId = ov::layout::batch_idx(layout);
+ const auto heightId = ov::layout::height_idx(layout);
+ const auto widthId = ov::layout::width_idx(layout);
+
+ if (inputShape[heightId] % stride || inputShape[widthId] % stride) {
+ throw std::logic_error("Model input shape HxW = " + std::to_string(inputShape[heightId]) + "x" +
+ std::to_string(inputShape[widthId]) + "must be divisible by stride " +
+ std::to_string(stride));
+ }
+
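+    // Round the requested input size up to the nearest multiple of the stride.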
+ netInputHeight = static_cast<int>((netInputHeight + stride - 1) / stride) * stride;
+ netInputWidth = static_cast<int>((netInputWidth + stride - 1) / stride) * stride;
+
+ inputShape[batchId] = 1;
+ inputShape[heightId] = netInputHeight;
+ inputShape[widthId] = netInputWidth;
+
+ model->reshape(inputShape);
+}
+
+std::shared_ptr<InternalModelData> DeblurringModel::preprocess(const InputData& inputData, ov::InferRequest& request) {
+ auto& image = inputData.asRef<ImageInputData>().inputImage;
+ size_t h = image.rows;
+ size_t w = image.cols;
+ cv::Mat resizedImage;
+
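+    // If the image is within one stride of the network input size, pad with a
+    // black border to preserve scale; otherwise fall back to a plain resize.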
+ if (netInputHeight - stride < h && h <= netInputHeight && netInputWidth - stride < w && w <= netInputWidth) {
+ int bottom = netInputHeight - h;
+ int right = netInputWidth - w;
+ cv::copyMakeBorder(image, resizedImage, 0, bottom, 0, right, cv::BORDER_CONSTANT, 0);
+ } else {
+ slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
+ cv::resize(image, resizedImage, cv::Size(netInputWidth, netInputHeight));
+ }
+ request.set_input_tensor(wrapMat2Tensor(resizedImage));
+
+ return std::make_shared<InternalImageModelData>(image.cols, image.rows);
+}
+
+std::unique_ptr<ResultBase> DeblurringModel::postprocess(InferenceResult& infResult) {
+ ImageResult* result = new ImageResult;
+ *static_cast<ResultBase*>(result) = static_cast<ResultBase&>(infResult);
+
+ const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>();
+ const auto outputData = infResult.getFirstOutputTensor().data<float>();
+
+ std::vector<cv::Mat> imgPlanes;
+ const ov::Shape& outputShape = infResult.getFirstOutputTensor().get_shape();
+ const ov::Layout outputLayout("NCHW");
+    size_t outHeight = outputShape[ov::layout::height_idx(outputLayout)];
+    size_t outWidth = outputShape[ov::layout::width_idx(outputLayout)];
+ size_t numOfPixels = outWidth * outHeight;
+ imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])),
+ cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])),
+ cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))};
+ cv::Mat resultImg;
+ cv::merge(imgPlanes, resultImg);
+
+ if (netInputHeight - stride < static_cast<size_t>(inputImgSize.inputImgHeight) &&
+ static_cast<size_t>(inputImgSize.inputImgHeight) <= netInputHeight &&
+ netInputWidth - stride < static_cast<size_t>(inputImgSize.inputImgWidth) &&
+ static_cast<size_t>(inputImgSize.inputImgWidth) <= netInputWidth) {
+ result->resultImage = resultImg(cv::Rect(0, 0, inputImgSize.inputImgWidth, inputImgSize.inputImgHeight));
+ } else {
+ cv::resize(resultImg, result->resultImage, cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight));
+ }
+
+ result->resultImage.convertTo(result->resultImage, CV_8UC3, 255);
+
+ return std::unique_ptr<ResultBase>(result);
+}
diff --git a/python/openvino/runtime/common/models/src/detection_model.cpp b/python/openvino/runtime/common/models/src/detection_model.cpp
new file mode 100644
index 0000000..83e2d22
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/detection_model.cpp
@@ -0,0 +1,52 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/detection_model.h"
+
+#include <fstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "models/image_model.h"
+
+DetectionModel::DetectionModel(const std::string& modelFileName,
+ float confidenceThreshold,
+ bool useAutoResize,
+ const std::vector<std::string>& labels,
+ const std::string& layout)
+ : ImageModel(modelFileName, useAutoResize, layout),
+ confidenceThreshold(confidenceThreshold),
+ labels(labels) {}
+
+std::vector<std::string> DetectionModel::loadLabels(const std::string& labelFilename) {
+ std::vector<std::string> labelsList;
+
+ /* Read labels (if any) */
+ if (!labelFilename.empty()) {
+ std::ifstream inputFile(labelFilename);
+ if (!inputFile.is_open())
+ throw std::runtime_error("Can't open the labels file: " + labelFilename);
+ std::string label;
+ while (std::getline(inputFile, label)) {
+ labelsList.push_back(label);
+ }
+ if (labelsList.empty())
+ throw std::logic_error("File is empty: " + labelFilename);
+ }
+
+ return labelsList;
+}
diff --git a/python/openvino/runtime/common/models/src/detection_model_centernet.cpp b/python/openvino/runtime/common/models/src/detection_model_centernet.cpp
new file mode 100644
index 0000000..eac42a7
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/detection_model_centernet.cpp
@@ -0,0 +1,302 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/detection_model_centernet.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <cmath>
+#include <map>
+#include <stdexcept>
+#include <utility>
+
+#include <opencv2/core.hpp>
+#include <opencv2/imgproc.hpp>
+#include <openvino/openvino.hpp>
+
+#include <utils/common.hpp>
+#include <utils/image_utils.h>
+#include <utils/ocv_common.hpp>
+
+#include "models/input_data.h"
+#include "models/internal_model_data.h"
+#include "models/results.h"
+
+ModelCenterNet::ModelCenterNet(const std::string& modelFileName,
+ float confidenceThreshold,
+ const std::vector<std::string>& labels,
+ const std::string& layout)
+ : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) {}
+
+void ModelCenterNet::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare input ------------------------------------------------------
+ if (model->inputs().size() != 1) {
+ throw std::logic_error("CenterNet model wrapper expects models that have only 1 input");
+ }
+
+ const ov::Shape& inputShape = model->input().get_shape();
+ const ov::Layout& inputLayout = getInputLayout(model->input());
+
+ if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
+ throw std::logic_error("Expected 3-channel input");
+ }
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ inputTransform.setPrecision(ppp, model->input().get_any_name());
+ ppp.input().tensor().set_layout("NHWC");
+
+ ppp.input().model().set_layout(inputLayout);
+
+ // --------------------------- Reading image input parameters -------------------------------------------
+ inputsNames.push_back(model->input().get_any_name());
+ netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
+ netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ if (model->outputs().size() != 3) {
+ throw std::logic_error("CenterNet model wrapper expects models that have 3 outputs");
+ }
+
+ const ov::Layout outLayout{"NCHW"};
+ for (const auto& output : model->outputs()) {
+ auto outTensorName = output.get_any_name();
+ outputsNames.push_back(outTensorName);
+ ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outLayout);
+ }
+ std::sort(outputsNames.begin(), outputsNames.end());
+ model = ppp.build();
+}
+
+cv::Point2f getDir(const cv::Point2f& srcPoint, float rotRadius) {
+ float sn = sinf(rotRadius);
+ float cs = cosf(rotRadius);
+
+ cv::Point2f srcResult(0.0f, 0.0f);
+ srcResult.x = srcPoint.x * cs - srcPoint.y * sn;
+ srcResult.y = srcPoint.x * sn + srcPoint.y * cs;
+
+ return srcResult;
+}
+
+cv::Point2f get3rdPoint(const cv::Point2f& a, const cv::Point2f& b) {
+ cv::Point2f direct = a - b;
+ return b + cv::Point2f(-direct.y, direct.x);
+}
+
+cv::Mat getAffineTransform(float centerX,
+ float centerY,
+ int srcW,
+ float rot,
+ size_t outputWidth,
+ size_t outputHeight,
+ bool inv = false) {
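+    // cv::getAffineTransform needs three point pairs: the crop center, a point
+    // displaced by half the source extent (rotated), and a third point
+    // completing the right angle via get3rdPoint.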
+ float rotRad = static_cast<float>(CV_PI) * rot / 180.0f;
+ auto srcDir = getDir({0.0f, -0.5f * srcW}, rotRad);
+ cv::Point2f dstDir(0.0f, -0.5f * outputWidth);
+ std::vector<cv::Point2f> src(3, {0.0f, 0.0f});
+ std::vector<cv::Point2f> dst(3, {0.0f, 0.0f});
+
+ src[0] = {centerX, centerY};
+ src[1] = srcDir + src[0];
+ src[2] = get3rdPoint(src[0], src[1]);
+
+ dst[0] = {outputWidth * 0.5f, outputHeight * 0.5f};
+ dst[1] = dst[0] + dstDir;
+ dst[2] = get3rdPoint(dst[0], dst[1]);
+
+ cv::Mat trans;
+ if (inv) {
+ trans = cv::getAffineTransform(dst, src);
+ } else {
+ trans = cv::getAffineTransform(src, dst);
+ }
+
+ return trans;
+}
+
+std::shared_ptr<InternalModelData> ModelCenterNet::preprocess(const InputData& inputData, ov::InferRequest& request) {
+ auto& img = inputData.asRef<ImageInputData>().inputImage;
+ const auto& resizedImg = resizeImageExt(img, netInputWidth, netInputHeight, RESIZE_KEEP_ASPECT_LETTERBOX);
+
+ request.set_input_tensor(wrapMat2Tensor(inputTransform(resizedImg)));
+ return std::make_shared<InternalImageModelData>(img.cols, img.rows);
+}
+
+std::vector<std::pair<size_t, float>> nms(float* scoresPtr, const ov::Shape& shape, float threshold, int kernel = 3) {
+ std::vector<std::pair<size_t, float>> scores;
+ scores.reserve(ModelCenterNet::INIT_VECTOR_SIZE);
+ auto chSize = shape[2] * shape[3];
+
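+    // Convert raw heat map logits to probabilities with an in-place sigmoid.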
+ for (size_t i = 0; i < shape[1] * shape[2] * shape[3]; ++i) {
+ scoresPtr[i] = expf(scoresPtr[i]) / (1 + expf(scoresPtr[i]));
+ }
+
+ for (size_t ch = 0; ch < shape[1]; ++ch) {
+ for (size_t w = 0; w < shape[2]; ++w) {
+ for (size_t h = 0; h < shape[3]; ++h) {
+ float max = scoresPtr[chSize * ch + shape[2] * w + h];
+
+ // --------------------- filter on threshold--------------------------------------
+ if (max < threshold) {
+ continue;
+ }
+
+ // --------------------- store index and score------------------------------------
+ scores.push_back({chSize * ch + shape[2] * w + h, max});
+
+ bool next = true;
+ // ---------------------- maxpool2d -----------------------------------------------
+ for (int i = -kernel / 2; i < kernel / 2 + 1 && next; ++i) {
+ for (int j = -kernel / 2; j < kernel / 2 + 1; ++j) {
+                        // w and h are unsigned, so `w + i >= 0` is always true;
+                        // compare in signed space instead.
+                        const int wi = static_cast<int>(w) + i;
+                        const int hj = static_cast<int>(h) + j;
+                        if (wi >= 0 && wi < static_cast<int>(shape[2]) && hj >= 0 && hj < static_cast<int>(shape[3])) {
+                            if (scoresPtr[chSize * ch + shape[2] * wi + hj] > max) {
+ scores.pop_back();
+ next = false;
+ break;
+ }
+ } else {
+ if (max < 0) {
+ scores.pop_back();
+ next = false;
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return scores;
+}
+
+static std::vector<std::pair<size_t, float>> filterScores(const ov::Tensor& scoresTensor, float threshold) {
+ auto shape = scoresTensor.get_shape();
+ float* scoresPtr = scoresTensor.data<float>();
+
+ return nms(scoresPtr, shape, threshold);
+}
+
+std::vector<std::pair<float, float>> filterReg(const ov::Tensor& regressionTensor,
+ const std::vector<std::pair<size_t, float>>& scores,
+ size_t chSize) {
+ const float* regPtr = regressionTensor.data<float>();
+ std::vector<std::pair<float, float>> reg;
+
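+    // Channel 0 of the regression tensor holds x-offsets and channel 1 holds
+    // y-offsets; s.first % chSize recovers the spatial index within one plane.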
+ for (auto s : scores) {
+ reg.push_back({regPtr[s.first % chSize], regPtr[chSize + s.first % chSize]});
+ }
+
+ return reg;
+}
+
+std::vector<std::pair<float, float>> filterWH(const ov::Tensor& whTensor,
+ const std::vector<std::pair<size_t, float>>& scores,
+ size_t chSize) {
+ const float* whPtr = whTensor.data<float>();
+ std::vector<std::pair<float, float>> wh;
+
+ for (auto s : scores) {
+ wh.push_back({whPtr[s.first % chSize], whPtr[chSize + s.first % chSize]});
+ }
+
+ return wh;
+}
+
+std::vector<ModelCenterNet::BBox> calcBoxes(const std::vector<std::pair<size_t, float>>& scores,
+ const std::vector<std::pair<float, float>>& reg,
+ const std::vector<std::pair<float, float>>& wh,
+ const ov::Shape& shape) {
+ std::vector<ModelCenterNet::BBox> boxes(scores.size());
+
+ for (size_t i = 0; i < boxes.size(); ++i) {
+ size_t chIdx = scores[i].first % (shape[2] * shape[3]);
+ auto xCenter = chIdx % shape[3];
+ auto yCenter = chIdx / shape[3];
+
+ boxes[i].left = xCenter + reg[i].first - wh[i].first / 2.0f;
+ boxes[i].top = yCenter + reg[i].second - wh[i].second / 2.0f;
+ boxes[i].right = xCenter + reg[i].first + wh[i].first / 2.0f;
+ boxes[i].bottom = yCenter + reg[i].second + wh[i].second / 2.0f;
+ }
+
+ return boxes;
+}
+
+void transform(std::vector<ModelCenterNet::BBox>& boxes,
+ const ov::Shape& shape,
+ int scale,
+ float centerX,
+ float centerY) {
+ cv::Mat1f trans = getAffineTransform(centerX, centerY, scale, 0, shape[2], shape[3], true);
+
+ for (auto& b : boxes) {
+ ModelCenterNet::BBox newbb;
+
+ newbb.left = trans.at<float>(0, 0) * b.left + trans.at<float>(0, 1) * b.top + trans.at<float>(0, 2);
+ newbb.top = trans.at<float>(1, 0) * b.left + trans.at<float>(1, 1) * b.top + trans.at<float>(1, 2);
+ newbb.right = trans.at<float>(0, 0) * b.right + trans.at<float>(0, 1) * b.bottom + trans.at<float>(0, 2);
+ newbb.bottom = trans.at<float>(1, 0) * b.right + trans.at<float>(1, 1) * b.bottom + trans.at<float>(1, 2);
+
+ b = newbb;
+ }
+}
+
+std::unique_ptr<ResultBase> ModelCenterNet::postprocess(InferenceResult& infResult) {
+ // --------------------------- Filter data and get valid indices ---------------------------------
+ const auto& heatmapTensor = infResult.outputsData[outputsNames[0]];
+ const auto& heatmapTensorShape = heatmapTensor.get_shape();
+ const auto chSize = heatmapTensorShape[2] * heatmapTensorShape[3];
+ const auto scores = filterScores(heatmapTensor, confidenceThreshold);
+
+ const auto& regressionTensor = infResult.outputsData[outputsNames[1]];
+ const auto reg = filterReg(regressionTensor, scores, chSize);
+
+ const auto& whTensor = infResult.outputsData[outputsNames[2]];
+ const auto wh = filterWH(whTensor, scores, chSize);
+
+ // --------------------------- Calculate bounding boxes & apply inverse affine transform ----------
+ auto boxes = calcBoxes(scores, reg, wh, heatmapTensorShape);
+
+ const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
+ const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;
+ const auto scale = std::max(imgWidth, imgHeight);
+ const float centerX = imgWidth / 2.0f;
+ const float centerY = imgHeight / 2.0f;
+
+ transform(boxes, heatmapTensorShape, scale, centerX, centerY);
+
+ // --------------------------- Create detection result objects ------------------------------------
+ DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
+
+ result->objects.reserve(scores.size());
+ for (size_t i = 0; i < scores.size(); ++i) {
+ DetectedObject desc;
+ desc.confidence = scores[i].second;
+ desc.labelID = scores[i].first / chSize;
+ desc.label = getLabelName(desc.labelID);
+ desc.x = clamp(boxes[i].left, 0.f, static_cast<float>(imgWidth));
+ desc.y = clamp(boxes[i].top, 0.f, static_cast<float>(imgHeight));
+ desc.width = clamp(boxes[i].getWidth(), 0.f, static_cast<float>(imgWidth));
+ desc.height = clamp(boxes[i].getHeight(), 0.f, static_cast<float>(imgHeight));
+
+ result->objects.push_back(desc);
+ }
+
+ return std::unique_ptr<ResultBase>(result);
+}
diff --git a/python/openvino/runtime/common/models/src/detection_model_faceboxes.cpp b/python/openvino/runtime/common/models/src/detection_model_faceboxes.cpp
new file mode 100644
index 0000000..bb349a6
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/detection_model_faceboxes.cpp
@@ -0,0 +1,261 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/detection_model_faceboxes.h"
+
+#include <algorithm>
+#include <cmath>
+#include <map>
+#include <stdexcept>
+
+#include <openvino/openvino.hpp>
+
+#include <utils/common.hpp>
+#include <utils/nms.hpp>
+#include <utils/ocv_common.hpp>
+
+#include "models/internal_model_data.h"
+#include "models/results.h"
+
+ModelFaceBoxes::ModelFaceBoxes(const std::string& modelFileName,
+ float confidenceThreshold,
+ bool useAutoResize,
+ float boxIOUThreshold,
+ const std::string& layout)
+ : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout),
+ maxProposalsCount(0),
+ boxIOUThreshold(boxIOUThreshold),
+ variance({0.1f, 0.2f}),
+ steps({32, 64, 128}),
+ minSizes({{32, 64, 128}, {256}, {512}}) {}
+
+void ModelFaceBoxes::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare input ------------------------------------------------------
+ if (model->inputs().size() != 1) {
+ throw std::logic_error("FaceBoxes model wrapper expects models that have only 1 input");
+ }
+
+ const ov::Shape& inputShape = model->input().get_shape();
+ const ov::Layout& inputLayout = getInputLayout(model->input());
+
+ if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
+ throw std::logic_error("Expected 3-channel input");
+ }
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ inputTransform.setPrecision(ppp, model->input().get_any_name());
+ ppp.input().tensor().set_layout({"NHWC"});
+
+ if (useAutoResize) {
+ ppp.input().tensor().set_spatial_dynamic_shape();
+
+ ppp.input()
+ .preprocess()
+ .convert_element_type(ov::element::f32)
+ .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
+ }
+
+ ppp.input().model().set_layout(inputLayout);
+
+ // --------------------------- Reading image input parameters -------------------------------------------
+ inputsNames.push_back(model->input().get_any_name());
+ netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
+ netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ if (model->outputs().size() != 2) {
+ throw std::logic_error("FaceBoxes model wrapper expects models that have 2 outputs");
+ }
+
+ const ov::Layout outputLayout{"CHW"};
+ maxProposalsCount = model->outputs().front().get_shape()[ov::layout::height_idx(outputLayout)];
+ for (const auto& output : model->outputs()) {
+ const auto outTensorName = output.get_any_name();
+ outputsNames.push_back(outTensorName);
+ ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout);
+ }
+ std::sort(outputsNames.begin(), outputsNames.end());
+ model = ppp.build();
+
+ // --------------------------- Calculating anchors ----------------------------------------------------
+ std::vector<std::pair<size_t, size_t>> featureMaps;
+ for (auto s : steps) {
+ featureMaps.push_back({netInputHeight / s, netInputWidth / s});
+ }
+
+ priorBoxes(featureMaps);
+}
+
+void calculateAnchors(std::vector<Anchor>& anchors,
+ const std::vector<float>& vx,
+ const std::vector<float>& vy,
+ const int minSize,
+ const int step) {
+ float skx = static_cast<float>(minSize);
+ float sky = static_cast<float>(minSize);
+
+ std::vector<float> dense_cx, dense_cy;
+
+ for (auto x : vx) {
+ dense_cx.push_back(x * step);
+ }
+
+ for (auto y : vy) {
+ dense_cy.push_back(y * step);
+ }
+
+ for (auto cy : dense_cy) {
+ for (auto cx : dense_cx) {
+ anchors.push_back(
+ {cx - 0.5f * skx, cy - 0.5f * sky, cx + 0.5f * skx, cy + 0.5f * sky}); // left top right bottom
+ }
+ }
+}
+
+void calculateAnchorsZeroLevel(std::vector<Anchor>& anchors,
+ const int fx,
+ const int fy,
+ const std::vector<int>& minSizes,
+ const int step) {
+ for (auto s : minSizes) {
+ std::vector<float> vx, vy;
+ if (s == 32) {
+ vx.push_back(static_cast<float>(fx));
+ vx.push_back(fx + 0.25f);
+ vx.push_back(fx + 0.5f);
+ vx.push_back(fx + 0.75f);
+
+ vy.push_back(static_cast<float>(fy));
+ vy.push_back(fy + 0.25f);
+ vy.push_back(fy + 0.5f);
+ vy.push_back(fy + 0.75f);
+ } else if (s == 64) {
+ vx.push_back(static_cast<float>(fx));
+ vx.push_back(fx + 0.5f);
+
+ vy.push_back(static_cast<float>(fy));
+ vy.push_back(fy + 0.5f);
+ } else {
+ vx.push_back(fx + 0.5f);
+ vy.push_back(fy + 0.5f);
+ }
+ calculateAnchors(anchors, vx, vy, s, step);
+ }
+}
+
+void ModelFaceBoxes::priorBoxes(const std::vector<std::pair<size_t, size_t>>& featureMaps) {
+ anchors.reserve(maxProposalsCount);
+
+ for (size_t k = 0; k < featureMaps.size(); ++k) {
+ for (size_t i = 0; i < featureMaps[k].first; ++i) {
+ for (size_t j = 0; j < featureMaps[k].second; ++j) {
+ if (k == 0) {
+ calculateAnchorsZeroLevel(anchors, j, i, minSizes[k], steps[k]);
+ } else {
+ calculateAnchors(anchors, {j + 0.5f}, {i + 0.5f}, minSizes[k][0], steps[k]);
+ }
+ }
+ }
+ }
+}
+
+std::pair<std::vector<size_t>, std::vector<float>> filterScores(const ov::Tensor& scoresTensor,
+ const float confidenceThreshold) {
+ auto shape = scoresTensor.get_shape();
+ const float* scoresPtr = scoresTensor.data<float>();
+
+ std::vector<size_t> indices;
+ std::vector<float> scores;
+ scores.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE);
+ indices.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE);
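+    // Scores come in (background, face) pairs, so only odd elements hold face
+    // confidences; i / 2 maps back to the proposal index.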
+    for (size_t i = 1; i < shape[1] * shape[2]; i += 2) {
+ if (scoresPtr[i] > confidenceThreshold) {
+ indices.push_back(i / 2);
+ scores.push_back(scoresPtr[i]);
+ }
+ }
+
+ return {indices, scores};
+}
+
+std::vector<Anchor> filterBoxes(const ov::Tensor& boxesTensor,
+ const std::vector<Anchor>& anchors,
+ const std::vector<size_t>& validIndices,
+ const std::vector<float>& variance) {
+ auto shape = boxesTensor.get_shape();
+ const float* boxesPtr = boxesTensor.data<float>();
+
+ std::vector<Anchor> boxes;
+ boxes.reserve(ModelFaceBoxes::INIT_VECTOR_SIZE);
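+    // Decode SSD-style regressions: center offsets are scaled by variance[0],
+    // width/height by exp(delta * variance[1]) around the matched anchor.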
+ for (auto i : validIndices) {
+ auto objStart = shape[2] * i;
+
+ auto dx = boxesPtr[objStart];
+ auto dy = boxesPtr[objStart + 1];
+ auto dw = boxesPtr[objStart + 2];
+ auto dh = boxesPtr[objStart + 3];
+
+ auto predCtrX = dx * variance[0] * anchors[i].getWidth() + anchors[i].getXCenter();
+ auto predCtrY = dy * variance[0] * anchors[i].getHeight() + anchors[i].getYCenter();
+ auto predW = exp(dw * variance[1]) * anchors[i].getWidth();
+ auto predH = exp(dh * variance[1]) * anchors[i].getHeight();
+
+ boxes.push_back({static_cast<float>(predCtrX - 0.5f * predW),
+ static_cast<float>(predCtrY - 0.5f * predH),
+ static_cast<float>(predCtrX + 0.5f * predW),
+ static_cast<float>(predCtrY + 0.5f * predH)});
+ }
+
+ return boxes;
+}
+
+std::unique_ptr<ResultBase> ModelFaceBoxes::postprocess(InferenceResult& infResult) {
+ // Filter scores and get valid indices for bounding boxes
+ const auto scoresTensor = infResult.outputsData[outputsNames[1]];
+ const auto scores = filterScores(scoresTensor, confidenceThreshold);
+
+ // Filter bounding boxes on indices
+ auto boxesTensor = infResult.outputsData[outputsNames[0]];
+ std::vector<Anchor> boxes = filterBoxes(boxesTensor, anchors, scores.first, variance);
+
+ // Apply Non-maximum Suppression
+ const std::vector<int> keep = nms(boxes, scores.second, boxIOUThreshold);
+
+ // Create detection result objects
+ DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
+ const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
+ const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;
+ const float scaleX = static_cast<float>(netInputWidth) / imgWidth;
+ const float scaleY = static_cast<float>(netInputHeight) / imgHeight;
+
+ result->objects.reserve(keep.size());
+ for (auto i : keep) {
+ DetectedObject desc;
+ desc.confidence = scores.second[i];
+ desc.x = clamp(boxes[i].left / scaleX, 0.f, static_cast<float>(imgWidth));
+ desc.y = clamp(boxes[i].top / scaleY, 0.f, static_cast<float>(imgHeight));
+ desc.width = clamp(boxes[i].getWidth() / scaleX, 0.f, static_cast<float>(imgWidth));
+ desc.height = clamp(boxes[i].getHeight() / scaleY, 0.f, static_cast<float>(imgHeight));
+ desc.labelID = 0;
+ desc.label = labels[0];
+
+ result->objects.push_back(desc);
+ }
+
+ return std::unique_ptr<ResultBase>(result);
+}
diff --git a/python/openvino/runtime/common/models/src/detection_model_retinaface.cpp b/python/openvino/runtime/common/models/src/detection_model_retinaface.cpp
new file mode 100644
index 0000000..8835725
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/detection_model_retinaface.cpp
@@ -0,0 +1,394 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/detection_model_retinaface.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <cmath>
+#include <stdexcept>
+
+#include <opencv2/core.hpp>
+#include <openvino/openvino.hpp>
+
+#include <utils/common.hpp>
+#include <utils/nms.hpp>
+
+#include "models/internal_model_data.h"
+#include "models/results.h"
+
+ModelRetinaFace::ModelRetinaFace(const std::string& modelFileName,
+ float confidenceThreshold,
+ bool useAutoResize,
+ float boxIOUThreshold,
+ const std::string& layout)
+ : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout), // Default label is "Face"
+ shouldDetectMasks(false),
+ shouldDetectLandmarks(false),
+ boxIOUThreshold(boxIOUThreshold),
+ maskThreshold(0.8f),
+ landmarkStd(1.0f),
+ anchorCfg({{32, {32, 16}, 16, {1}}, {16, {8, 4}, 16, {1}}, {8, {2, 1}, 16, {1}}}) {
+ generateAnchorsFpn();
+}
+
+void ModelRetinaFace::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare input ------------------------------------------------------
+ if (model->inputs().size() != 1) {
+ throw std::logic_error("RetinaFace model wrapper expects models that have only 1 input");
+ }
+ const ov::Shape& inputShape = model->input().get_shape();
+ const ov::Layout& inputLayout = getInputLayout(model->input());
+
+ if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
+ throw std::logic_error("Expected 3-channel input");
+ }
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
+
+ if (useAutoResize) {
+ ppp.input().tensor().set_spatial_dynamic_shape();
+
+ ppp.input()
+ .preprocess()
+ .convert_element_type(ov::element::f32)
+ .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
+ }
+
+ ppp.input().model().set_layout(inputLayout);
+
+ // --------------------------- Reading image input parameters -------------------------------------------
+ inputsNames.push_back(model->input().get_any_name());
+ netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
+ netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
+
+ // --------------------------- Prepare output -----------------------------------------------------
+
+ const ov::OutputVector& outputs = model->outputs();
+ if (outputs.size() != 6 && outputs.size() != 9 && outputs.size() != 12) {
+ throw std::logic_error("RetinaFace model wrapper expects models that have 6, 9 or 12 outputs");
+ }
+
+ const ov::Layout outputLayout{"NCHW"};
+ std::vector<size_t> outputsSizes[OUT_MAX];
+ for (const auto& output : model->outputs()) {
+ auto outTensorName = output.get_any_name();
+ outputsNames.push_back(outTensorName);
+ ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout);
+
+ OutputType type = OUT_MAX;
+ if (outTensorName.find("box") != std::string::npos) {
+ type = OUT_BOXES;
+ } else if (outTensorName.find("cls") != std::string::npos) {
+ type = OUT_SCORES;
+ } else if (outTensorName.find("landmark") != std::string::npos) {
+ type = OUT_LANDMARKS;
+ shouldDetectLandmarks = true;
+ } else if (outTensorName.find("type") != std::string::npos) {
+ type = OUT_MASKSCORES;
+ labels.clear();
+ labels.push_back("No Mask");
+ labels.push_back("Mask");
+ shouldDetectMasks = true;
+ landmarkStd = 0.2f;
+ } else {
+ continue;
+ }
+
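+        // Insertion-sort each output group by feature-map height so outputs of
+        // the same pyramid level line up across boxes/scores/landmarks/masks.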
+ size_t num = output.get_shape()[ov::layout::height_idx(outputLayout)];
+ size_t i = 0;
+ for (; i < outputsSizes[type].size(); ++i) {
+ if (num < outputsSizes[type][i]) {
+ break;
+ }
+ }
+ separateOutputsNames[type].insert(separateOutputsNames[type].begin() + i, outTensorName);
+ outputsSizes[type].insert(outputsSizes[type].begin() + i, num);
+ }
+ model = ppp.build();
+
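+    // outputsSizes keeps only the height dimension, so square feature maps are
+    // assumed when walking the anchor grid below.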
+ for (size_t idx = 0; idx < outputsSizes[OUT_BOXES].size(); ++idx) {
+ size_t width = outputsSizes[OUT_BOXES][idx];
+ size_t height = outputsSizes[OUT_BOXES][idx];
+ auto s = anchorCfg[idx].stride;
+ auto anchorNum = anchorsFpn[s].size();
+
+ anchors.push_back(std::vector<Anchor>(height * width * anchorNum));
+ for (size_t iw = 0; iw < width; ++iw) {
+ size_t sw = iw * s;
+ for (size_t ih = 0; ih < height; ++ih) {
+ size_t sh = ih * s;
+ for (size_t k = 0; k < anchorNum; ++k) {
+ Anchor& anc = anchors[idx][(ih * width + iw) * anchorNum + k];
+ anc.left = anchorsFpn[s][k].left + sw;
+ anc.top = anchorsFpn[s][k].top + sh;
+ anc.right = anchorsFpn[s][k].right + sw;
+ anc.bottom = anchorsFpn[s][k].bottom + sh;
+ }
+ }
+ }
+ }
+}
+
+std::vector<Anchor> ratioEnum(const Anchor& anchor, const std::vector<int>& ratios) {
+ std::vector<Anchor> retVal;
+ const auto w = anchor.getWidth();
+ const auto h = anchor.getHeight();
+ const auto xCtr = anchor.getXCenter();
+ const auto yCtr = anchor.getYCenter();
+
+ for (const auto ratio : ratios) {
+ const auto size = w * h;
+ const auto sizeRatio = static_cast<float>(size) / ratio;
+ const auto ws = sqrt(sizeRatio);
+ const auto hs = ws * ratio;
+ retVal.push_back({static_cast<float>(xCtr - 0.5f * (ws - 1.0f)),
+ static_cast<float>(yCtr - 0.5f * (hs - 1.0f)),
+ static_cast<float>(xCtr + 0.5f * (ws - 1.0f)),
+ static_cast<float>(yCtr + 0.5f * (hs - 1.0f))});
+ }
+ return retVal;
+}
+
+std::vector<Anchor> scaleEnum(const Anchor& anchor, const std::vector<int>& scales) {
+ std::vector<Anchor> retVal;
+ const auto w = anchor.getWidth();
+ const auto h = anchor.getHeight();
+ const auto xCtr = anchor.getXCenter();
+ const auto yCtr = anchor.getYCenter();
+
+ for (auto scale : scales) {
+ const auto ws = w * scale;
+ const auto hs = h * scale;
+ retVal.push_back({static_cast<float>(xCtr - 0.5f * (ws - 1.0f)),
+ static_cast<float>(yCtr - 0.5f * (hs - 1.0f)),
+ static_cast<float>(xCtr + 0.5f * (ws - 1.0f)),
+ static_cast<float>(yCtr + 0.5f * (hs - 1.0f))});
+ }
+ return retVal;
+}
+
+std::vector<Anchor> generateAnchors(const int baseSize,
+ const std::vector<int>& ratios,
+ const std::vector<int>& scales) {
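+    // Enumerate anchors by first varying the aspect ratio of the base box, then
+    // scaling each ratio anchor, as in common RetinaFace implementations.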
+ Anchor baseAnchor{0.0f, 0.0f, baseSize - 1.0f, baseSize - 1.0f};
+ auto ratioAnchors = ratioEnum(baseAnchor, ratios);
+ std::vector<Anchor> retVal;
+
+ for (const auto& ra : ratioAnchors) {
+ auto addon = scaleEnum(ra, scales);
+ retVal.insert(retVal.end(), addon.begin(), addon.end());
+ }
+ return retVal;
+}
+
+void ModelRetinaFace::generateAnchorsFpn() {
+ auto cfg = anchorCfg;
+ std::sort(cfg.begin(), cfg.end(), [](const AnchorCfgLine& x, const AnchorCfgLine& y) {
+ return x.stride > y.stride;
+ });
+
+ for (const auto& cfgLine : cfg) {
+ anchorsFpn.emplace(cfgLine.stride, generateAnchors(cfgLine.baseSize, cfgLine.ratios, cfgLine.scales));
+ }
+}
+
+std::vector<size_t> thresholding(const ov::Tensor& scoresTensor, const int anchorNum, const float confidenceThreshold) {
+ std::vector<size_t> indices;
+ indices.reserve(ModelRetinaFace::INIT_VECTOR_SIZE);
+ auto shape = scoresTensor.get_shape();
+ size_t restAnchors = shape[1] - anchorNum;
+ const float* scoresPtr = scoresTensor.data<float>();
+
+ for (size_t x = anchorNum; x < shape[1]; ++x) {
+ for (size_t y = 0; y < shape[2]; ++y) {
+ for (size_t z = 0; z < shape[3]; ++z) {
+ auto idx = (x * shape[2] + y) * shape[3] + z;
+ auto score = scoresPtr[idx];
+ if (score >= confidenceThreshold) {
+ indices.push_back((y * shape[3] + z) * restAnchors + (x - anchorNum));
+ }
+ }
+ }
+ }
+
+ return indices;
+}
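+
+// Note on the packed indices (an inference from the decoding in filterScores/filterBoxes below):
+// each index i encodes a (cell, anchor) pair as i = cell * restAnchors + anchor. For RetinaFace
+// score tensors shape[1] is 2 * anchorNum (background/foreground per anchor), so restAnchors
+// equals anchorNum and consumers recover anchor = i % anchorNum and cell = i / anchorNum.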
+
+void filterScores(std::vector<float>& scores,
+ const std::vector<size_t>& indices,
+ const ov::Tensor& scoresTensor,
+ const int anchorNum) {
+ const auto& shape = scoresTensor.get_shape();
+ const float* scoresPtr = scoresTensor.data<float>();
+ const auto start = shape[2] * shape[3] * anchorNum;
+
+ for (auto i : indices) {
+ auto offset = (i % anchorNum) * shape[2] * shape[3] + i / anchorNum;
+ scores.push_back(scoresPtr[start + offset]);
+ }
+}
+
+void filterBoxes(std::vector<Anchor>& boxes,
+ const std::vector<size_t>& indices,
+ const ov::Tensor& boxesTensor,
+ int anchorNum,
+ const std::vector<Anchor>& anchors) {
+ const auto& shape = boxesTensor.get_shape();
+ const float* boxesPtr = boxesTensor.data<float>();
+ const auto boxPredLen = shape[1] / anchorNum;
+ const auto blockWidth = shape[2] * shape[3];
+
+ for (auto i : indices) {
+ auto offset = blockWidth * boxPredLen * (i % anchorNum) + (i / anchorNum);
+
+ const auto dx = boxesPtr[offset];
+ const auto dy = boxesPtr[offset + blockWidth];
+ const auto dw = boxesPtr[offset + blockWidth * 2];
+ const auto dh = boxesPtr[offset + blockWidth * 3];
+
+ const auto predCtrX = dx * anchors[i].getWidth() + anchors[i].getXCenter();
+ const auto predCtrY = dy * anchors[i].getHeight() + anchors[i].getYCenter();
+ const auto predW = exp(dw) * anchors[i].getWidth();
+ const auto predH = exp(dh) * anchors[i].getHeight();
+
+ boxes.push_back({static_cast<float>(predCtrX - 0.5f * (predW - 1.0f)),
+ static_cast<float>(predCtrY - 0.5f * (predH - 1.0f)),
+ static_cast<float>(predCtrX + 0.5f * (predW - 1.0f)),
+ static_cast<float>(predCtrY + 0.5f * (predH - 1.0f))});
+ }
+}
+
+void filterLandmarks(std::vector<cv::Point2f>& landmarks,
+ const std::vector<size_t>& indices,
+ const ov::Tensor& landmarksTensor,
+ int anchorNum,
+ const std::vector<Anchor>& anchors,
+ const float landmarkStd) {
+ const auto& shape = landmarksTensor.get_shape();
+ const float* landmarksPtr = landmarksTensor.data<float>();
+ const auto landmarkPredLen = shape[1] / anchorNum;
+ const auto blockWidth = shape[2] * shape[3];
+
+ for (auto i : indices) {
+ for (int j = 0; j < ModelRetinaFace::LANDMARKS_NUM; ++j) {
+ auto offset = (i % anchorNum) * landmarkPredLen * shape[2] * shape[3] + i / anchorNum;
+ auto deltaX = landmarksPtr[offset + j * 2 * blockWidth] * landmarkStd;
+ auto deltaY = landmarksPtr[offset + (j * 2 + 1) * blockWidth] * landmarkStd;
+ landmarks.push_back({deltaX * anchors[i].getWidth() + anchors[i].getXCenter(),
+ deltaY * anchors[i].getHeight() + anchors[i].getYCenter()});
+ }
+ }
+}
+
+void filterMasksScores(std::vector<float>& masks,
+ const std::vector<size_t>& indices,
+ const ov::Tensor& maskScoresTensor,
+ const int anchorNum) {
+ auto shape = maskScoresTensor.get_shape();
+ const float* maskScoresPtr = maskScoresTensor.data<float>();
+ auto start = shape[2] * shape[3] * anchorNum * 2;
+
+ for (auto i : indices) {
+ auto offset = (i % anchorNum) * shape[2] * shape[3] + i / anchorNum;
+ masks.push_back(maskScoresPtr[start + offset]);
+ }
+}
+
+std::unique_ptr<ResultBase> ModelRetinaFace::postprocess(InferenceResult& infResult) {
+ std::vector<float> scores;
+ scores.reserve(INIT_VECTOR_SIZE);
+ std::vector<Anchor> boxes;
+ boxes.reserve(INIT_VECTOR_SIZE);
+ std::vector<cv::Point2f> landmarks;
+ std::vector<float> masks;
+
+ if (shouldDetectLandmarks) {
+ landmarks.reserve(INIT_VECTOR_SIZE);
+ }
+ if (shouldDetectMasks) {
+ masks.reserve(INIT_VECTOR_SIZE);
+ }
+
+ // --------------------------- Gather & Filter output from all levels ----------------------------------------
+ for (size_t idx = 0; idx < anchorCfg.size(); ++idx) {
+ const auto boxRaw = infResult.outputsData[separateOutputsNames[OUT_BOXES][idx]];
+ const auto scoresRaw = infResult.outputsData[separateOutputsNames[OUT_SCORES][idx]];
+ auto s = anchorCfg[idx].stride;
+ auto anchorNum = anchorsFpn[s].size();
+
+ auto validIndices = thresholding(scoresRaw, anchorNum, confidenceThreshold);
+ filterScores(scores, validIndices, scoresRaw, anchorNum);
+ filterBoxes(boxes, validIndices, boxRaw, anchorNum, anchors[idx]);
+ if (shouldDetectLandmarks) {
+ const auto landmarksRaw = infResult.outputsData[separateOutputsNames[OUT_LANDMARKS][idx]];
+ filterLandmarks(landmarks, validIndices, landmarksRaw, anchorNum, anchors[idx], landmarkStd);
+ }
+ if (shouldDetectMasks) {
+ const auto masksRaw = infResult.outputsData[separateOutputsNames[OUT_MASKSCORES][idx]];
+ filterMasksScores(masks, validIndices, masksRaw, anchorNum);
+ }
+ }
+ // --------------------------- Apply Non-maximum Suppression -----------------------------------------------
+ // Note: !shouldDetectLandmarks selects the NMS behavior; when true, box boundaries are included
+ // in the area calculation
+ const auto keep = nms(boxes, scores, boxIOUThreshold, !shouldDetectLandmarks);
+
+ // --------------------------- Create detection result objects ---------------------------------------------
+ RetinaFaceDetectionResult* result = new RetinaFaceDetectionResult(infResult.frameId, infResult.metaData);
+
+ const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
+ const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;
+ const auto scaleX = static_cast<float>(netInputWidth) / imgWidth;
+ const auto scaleY = static_cast<float>(netInputHeight) / imgHeight;
+
+ result->objects.reserve(keep.size());
+ result->landmarks.reserve(keep.size() * ModelRetinaFace::LANDMARKS_NUM);
+ for (auto i : keep) {
+ DetectedObject desc;
+ desc.confidence = scores[i];
+ //--- Scaling coordinates
+ boxes[i].left /= scaleX;
+ boxes[i].top /= scaleY;
+ boxes[i].right /= scaleX;
+ boxes[i].bottom /= scaleY;
+
+ desc.x = clamp(boxes[i].left, 0.f, static_cast<float>(imgWidth));
+ desc.y = clamp(boxes[i].top, 0.f, static_cast<float>(imgHeight));
+ desc.width = clamp(boxes[i].getWidth(), 0.f, static_cast<float>(imgWidth));
+ desc.height = clamp(boxes[i].getHeight(), 0.f, static_cast<float>(imgHeight));
+ //--- Default label is 0 (Face). When mask detection is enabled, labels are 0 (No Mask) and 1 (Mask)
+ desc.labelID = shouldDetectMasks ? (masks[i] > maskThreshold) : 0;
+ desc.label = labels[desc.labelID];
+ result->objects.push_back(desc);
+
+ //--- Scaling landmarks coordinates
+ for (size_t l = 0; l < ModelRetinaFace::LANDMARKS_NUM && shouldDetectLandmarks; ++l) {
+ landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].x =
+ clamp(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].x / scaleX, 0.f, static_cast<float>(imgWidth));
+ landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].y =
+ clamp(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l].y / scaleY, 0.f, static_cast<float>(imgHeight));
+ result->landmarks.push_back(landmarks[i * ModelRetinaFace::LANDMARKS_NUM + l]);
+ }
+ }
+
+ return std::unique_ptr<ResultBase>(result);
+}
diff --git a/python/openvino/runtime/common/models/src/detection_model_retinaface_pt.cpp b/python/openvino/runtime/common/models/src/detection_model_retinaface_pt.cpp
new file mode 100644
index 0000000..8322c3c
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/detection_model_retinaface_pt.cpp
@@ -0,0 +1,277 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/detection_model_retinaface_pt.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <cmath>
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include <utils/common.hpp>
+#include <utils/nms.hpp>
+#include <utils/ocv_common.hpp>
+
+#include "models/internal_model_data.h"
+#include "models/results.h"
+
+ModelRetinaFacePT::ModelRetinaFacePT(const std::string& modelFileName,
+ float confidenceThreshold,
+ bool useAutoResize,
+ float boxIOUThreshold,
+ const std::string& layout)
+ : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, {"Face"}, layout), // Default label is "Face"
+ landmarksNum(0),
+ boxIOUThreshold(boxIOUThreshold) {}
+
+void ModelRetinaFacePT::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare input ------------------------------------------------------
+ if (model->inputs().size() != 1) {
+ throw std::logic_error("RetinaFacePT model wrapper expects models that have only 1 input");
+ }
+
+ const ov::Shape& inputShape = model->input().get_shape();
+ const ov::Layout& inputLayout = getInputLayout(model->input());
+
+ if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
+ throw std::logic_error("Expected 3-channel input");
+ }
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ inputTransform.setPrecision(ppp, model->input().get_any_name());
+ ppp.input().tensor().set_layout({"NHWC"});
+
+ if (useAutoResize) {
+ ppp.input().tensor().set_spatial_dynamic_shape();
+
+ ppp.input()
+ .preprocess()
+ .convert_element_type(ov::element::f32)
+ .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
+ }
+
+ ppp.input().model().set_layout(inputLayout);
+
+ // --------------------------- Reading image input parameters -------------------------------------------
+ inputsNames.push_back(model->input().get_any_name());
+ netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
+ netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ if (model->outputs().size() != 3) {
+ throw std::logic_error("RetinaFace model wrapper expects models that have 3 outputs");
+ }
+
+ landmarksNum = 0;
+
+ outputsNames.resize(2);
+ std::vector<uint32_t> outputsSizes[OUT_MAX];
+ const ov::Layout chw("CHW");
+ const ov::Layout nchw("NCHW");
+ for (auto& output : model->outputs()) {
+ auto outTensorName = output.get_any_name();
+ outputsNames.push_back(outTensorName);
+ ppp.output(outTensorName)
+ .tensor()
+ .set_element_type(ov::element::f32)
+ .set_layout(output.get_shape().size() == 4 ? nchw : chw);
+
+ if (outTensorName.find("bbox") != std::string::npos) {
+ outputsNames[OUT_BOXES] = outTensorName;
+ } else if (outTensorName.find("cls") != std::string::npos) {
+ outputsNames[OUT_SCORES] = outTensorName;
+ } else if (outTensorName.find("landmark") != std::string::npos) {
+ // The landmarks output is optional. If it is present, resize the names array so that the
+ // landmarks name occupies the last slot; the other output names are already filled in or
+ // will be filled later
+ outputsNames.resize(std::max(outputsNames.size(), (size_t)OUT_LANDMARKS + 1));
+ outputsNames[OUT_LANDMARKS] = outTensorName;
+ landmarksNum =
+ output.get_shape()[ov::layout::width_idx(chw)] / 2; // Each landmark consists of 2 values (x and y)
+ } else {
+ continue;
+ }
+ }
+
+ if (outputsNames[OUT_BOXES] == "" || outputsNames[OUT_SCORES] == "") {
+ throw std::logic_error("Bbox or cls layers are not found");
+ }
+
+ model = ppp.build();
+ priors = generatePriorData();
+}
+
+std::vector<size_t> ModelRetinaFacePT::filterByScore(const ov::Tensor& scoresTensor, const float confidenceThreshold) {
+ std::vector<size_t> indicies;
+ const auto& shape = scoresTensor.get_shape();
+ const float* scoresPtr = scoresTensor.data<float>();
+
+ for (size_t x = 0; x < shape[1]; ++x) {
+ const auto idx = (x * shape[2] + 1);
+ const auto score = scoresPtr[idx];
+ if (score >= confidenceThreshold) {
+ indicies.push_back(x);
+ }
+ }
+
+ return indicies;
+}
+
+std::vector<float> ModelRetinaFacePT::getFilteredScores(const ov::Tensor& scoresTensor,
+ const std::vector<size_t>& indicies) {
+ const auto& shape = scoresTensor.get_shape();
+ const float* scoresPtr = scoresTensor.data<float>();
+
+ std::vector<float> scores;
+ scores.reserve(indicies.size());
+
+ for (auto i : indicies) {
+ scores.push_back(scoresPtr[i * shape[2] + 1]);
+ }
+ return scores;
+}
+
+std::vector<cv::Point2f> ModelRetinaFacePT::getFilteredLandmarks(const ov::Tensor& landmarksTensor,
+ const std::vector<size_t>& indicies,
+ int imgWidth,
+ int imgHeight) {
+ const auto& shape = landmarksTensor.get_shape();
+ const float* landmarksPtr = landmarksTensor.data<float>();
+
+ std::vector<cv::Point2f> landmarks(landmarksNum * indicies.size());
+
+ for (size_t i = 0; i < indicies.size(); i++) {
+ const size_t idx = indicies[i];
+ const auto& prior = priors[idx];
+ for (size_t j = 0; j < landmarksNum; j++) {
+ landmarks[i * landmarksNum + j].x =
+ clamp(prior.cX + landmarksPtr[idx * shape[2] + j * 2] * variance[0] * prior.width, 0.f, 1.f) * imgWidth;
+ landmarks[i * landmarksNum + j].y =
+ clamp(prior.cY + landmarksPtr[idx * shape[2] + j * 2 + 1] * variance[0] * prior.height, 0.f, 1.f) *
+ imgHeight;
+ }
+ }
+ return landmarks;
+}
+
+std::vector<ModelRetinaFacePT::Box> ModelRetinaFacePT::generatePriorData() {
+ const float globalMinSizes[][2] = {{16, 32}, {64, 128}, {256, 512}};
+ const float steps[] = {8., 16., 32.};
+ std::vector<ModelRetinaFacePT::Box> anchors;
+ for (size_t stepNum = 0; stepNum < arraySize(steps); stepNum++) {
+ const int featureW = static_cast<int>(std::round(netInputWidth / steps[stepNum]));
+ const int featureH = static_cast<int>(std::round(netInputHeight / steps[stepNum]));
+
+ const auto& minSizes = globalMinSizes[stepNum];
+ for (int i = 0; i < featureH; i++) {
+ for (int j = 0; j < featureW; j++) {
+ for (auto minSize : minSizes) {
+ const float sKX = minSize / netInputWidth;
+ const float sKY = minSize / netInputHeight;
+ const float denseCY = (i + 0.5f) * steps[stepNum] / netInputHeight;
+ const float denseCX = (j + 0.5f) * steps[stepNum] / netInputWidth;
+ anchors.push_back(ModelRetinaFacePT::Box{denseCX, denseCY, sKX, sKY});
+ }
+ }
+ }
+ }
+ return anchors;
+}
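+
+// Worked example (illustrative, assuming a 640x640 network input): stride 8 gives an 80x80 grid
+// with 2 min sizes (12800 priors), stride 16 gives 40x40 (3200) and stride 32 gives 20x20 (800),
+// i.e. 16800 priors in total.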
+
+std::vector<Anchor> ModelRetinaFacePT::getFilteredProposals(const ov::Tensor& boxesTensor,
+ const std::vector<size_t>& indicies,
+ int imgWidth,
+ int imgHeight) {
+ std::vector<Anchor> rects;
+ rects.reserve(indicies.size());
+
+ const auto& shape = boxesTensor.get_shape();
+ const float* boxesPtr = boxesTensor.data<float>();
+
+ if (shape[1] != priors.size()) {
+ throw std::logic_error("rawBoxes size is not equal to priors size");
+ }
+
+ for (auto i : indicies) {
+ const auto pRawBox = reinterpret_cast<const Box*>(boxesPtr + i * shape[2]);
+ const auto& prior = priors[i];
+ const float cX = priors[i].cX + pRawBox->cX * variance[0] * prior.width;
+ const float cY = priors[i].cY + pRawBox->cY * variance[0] * prior.height;
+ const float width = prior.width * exp(pRawBox->width * variance[1]);
+ const float height = prior.height * exp(pRawBox->height * variance[1]);
+ rects.push_back(Anchor{clamp(cX - width / 2, 0.f, 1.f) * imgWidth,
+ clamp(cY - height / 2, 0.f, 1.f) * imgHeight,
+ clamp(cX + width / 2, 0.f, 1.f) * imgWidth,
+ clamp(cY + height / 2, 0.f, 1.f) * imgHeight});
+ }
+
+ return rects;
+}
+
+std::unique_ptr<ResultBase> ModelRetinaFacePT::postprocess(InferenceResult& infResult) {
+ const auto boxesTensor = infResult.outputsData[outputsNames[OUT_BOXES]];
+ const auto scoresTensor = infResult.outputsData[outputsNames[OUT_SCORES]];
+
+ const auto& validIndicies = filterByScore(scoresTensor, confidenceThreshold);
+ const auto& scores = getFilteredScores(scoresTensor, validIndicies);
+
+ const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
+ const auto& landmarks = landmarksNum ? getFilteredLandmarks(infResult.outputsData[outputsNames[OUT_LANDMARKS]],
+ validIndicies,
+ internalData.inputImgWidth,
+ internalData.inputImgHeight)
+ : std::vector<cv::Point2f>();
+
+ const auto& proposals =
+ getFilteredProposals(boxesTensor, validIndicies, internalData.inputImgWidth, internalData.inputImgHeight);
+
+ const auto& keptIndicies = nms(proposals, scores, boxIOUThreshold, !landmarksNum);
+
+ // --------------------------- Create detection result objects ---------------------------------------------
+ RetinaFaceDetectionResult* result = new RetinaFaceDetectionResult(infResult.frameId, infResult.metaData);
+
+ result->objects.reserve(keptIndicies.size());
+ result->landmarks.reserve(keptIndicies.size() * landmarksNum);
+ for (auto i : keptIndicies) {
+ DetectedObject desc;
+ desc.confidence = scores[i];
+
+ //--- Coordinates are already scaled to the original image by getFilteredProposals()
+ desc.x = proposals[i].left;
+ desc.y = proposals[i].top;
+ desc.width = proposals[i].getWidth();
+ desc.height = proposals[i].getHeight();
+
+ desc.labelID = 0;
+ desc.label = labels[desc.labelID];
+ result->objects.push_back(desc);
+
+ //--- Copying landmark coordinates
+ for (uint32_t l = 0; l < landmarksNum; ++l) {
+ result->landmarks.emplace_back(landmarks[i * landmarksNum + l].x, landmarks[i * landmarksNum + l].y);
+ }
+ }
+
+ return std::unique_ptr<ResultBase>(result);
+}
diff --git a/python/openvino/runtime/common/models/src/detection_model_ssd.cpp b/python/openvino/runtime/common/models/src/detection_model_ssd.cpp
new file mode 100644
index 0000000..ef741ee
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/detection_model_ssd.cpp
@@ -0,0 +1,281 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/detection_model_ssd.h"
+
+#include <algorithm>
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include <utils/common.hpp>
+#include <utils/ocv_common.hpp>
+
+#include "models/internal_model_data.h"
+#include "models/results.h"
+
+struct InputData;
+
+ModelSSD::ModelSSD(const std::string& modelFileName,
+ float confidenceThreshold,
+ bool useAutoResize,
+ const std::vector<std::string>& labels,
+ const std::string& layout)
+ : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout) {}
+
+std::shared_ptr<InternalModelData> ModelSSD::preprocess(const InputData& inputData, ov::InferRequest& request) {
+ if (inputsNames.size() > 1) {
+ const auto& imageInfoTensor = request.get_tensor(inputsNames[1]);
+ const auto info = imageInfoTensor.data<float>();
+ info[0] = static_cast<float>(netInputHeight);
+ info[1] = static_cast<float>(netInputWidth);
+ info[2] = 1;
+ request.set_tensor(inputsNames[1], imageInfoTensor);
+ }
+
+ return DetectionModel::preprocess(inputData, request);
+}
+
+std::unique_ptr<ResultBase> ModelSSD::postprocess(InferenceResult& infResult) {
+ return outputsNames.size() > 1 ? postprocessMultipleOutputs(infResult) : postprocessSingleOutput(infResult);
+}
+
+std::unique_ptr<ResultBase> ModelSSD::postprocessSingleOutput(InferenceResult& infResult) {
+ const ov::Tensor& detectionsTensor = infResult.getFirstOutputTensor();
+ size_t detectionsNum = detectionsTensor.get_shape()[detectionsNumId];
+ const float* detections = detectionsTensor.data<float>();
+
+ DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
+ auto retVal = std::unique_ptr<ResultBase>(result);
+
+ const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
+
+ for (size_t i = 0; i < detectionsNum; i++) {
+ float image_id = detections[i * objectSize + 0];
+ if (image_id < 0) {
+ break;
+ }
+
+ float confidence = detections[i * objectSize + 2];
+
+ /** Filter out objects with confidence below the confidence threshold **/
+ if (confidence > confidenceThreshold) {
+ DetectedObject desc;
+
+ desc.confidence = confidence;
+ desc.labelID = static_cast<int>(detections[i * objectSize + 1]);
+ desc.label = getLabelName(desc.labelID);
+
+ desc.x = clamp(detections[i * objectSize + 3] * internalData.inputImgWidth,
+ 0.f,
+ static_cast<float>(internalData.inputImgWidth));
+ desc.y = clamp(detections[i * objectSize + 4] * internalData.inputImgHeight,
+ 0.f,
+ static_cast<float>(internalData.inputImgHeight));
+ desc.width = clamp(detections[i * objectSize + 5] * internalData.inputImgWidth,
+ 0.f,
+ static_cast<float>(internalData.inputImgWidth)) -
+ desc.x;
+ desc.height = clamp(detections[i * objectSize + 6] * internalData.inputImgHeight,
+ 0.f,
+ static_cast<float>(internalData.inputImgHeight)) -
+ desc.y;
+
+ result->objects.push_back(desc);
+ }
+ }
+
+ return retVal;
+}
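+
+// Layout note (as implied by the indices used above): each of the N rows of the [1, 1, N, 7]
+// detection output is [image_id, label, confidence, x_min, y_min, x_max, y_max] with coordinates
+// normalized to [0, 1], which is why width and height are obtained by scaling the max corners
+// and subtracting desc.x / desc.y.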
+
+std::unique_ptr<ResultBase> ModelSSD::postprocessMultipleOutputs(InferenceResult& infResult) {
+ const float* boxes = infResult.outputsData[outputsNames[0]].data<float>();
+ size_t detectionsNum = infResult.outputsData[outputsNames[0]].get_shape()[detectionsNumId];
+ const float* labels = infResult.outputsData[outputsNames[1]].data<float>();
+ const float* scores = outputsNames.size() > 2 ? infResult.outputsData[outputsNames[2]].data<float>() : nullptr;
+
+ DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
+ auto retVal = std::unique_ptr<ResultBase>(result);
+
+ const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
+
+ // In models where scores are stored in a separate output, coordinates are normalized to [0, 1].
+ // In other multiple-output models, coordinates are normalized to [0, netInputWidth] and [0, netInputHeight]
+ float widthScale = static_cast<float>(internalData.inputImgWidth) / (scores ? 1 : netInputWidth);
+ float heightScale = static_cast<float>(internalData.inputImgHeight) / (scores ? 1 : netInputHeight);
+
+ for (size_t i = 0; i < detectionsNum; i++) {
+ float confidence = scores ? scores[i] : boxes[i * objectSize + 4];
+
+ /** Filter out objects with confidence below the confidence threshold **/
+ if (confidence > confidenceThreshold) {
+ DetectedObject desc;
+
+ desc.confidence = confidence;
+ desc.labelID = static_cast<int>(labels[i]);
+ desc.label = getLabelName(desc.labelID);
+
+ desc.x = clamp(boxes[i * objectSize] * widthScale, 0.f, static_cast<float>(internalData.inputImgWidth));
+ desc.y =
+ clamp(boxes[i * objectSize + 1] * heightScale, 0.f, static_cast<float>(internalData.inputImgHeight));
+ desc.width =
+ clamp(boxes[i * objectSize + 2] * widthScale, 0.f, static_cast<float>(internalData.inputImgWidth)) -
+ desc.x;
+ desc.height =
+ clamp(boxes[i * objectSize + 3] * heightScale, 0.f, static_cast<float>(internalData.inputImgHeight)) -
+ desc.y;
+
+ result->objects.push_back(desc);
+ }
+ }
+
+ return retVal;
+}
+
+void ModelSSD::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare input ------------------------------------------------------
+ ov::preprocess::PrePostProcessor ppp(model);
+ for (const auto& input : model->inputs()) {
+ auto inputTensorName = input.get_any_name();
+ const ov::Shape& shape = input.get_shape();
+ ov::Layout inputLayout = getInputLayout(input);
+
+ if (shape.size() == 4) { // 1st input contains images
+ if (inputsNames.empty()) {
+ inputsNames.push_back(inputTensorName);
+ } else {
+ inputsNames[0] = inputTensorName;
+ }
+
+ inputTransform.setPrecision(ppp, inputTensorName);
+ ppp.input(inputTensorName).tensor().set_layout({"NHWC"});
+
+ if (useAutoResize) {
+ ppp.input(inputTensorName).tensor().set_spatial_dynamic_shape();
+
+ ppp.input(inputTensorName)
+ .preprocess()
+ .convert_element_type(ov::element::f32)
+ .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
+ }
+
+ ppp.input(inputTensorName).model().set_layout(inputLayout);
+
+ netInputWidth = shape[ov::layout::width_idx(inputLayout)];
+ netInputHeight = shape[ov::layout::height_idx(inputLayout)];
+ } else if (shape.size() == 2) { // 2nd input contains image info
+ inputsNames.resize(2);
+ inputsNames[1] = inputTensorName;
+ ppp.input(inputTensorName).tensor().set_element_type(ov::element::f32);
+ } else {
+ throw std::logic_error("Unsupported " + std::to_string(input.get_shape().size()) +
+ "D "
+ "input layer '" +
+ input.get_any_name() +
+ "'. "
+ "Only 2D and 4D input layers are supported");
+ }
+ }
+ model = ppp.build();
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ if (model->outputs().size() == 1) {
+ prepareSingleOutput(model);
+ } else {
+ prepareMultipleOutputs(model);
+ }
+}
+
+void ModelSSD::prepareSingleOutput(std::shared_ptr<ov::Model>& model) {
+ const auto& output = model->output();
+ outputsNames.push_back(output.get_any_name());
+
+ const ov::Shape& shape = output.get_shape();
+ const ov::Layout& layout("NCHW");
+ if (shape.size() != 4) {
+ throw std::logic_error("SSD single output must have 4 dimensions, but had " + std::to_string(shape.size()));
+ }
+ detectionsNumId = ov::layout::height_idx(layout);
+ objectSize = shape[ov::layout::width_idx(layout)];
+ if (objectSize != 7) {
+ throw std::logic_error("SSD single output must have 7 as a last dimension, but had " +
+ std::to_string(objectSize));
+ }
+ ov::preprocess::PrePostProcessor ppp(model);
+ ppp.output().tensor().set_element_type(ov::element::f32).set_layout(layout);
+ model = ppp.build();
+}
+
+void ModelSSD::prepareMultipleOutputs(std::shared_ptr<ov::Model>& model) {
+ const ov::OutputVector& outputs = model->outputs();
+ for (auto& output : outputs) {
+ const auto& tensorNames = output.get_names();
+ for (const auto& name : tensorNames) {
+ if (name.find("boxes") != std::string::npos) {
+ outputsNames.push_back(name);
+ break;
+ } else if (name.find("labels") != std::string::npos) {
+ outputsNames.push_back(name);
+ break;
+ } else if (name.find("scores") != std::string::npos) {
+ outputsNames.push_back(name);
+ break;
+ }
+ }
+ }
+ if (outputsNames.size() != 2 && outputsNames.size() != 3) {
+ throw std::logic_error("SSD model wrapper must have 2 or 3 outputs, but had " +
+ std::to_string(outputsNames.size()));
+ }
+ std::sort(outputsNames.begin(), outputsNames.end());
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ const auto& boxesShape = model->output(outputsNames[0]).get_partial_shape().get_max_shape();
+
+ ov::Layout boxesLayout;
+ if (boxesShape.size() == 2) {
+ boxesLayout = "NC";
+ detectionsNumId = ov::layout::batch_idx(boxesLayout);
+ objectSize = boxesShape[ov::layout::channels_idx(boxesLayout)];
+
+ if (objectSize != 5) {
+ throw std::logic_error("Incorrect 'boxes' output shape, [n][5] shape is required");
+ }
+ } else if (boxesShape.size() == 3) {
+ boxesLayout = "CHW";
+ detectionsNumId = ov::layout::height_idx(boxesLayout);
+ objectSize = boxesShape[ov::layout::width_idx(boxesLayout)];
+
+ if (objectSize != 4) {
+ throw std::logic_error("Incorrect 'boxes' output shape, [b][n][4] shape is required");
+ }
+ } else {
+ throw std::logic_error("Incorrect number of 'boxes' output dimensions, expected 2 or 3, but had " +
+ std::to_string(boxesShape.size()));
+ }
+
+ ppp.output(outputsNames[0]).tensor().set_layout(boxesLayout);
+
+ for (const auto& outName : outputsNames) {
+ ppp.output(outName).tensor().set_element_type(ov::element::f32);
+ }
+ model = ppp.build();
+}
diff --git a/python/openvino/runtime/common/models/src/detection_model_yolo.cpp b/python/openvino/runtime/common/models/src/detection_model_yolo.cpp
new file mode 100644
index 0000000..2c4fb1d
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/detection_model_yolo.cpp
@@ -0,0 +1,481 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/detection_model_yolo.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include <utils/common.hpp>
+#include <utils/slog.hpp>
+
+#include "models/internal_model_data.h"
+#include "models/results.h"
+
+std::vector<float> defaultAnchors[] = {
+ // YOLOv1v2
+ {0.57273f, 0.677385f, 1.87446f, 2.06253f, 3.33843f, 5.47434f, 7.88282f, 3.52778f, 9.77052f, 9.16828f},
+ // YOLOv3
+ {10.0f, 13.0f, 16.0f, 30.0f, 33.0f, 23.0f,
+ 30.0f, 61.0f, 62.0f, 45.0f, 59.0f, 119.0f,
+ 116.0f, 90.0f, 156.0f, 198.0f, 373.0f, 326.0f},
+ // YOLOv4
+ {12.0f, 16.0f, 19.0f, 36.0f, 40.0f, 28.0f,
+ 36.0f, 75.0f, 76.0f, 55.0f, 72.0f, 146.0f,
+ 142.0f, 110.0f, 192.0f, 243.0f, 459.0f, 401.0f},
+ // YOLOv4_Tiny
+ {10.0f, 14.0f, 23.0f, 27.0f, 37.0f, 58.0f, 81.0f, 82.0f, 135.0f, 169.0f, 344.0f, 319.0f},
+ // YOLOF
+ {16.0f, 16.0f, 32.0f, 32.0f, 64.0f, 64.0f, 128.0f, 128.0f, 256.0f, 256.0f, 512.0f, 512.0f}};
+
+const std::vector<int64_t> defaultMasks[] = {
+ // YOLOv1v2
+ {},
+ // YOLOv3
+ {},
+ // YOLOv4
+ {0, 1, 2, 3, 4, 5, 6, 7, 8},
+ // YOLOv4_Tiny
+ {1, 2, 3, 3, 4, 5},
+ // YOLOF
+ {0, 1, 2, 3, 4, 5}};
+
+static inline float sigmoid(float x) {
+ return 1.f / (1.f + exp(-x));
+}
+
+static inline float linear(float x) {
+ return x;
+}
+
+ModelYolo::ModelYolo(const std::string& modelFileName,
+ float confidenceThreshold,
+ bool useAutoResize,
+ bool useAdvancedPostprocessing,
+ float boxIOUThreshold,
+ const std::vector<std::string>& labels,
+ const std::vector<float>& anchors,
+ const std::vector<int64_t>& masks,
+ const std::string& layout)
+ : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout),
+ boxIOUThreshold(boxIOUThreshold),
+ useAdvancedPostprocessing(useAdvancedPostprocessing),
+ yoloVersion(YOLO_V3),
+ presetAnchors(anchors),
+ presetMasks(masks) {}
+
+void ModelYolo::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare input ------------------------------------------------------
+ if (model->inputs().size() != 1) {
+ throw std::logic_error("YOLO model wrapper accepts models that have only 1 input");
+ }
+
+ const auto& input = model->input();
+ const ov::Shape& inputShape = model->input().get_shape();
+ ov::Layout inputLayout = getInputLayout(input);
+
+ if (inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
+ throw std::logic_error("Expected 3-channel input");
+ }
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
+
+ if (useAutoResize) {
+ ppp.input().tensor().set_spatial_dynamic_shape();
+
+ ppp.input()
+ .preprocess()
+ .convert_element_type(ov::element::f32)
+ .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
+ }
+
+ ppp.input().model().set_layout(inputLayout);
+
+ //--- Reading image input parameters
+ inputsNames.push_back(model->input().get_any_name());
+ netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
+ netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ const ov::OutputVector& outputs = model->outputs();
+ std::map<std::string, ov::Shape> outShapes;
+ for (auto& out : outputs) {
+ ppp.output(out.get_any_name()).tensor().set_element_type(ov::element::f32);
+ if (out.get_shape().size() == 4) {
+ if (out.get_shape()[ov::layout::height_idx("NCHW")] != out.get_shape()[ov::layout::width_idx("NCHW")] &&
+ out.get_shape()[ov::layout::height_idx("NHWC")] == out.get_shape()[ov::layout::width_idx("NHWC")]) {
+ ppp.output(out.get_any_name()).model().set_layout("NHWC");
+ // outShapes are captured before ppp.build(), so yoloRegionLayout must describe the layout
+ // the model has before ppp.build() is applied
+ yoloRegionLayout = "NHWC";
+ }
+ // yolo-v1-tiny-tf output shape is [1, 21125], so the layout is set only for 4D tensors
+ ppp.output(out.get_any_name()).tensor().set_layout("NCHW");
+ }
+ outputsNames.push_back(out.get_any_name());
+ outShapes[out.get_any_name()] = out.get_shape();
+ }
+ model = ppp.build();
+
+ yoloVersion = YOLO_V3;
+ bool isRegionFound = false;
+ for (const auto& op : model->get_ordered_ops()) {
+ if (std::string("RegionYolo") == op->get_type_name()) {
+ auto regionYolo = std::dynamic_pointer_cast<ov::op::v0::RegionYolo>(op);
+
+ if (regionYolo) {
+ if (!regionYolo->get_mask().size()) {
+ yoloVersion = YOLO_V1V2;
+ }
+
+ const auto& opName = op->get_friendly_name();
+ for (const auto& out : outputs) {
+ if (out.get_node()->get_friendly_name() == opName ||
+ out.get_node()->get_input_node_ptr(0)->get_friendly_name() == opName) {
+ isRegionFound = true;
+ regions.emplace(out.get_any_name(), Region(regionYolo));
+ }
+ }
+ }
+ }
+ }
+
+ if (!isRegionFound) {
+ switch (outputsNames.size()) {
+ case 1:
+ yoloVersion = YOLOF;
+ break;
+ case 2:
+ yoloVersion = YOLO_V4_TINY;
+ break;
+ case 3:
+ yoloVersion = YOLO_V4;
+ break;
+ }
+
+ int num = yoloVersion == YOLOF ? 6 : 3;
+ isObjConf = yoloVersion == YOLOF ? 0 : 1;
+ int i = 0;
+
+ auto chosenMasks = presetMasks.size() ? presetMasks : defaultMasks[yoloVersion];
+ if (chosenMasks.size() != num * outputs.size()) {
+ throw std::runtime_error(std::string("Invalid size of masks array, got ") +
+ std::to_string(chosenMasks.size()) + ", should be " +
+ std::to_string(num * outputs.size()));
+ }
+
+ std::sort(outputsNames.begin(),
+ outputsNames.end(),
+ [&outShapes, this](const std::string& x, const std::string& y) {
+ return outShapes[x][ov::layout::height_idx(yoloRegionLayout)] >
+ outShapes[y][ov::layout::height_idx(yoloRegionLayout)];
+ });
+
+ for (const auto& name : outputsNames) {
+ const auto& shape = outShapes[name];
+ if (shape[ov::layout::channels_idx(yoloRegionLayout)] % num != 0) {
+ throw std::logic_error(std::string("Output tensor ") + name + " has wrong channel dimension");
+ }
+ regions.emplace(
+ name,
+ Region(shape[ov::layout::channels_idx(yoloRegionLayout)] / num - 4 - (isObjConf ? 1 : 0),
+ 4,
+ presetAnchors.size() ? presetAnchors : defaultAnchors[yoloVersion],
+ std::vector<int64_t>(chosenMasks.begin() + i * num, chosenMasks.begin() + (i + 1) * num),
+ shape[ov::layout::width_idx(yoloRegionLayout)],
+ shape[ov::layout::height_idx(yoloRegionLayout)]));
+ i++;
+ }
+ } else {
+ // Currently externally set anchors and masks are supported only for YoloV4
+ if (presetAnchors.size() || presetMasks.size()) {
+ slog::warn << "Preset anchors and mask can be set for YoloV4 model only. "
+ "This model is not YoloV4, so these options will be ignored."
+ << slog::endl;
+ }
+ }
+}
+
+std::unique_ptr<ResultBase> ModelYolo::postprocess(InferenceResult& infResult) {
+ DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
+ std::vector<DetectedObject> objects;
+
+ // Parsing outputs
+ const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
+
+ for (auto& output : infResult.outputsData) {
+ this->parseYOLOOutput(output.first,
+ output.second,
+ netInputHeight,
+ netInputWidth,
+ internalData.inputImgHeight,
+ internalData.inputImgWidth,
+ objects);
+ }
+
+ if (useAdvancedPostprocessing) {
+ // Advanced postprocessing
+ // Checking IOU threshold conformance
+ // For every i-th object, find all objects it intersects with and compare confidences.
+ // The i-th object is included in the result only if it has greater confidence than every
+ // object it overlaps with
+ for (const auto& obj1 : objects) {
+ bool isGoodResult = true;
+ for (const auto& obj2 : objects) {
+ if (obj1.labelID == obj2.labelID && obj1.confidence < obj2.confidence &&
+ intersectionOverUnion(obj1, obj2) >= boxIOUThreshold) { // if obj1 is the same as obj2, condition
+ // expression will evaluate to false anyway
+ isGoodResult = false;
+ break;
+ }
+ }
+ if (isGoodResult) {
+ result->objects.push_back(obj1);
+ }
+ }
+ } else {
+ // Classic postprocessing
+ std::sort(objects.begin(), objects.end(), [](const DetectedObject& x, const DetectedObject& y) {
+ return x.confidence > y.confidence;
+ });
+ for (size_t i = 0; i < objects.size(); ++i) {
+ if (objects[i].confidence == 0)
+ continue;
+ for (size_t j = i + 1; j < objects.size(); ++j)
+ if (intersectionOverUnion(objects[i], objects[j]) >= boxIOUThreshold)
+ objects[j].confidence = 0;
+ result->objects.push_back(objects[i]);
+ }
+ }
+
+ return std::unique_ptr<ResultBase>(result);
+}
+
+void ModelYolo::parseYOLOOutput(const std::string& output_name,
+ const ov::Tensor& tensor,
+ const unsigned long resized_im_h,
+ const unsigned long resized_im_w,
+ const unsigned long original_im_h,
+ const unsigned long original_im_w,
+ std::vector<DetectedObject>& objects) {
+ // --------------------------- Extracting layer parameters -------------------------------------
+ auto it = regions.find(output_name);
+ if (it == regions.end()) {
+ throw std::runtime_error(std::string("Can't find output layer with name ") + output_name);
+ }
+ auto& region = it->second;
+
+ int sideW = 0;
+ int sideH = 0;
+ unsigned long scaleH;
+ unsigned long scaleW;
+ switch (yoloVersion) {
+ case YOLO_V1V2:
+ sideH = region.outputHeight;
+ sideW = region.outputWidth;
+ scaleW = region.outputWidth;
+ scaleH = region.outputHeight;
+ break;
+ case YOLO_V3:
+ case YOLO_V4:
+ case YOLO_V4_TINY:
+ case YOLOF:
+ sideH = static_cast<int>(tensor.get_shape()[ov::layout::height_idx("NCHW")]);
+ sideW = static_cast<int>(tensor.get_shape()[ov::layout::width_idx("NCHW")]);
+ scaleW = resized_im_w;
+ scaleH = resized_im_h;
+ break;
+ }
+
+ auto entriesNum = sideW * sideH;
+ const float* outData = tensor.data<float>();
+
+ auto postprocessRawData =
+ (yoloVersion == YOLO_V4 || yoloVersion == YOLO_V4_TINY || yoloVersion == YOLOF) ? sigmoid : linear;
+
+ // --------------------------- Parsing YOLO Region output -------------------------------------
+ for (int i = 0; i < entriesNum; ++i) {
+ int row = i / sideW;
+ int col = i % sideW;
+ for (int n = 0; n < region.num; ++n) {
+ //--- Getting region data
+ int obj_index = calculateEntryIndex(entriesNum,
+ region.coords,
+ region.classes + isObjConf,
+ n * entriesNum + i,
+ region.coords);
+ int box_index =
+ calculateEntryIndex(entriesNum, region.coords, region.classes + isObjConf, n * entriesNum + i, 0);
+ float scale = isObjConf ? postprocessRawData(outData[obj_index]) : 1;
+
+ //--- Preliminary check for confidence threshold conformance
+ if (scale >= confidenceThreshold) {
+ //--- Calculating scaled region's coordinates
+ float x, y;
+ if (yoloVersion == YOLOF) {
+ x = (static_cast<float>(col) / sideW +
+ outData[box_index + 0 * entriesNum] * region.anchors[2 * n] / scaleW) *
+ original_im_w;
+ y = (static_cast<float>(row) / sideH +
+ outData[box_index + 1 * entriesNum] * region.anchors[2 * n + 1] / scaleH) *
+ original_im_h;
+ } else {
+ x = static_cast<float>((col + postprocessRawData(outData[box_index + 0 * entriesNum])) / sideW *
+ original_im_w);
+ y = static_cast<float>((row + postprocessRawData(outData[box_index + 1 * entriesNum])) / sideH *
+ original_im_h);
+ }
+ float height = static_cast<float>(std::exp(outData[box_index + 3 * entriesNum]) *
+ region.anchors[2 * n + 1] * original_im_h / scaleH);
+ float width = static_cast<float>(std::exp(outData[box_index + 2 * entriesNum]) * region.anchors[2 * n] *
+ original_im_w / scaleW);
+
+ DetectedObject obj;
+ obj.x = clamp(x - width / 2, 0.f, static_cast<float>(original_im_w));
+ obj.y = clamp(y - height / 2, 0.f, static_cast<float>(original_im_h));
+ obj.width = clamp(width, 0.f, static_cast<float>(original_im_w - obj.x));
+ obj.height = clamp(height, 0.f, static_cast<float>(original_im_h - obj.y));
+
+ for (size_t j = 0; j < region.classes; ++j) {
+ int class_index = calculateEntryIndex(entriesNum,
+ region.coords,
+ region.classes + isObjConf,
+ n * entriesNum + i,
+ region.coords + isObjConf + j);
+ float prob = scale * postprocessRawData(outData[class_index]);
+
+ //--- Checking confidence threshold conformance and adding region to the list
+ if (prob >= confidenceThreshold) {
+ obj.confidence = prob;
+ obj.labelID = j;
+ obj.label = getLabelName(obj.labelID);
+ objects.push_back(obj);
+ }
+ }
+ }
+ }
+ }
+}
+
+int ModelYolo::calculateEntryIndex(int totalCells, int lcoords, size_t lclasses, int location, int entry) {
+ int n = location / totalCells;
+ int loc = location % totalCells;
+ return (n * (lcoords + lclasses) + entry) * totalCells + loc;
+}
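+
+// Layout note (as implied by the formula above): a region tensor is flattened as
+// [anchor][entry][cell] with totalCells = sideW * sideH and (lcoords + lclasses) entries per
+// anchor (lclasses already includes the objectness slot when present), so entry e of anchor n
+// at cell loc lives at offset (n * (lcoords + lclasses) + e) * totalCells + loc.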
+
+double ModelYolo::intersectionOverUnion(const DetectedObject& o1, const DetectedObject& o2) {
+ double overlappingWidth = fmin(o1.x + o1.width, o2.x + o2.width) - fmax(o1.x, o2.x);
+ double overlappingHeight = fmin(o1.y + o1.height, o2.y + o2.height) - fmax(o1.y, o2.y);
+ double intersectionArea =
+ (overlappingWidth < 0 || overlappingHeight < 0) ? 0 : overlappingHeight * overlappingWidth;
+ double unionArea = o1.width * o1.height + o2.width * o2.height - intersectionArea;
+ return intersectionArea / unionArea;
+}
+
+ModelYolo::Region::Region(const std::shared_ptr<ov::op::v0::RegionYolo>& regionYolo) {
+ coords = regionYolo->get_num_coords();
+ classes = regionYolo->get_num_classes();
+ auto mask = regionYolo->get_mask();
+ num = mask.size();
+
+ auto shape = regionYolo->get_input_shape(0);
+ outputWidth = shape[3];
+ outputHeight = shape[2];
+
+ if (num) {
+ // Parsing YoloV3 parameters
+ anchors.resize(num * 2);
+
+ for (int i = 0; i < num; ++i) {
+ anchors[i * 2] = regionYolo->get_anchors()[mask[i] * 2];
+ anchors[i * 2 + 1] = regionYolo->get_anchors()[mask[i] * 2 + 1];
+ }
+ } else {
+ // Parsing YoloV2 parameters
+ num = regionYolo->get_num_regions();
+ anchors = regionYolo->get_anchors();
+ if (anchors.empty()) {
+ anchors = defaultAnchors[YOLO_V1V2];
+ num = 5;
+ }
+ }
+}
+
+ModelYolo::Region::Region(size_t classes,
+ int coords,
+ const std::vector<float>& anchors,
+ const std::vector<int64_t>& masks,
+ size_t outputWidth,
+ size_t outputHeight)
+ : classes(classes),
+ coords(coords),
+ outputWidth(outputWidth),
+ outputHeight(outputHeight) {
+ num = masks.size();
+
+ if (anchors.size() == 0 || anchors.size() % 2 != 0) {
+ throw std::runtime_error("Explicitly initialized region should have non-empty even-sized regions vector");
+ }
+
+ if (num) {
+ this->anchors.resize(num * 2);
+
+ for (int i = 0; i < num; ++i) {
+ this->anchors[i * 2] = anchors[masks[i] * 2];
+ this->anchors[i * 2 + 1] = anchors[masks[i] * 2 + 1];
+ }
+ } else {
+ this->anchors = anchors;
+ num = anchors.size() / 2;
+ }
+}
diff --git a/python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp b/python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp
new file mode 100644
index 0000000..132eb9e
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/detection_model_yolov3_onnx.cpp
@@ -0,0 +1,188 @@
+/*
+// Copyright (C) 2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/detection_model_yolov3_onnx.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include <utils/common.hpp>
+#include <utils/slog.hpp>
+
+#include "models/input_data.h"
+#include "models/internal_model_data.h"
+#include "models/results.h"
+#include "utils/image_utils.h"
+
+ModelYoloV3ONNX::ModelYoloV3ONNX(const std::string& modelFileName,
+ float confidenceThreshold,
+ const std::vector<std::string>& labels,
+ const std::string& layout)
+ : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) {
+ interpolationMode = cv::INTER_CUBIC;
+ resizeMode = RESIZE_KEEP_ASPECT_LETTERBOX;
+}
+
+void ModelYoloV3ONNX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare inputs ------------------------------------------------------
+ const ov::OutputVector& inputs = model->inputs();
+ if (inputs.size() != 2) {
+ throw std::logic_error("YoloV3ONNX model wrapper expects models that have 2 inputs");
+ }
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ inputsNames.resize(inputs.size()); // resize rather than reserve: elements are assigned by index below
+ for (auto& input : inputs) {
+ const ov::Shape& currentShape = input.get_shape();
+ std::string currentName = input.get_any_name();
+ const ov::Layout& currentLayout = getInputLayout(input);
+
+ if (currentShape.size() == 4) {
+ if (currentShape[ov::layout::channels_idx(currentLayout)] != 3) {
+ throw std::logic_error("Expected 4D image input with 3 channels");
+ }
+ inputsNames[0] = currentName;
+ netInputWidth = currentShape[ov::layout::width_idx(currentLayout)];
+ netInputHeight = currentShape[ov::layout::height_idx(currentLayout)];
+ ppp.input(currentName).tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
+ } else if (currentShape.size() == 2) {
+ if (currentShape[ov::layout::channels_idx(currentLayout)] != 2) {
+ throw std::logic_error("Expected 2D image info input with 2 channels");
+ }
+ inputsNames[1] = currentName;
+ ppp.input(currentName).tensor().set_element_type(ov::element::i32);
+ }
+ ppp.input(currentName).model().set_layout(currentLayout);
+ }
+
+ // --------------------------- Prepare outputs -----------------------------------------------------
+ const ov::OutputVector& outputs = model->outputs();
+ if (outputs.size() != 3) {
+ throw std::logic_error("YoloV3ONNX model wrapper expects models that have 3 outputs");
+ }
+
+ for (auto& output : outputs) {
+ const ov::Shape& currentShape = output.get_partial_shape().get_max_shape();
+ std::string currentName = output.get_any_name();
+ if (currentShape.back() == 3) {
+ indicesOutputName = currentName;
+ ppp.output(currentName).tensor().set_element_type(ov::element::i32);
+ } else if (currentShape[2] == 4) {
+ boxesOutputName = currentName;
+ ppp.output(currentName).tensor().set_element_type(ov::element::f32);
+ } else if (currentShape[1] == numberOfClasses) {
+ scoresOutputName = currentName;
+ ppp.output(currentName).tensor().set_element_type(ov::element::f32);
+ } else {
+ throw std::logic_error("Expected shapes [:,:,4], [:,"
+ + std::to_string(numberOfClasses) + ",:] and [:,3] for outputs");
+ }
+ outputsNames.push_back(currentName);
+ }
+ model = ppp.build();
+}
+
+std::shared_ptr<InternalModelData> ModelYoloV3ONNX::preprocess(const InputData& inputData,
+ ov::InferRequest& request) {
+ const auto& origImg = inputData.asRef<ImageInputData>().inputImage;
+
+ cv::Mat info(cv::Size(1, 2), CV_32SC1);
+ info.at<int>(0) = origImg.rows; // single-index access into the continuous 2x1 matrix
+ info.at<int>(1) = origImg.cols;
+ auto allocator = std::make_shared<SharedTensorAllocator>(info);
+ ov::Tensor infoInput = ov::Tensor(ov::element::i32, ov::Shape({1, 2}), ov::Allocator(allocator));
+
+ request.set_tensor(inputsNames[1], infoInput);
+
+ return ImageModel::preprocess(inputData, request);
+}
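+
+// Note (an inference from how postprocess() consumes the outputs): the second input carries the
+// original image size as (height, width); the ONNX YOLOv3 graph uses it to emit boxes already in
+// original-image coordinates, which is why postprocess() clamps them against imgWidth/imgHeight
+// without any rescaling.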
+
+namespace {
+float getScore(const ov::Tensor& scoresTensor, size_t classInd, size_t boxInd) {
+ const float* scoresPtr = scoresTensor.data<float>();
+ const auto shape = scoresTensor.get_shape();
+ int N = shape[2];
+
+ return scoresPtr[classInd * N + boxInd];
+}
+}  // namespace
+
+std::unique_ptr<ResultBase> ModelYoloV3ONNX::postprocess(InferenceResult& infResult) {
+ // Get info about input image
+ const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth;
+ const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight;
+
+ // Get outputs tensors
+ const ov::Tensor& boxes = infResult.outputsData[boxesOutputName];
+ const float* boxesPtr = boxes.data<float>();
+
+ const ov::Tensor& scores = infResult.outputsData[scoresOutputName];
+ const ov::Tensor& indices = infResult.outputsData[indicesOutputName];
+
+ const int* indicesData = indices.data<int>();
+ const auto indicesShape = indices.get_shape();
+ const auto boxShape = boxes.get_shape();
+
+ // Generate detection results
+ DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
+ size_t numberOfBoxes = indicesShape.size() == 3 ? indicesShape[1] : indicesShape[0];
+ int indicesStride = indicesShape.size() == 3 ? indicesShape[2] : indicesShape[1];
+
+ for (size_t i = 0; i < numberOfBoxes; ++i) {
+ int batchInd = indicesData[i * indicesStride];
+ int classInd = indicesData[i * indicesStride + 1];
+ int boxInd = indicesData[i * indicesStride + 2];
+
+ if (batchInd == -1) {
+ break;
+ }
+
+ float score = getScore(scores, classInd, boxInd);
+
+ if (score > confidenceThreshold) {
+ DetectedObject obj;
+ size_t startPos = boxShape[2] * boxInd;
+
+ auto x = boxesPtr[startPos + 1];
+ auto y = boxesPtr[startPos];
+ auto width = boxesPtr[startPos + 3] - x;
+ auto height = boxesPtr[startPos + 2] - y;
+
+ // Create new detected box
+ obj.x = clamp(x, 0.f, static_cast<float>(imgWidth));
+ obj.y = clamp(y, 0.f, static_cast<float>(imgHeight));
+ obj.height = clamp(height, 0.f, static_cast<float>(imgHeight));
+ obj.width = clamp(width, 0.f, static_cast<float>(imgWidth));
+ obj.confidence = score;
+ obj.labelID = classInd;
+ obj.label = getLabelName(classInd);
+
+ result->objects.push_back(obj);
+ }
+ }
+
+ return std::unique_ptr<ResultBase>(result);
+}
diff --git a/python/openvino/runtime/common/models/src/detection_model_yolox.cpp b/python/openvino/runtime/common/models/src/detection_model_yolox.cpp
new file mode 100644
index 0000000..1e434ff
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/detection_model_yolox.cpp
@@ -0,0 +1,194 @@
+/*
+// Copyright (C) 2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/detection_model_yolox.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include <utils/common.hpp>
+#include <utils/slog.hpp>
+
+#include "models/input_data.h"
+#include "models/internal_model_data.h"
+#include "models/results.h"
+#include "utils/image_utils.h"
+#include "utils/nms.hpp"
+
+ModelYoloX::ModelYoloX(const std::string& modelFileName,
+ float confidenceThreshold,
+ float boxIOUThreshold,
+ const std::vector<std::string>& labels,
+ const std::string& layout)
+ : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout),
+ boxIOUThreshold(boxIOUThreshold) {
+ resizeMode = RESIZE_KEEP_ASPECT;
+}
+
+void ModelYoloX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare input ------------------------------------------------------
+ const ov::OutputVector& inputs = model->inputs();
+ if (inputs.size() != 1) {
+ throw std::logic_error("YOLOX model wrapper accepts models that have only 1 input");
+ }
+
+ //--- Check image input
+ const auto& input = model->input();
+ const ov::Shape& inputShape = model->input().get_shape();
+ ov::Layout inputLayout = getInputLayout(input);
+
+ if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
+ throw std::logic_error("Expected 4D image input with 3 channels");
+ }
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
+
+ ppp.input().model().set_layout(inputLayout);
+
+ //--- Reading image input parameters
+ inputsNames.push_back(input.get_any_name());
+ netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
+ netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
+ setStridesGrids();
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ if (model->outputs().size() != 1) {
+ throw std::logic_error("YoloX model wrapper expects models that have only 1 output");
+ }
+ const auto& output = model->output();
+ outputsNames.push_back(output.get_any_name());
+ const ov::Shape& shape = output.get_shape();
+
+ if (shape.size() != 3) {
+ throw std::logic_error("YOLOX single output must have 3 dimensions, but had " + std::to_string(shape.size()));
+ }
+ ppp.output().tensor().set_element_type(ov::element::f32);
+
+ model = ppp.build();
+}
+
+void ModelYoloX::setStridesGrids() {
+ std::vector<size_t> strides = {8, 16, 32};
+ std::vector<size_t> hsizes(3);
+ std::vector<size_t> wsizes(3);
+
+ for (size_t i = 0; i < strides.size(); ++i) {
+ hsizes[i] = netInputHeight / strides[i];
+ wsizes[i] = netInputWidth / strides[i];
+ }
+
+ for (size_t size_index = 0; size_index < hsizes.size(); ++size_index) {
+ for (size_t h_index = 0; h_index < hsizes[size_index]; ++h_index) {
+ for (size_t w_index = 0; w_index < wsizes[size_index]; ++w_index) {
+ grids.emplace_back(w_index, h_index);
+ expandedStrides.push_back(strides[size_index]);
+ }
+ }
+ }
+}
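+
+// Worked example (illustrative, assuming a 416x416 network input): strides {8, 16, 32} produce
+// 52x52, 26x26 and 13x13 grids, so grids and expandedStrides end up with
+// 2704 + 676 + 169 = 3549 entries, one per predicted box.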
+
+std::shared_ptr<InternalModelData> ModelYoloX::preprocess(const InputData& inputData,
+ ov::InferRequest& request) {
+ const auto& origImg = inputData.asRef<ImageInputData>().inputImage;
+ double scale = std::min(static_cast<double>(netInputWidth) / origImg.cols,
+ static_cast<double>(netInputHeight) / origImg.rows);
+
+ cv::Mat resizedImage = resizeImageExt(origImg, netInputWidth, netInputHeight, resizeMode,
+ interpolationMode, nullptr, cv::Scalar(114, 114, 114));
+
+ request.set_input_tensor(wrapMat2Tensor(resizedImage));
+ return std::make_shared<InternalScaleData>(origImg.cols, origImg.rows, scale, scale);
+}
+
+std::unique_ptr<ResultBase> ModelYoloX::postprocess(InferenceResult& infResult) {
+ // Get metadata about input image shape and scale
+ const auto& scale = infResult.internalModelData->asRef<InternalScaleData>();
+
+ // Get output tensor
+ const ov::Tensor& output = infResult.outputsData[outputsNames[0]];
+ const auto& outputShape = output.get_shape();
+ float* outputPtr = output.data<float>();
+
+ // Generate detection results
+ DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
+
+ // Update coordinates according to strides
+ for (size_t box_index = 0; box_index < expandedStrides.size(); ++box_index) {
+ size_t startPos = outputShape[2] * box_index;
+ outputPtr[startPos] = (outputPtr[startPos] + grids[box_index].first) * expandedStrides[box_index];
+ outputPtr[startPos + 1] = (outputPtr[startPos + 1] + grids[box_index].second) * expandedStrides[box_index];
+ outputPtr[startPos + 2] = std::exp(outputPtr[startPos + 2]) * expandedStrides[box_index];
+ outputPtr[startPos + 3] = std::exp(outputPtr[startPos + 3]) * expandedStrides[box_index];
+ }
+
+ // Filter predictions
+ std::vector<Anchor> validBoxes;
+ std::vector<float> scores;
+ std::vector<size_t> classes;
+ for (size_t box_index = 0; box_index < expandedStrides.size(); ++box_index) {
+ size_t startPos = outputShape[2] * box_index;
+ float score = outputPtr[startPos + 4];
+ if (score < confidenceThreshold)
+ continue;
+ float maxClassScore = -1;
+ size_t mainClass = 0;
+ for (size_t class_index = 0; class_index < numberOfClasses; ++class_index) {
+ if (outputPtr[startPos + 5 + class_index] > maxClassScore) {
+ maxClassScore = outputPtr[startPos + 5 + class_index];
+ mainClass = class_index;
+ }
+ }
+
+ // Filter by score
+ score *= maxClassScore;
+ if (score < confidenceThreshold)
+ continue;
+
+ // Add successful boxes
+ scores.push_back(score);
+ classes.push_back(mainClass);
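+        // Convert (cx, cy, w, h) to corner coordinates, then undo the letterbox scale to get
+        // boxes in original-image pixels.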
+ Anchor trueBox = {outputPtr[startPos + 0] - outputPtr[startPos + 2] / 2, outputPtr[startPos + 1] - outputPtr[startPos + 3] / 2,
+ outputPtr[startPos + 0] + outputPtr[startPos + 2] / 2, outputPtr[startPos + 1] + outputPtr[startPos + 3] / 2};
+ validBoxes.push_back(Anchor({trueBox.left / scale.scaleX, trueBox.top / scale.scaleY,
+ trueBox.right / scale.scaleX, trueBox.bottom / scale.scaleY}));
+ }
+
+ // NMS for valid boxes
+ std::vector<int> keep = nms(validBoxes, scores, boxIOUThreshold, true);
+ for (auto& index: keep) {
+ // Create new detected box
+ DetectedObject obj;
+ obj.x = clamp(validBoxes[index].left, 0.f, static_cast<float>(scale.inputImgWidth));
+ obj.y = clamp(validBoxes[index].top, 0.f, static_cast<float>(scale.inputImgHeight));
+ obj.height = clamp(validBoxes[index].bottom - validBoxes[index].top, 0.f, static_cast<float>(scale.inputImgHeight));
+ obj.width = clamp(validBoxes[index].right - validBoxes[index].left, 0.f, static_cast<float>(scale.inputImgWidth));
+ obj.confidence = scores[index];
+ obj.labelID = classes[index];
+ obj.label = getLabelName(classes[index]);
+ result->objects.push_back(obj);
+ }
+
+ return std::unique_ptr<ResultBase>(result);
+}
diff --git a/python/openvino/runtime/common/models/src/hpe_model_associative_embedding.cpp b/python/openvino/runtime/common/models/src/hpe_model_associative_embedding.cpp
new file mode 100644
index 0000000..33a3604
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/hpe_model_associative_embedding.cpp
@@ -0,0 +1,264 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/hpe_model_associative_embedding.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <cmath>
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include <utils/image_utils.h>
+#include <utils/ocv_common.hpp>
+#include <utils/slog.hpp>
+
+#include "models/associative_embedding_decoder.h"
+#include "models/input_data.h"
+#include "models/internal_model_data.h"
+#include "models/results.h"
+
+const cv::Vec3f HpeAssociativeEmbedding::meanPixel = cv::Vec3f::all(128);
+const float HpeAssociativeEmbedding::detectionThreshold = 0.1f;
+const float HpeAssociativeEmbedding::tagThreshold = 1.0f;
+
+HpeAssociativeEmbedding::HpeAssociativeEmbedding(const std::string& modelFileName,
+ double aspectRatio,
+ int targetSize,
+ float confidenceThreshold,
+ const std::string& layout,
+ float delta,
+ RESIZE_MODE resizeMode)
+ : ImageModel(modelFileName, false, layout),
+ aspectRatio(aspectRatio),
+ targetSize(targetSize),
+ confidenceThreshold(confidenceThreshold),
+ delta(delta) {
+          this->resizeMode = resizeMode;
+ interpolationMode = cv::INTER_CUBIC;
+ }
+
+void HpeAssociativeEmbedding::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare input Tensors ------------------------------------------------------
+ if (model->inputs().size() != 1) {
+ throw std::logic_error("HPE AE model wrapper supports topologies with only 1 input.");
+ }
+ inputsNames.push_back(model->input().get_any_name());
+
+ const ov::Shape& inputShape = model->input().get_shape();
+ const ov::Layout& inputLayout = getInputLayout(model->input());
+
+ if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
+ inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
+ throw std::logic_error("3-channel 4-dimensional model's input is expected");
+ }
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
+
+ ppp.input().model().set_layout(inputLayout);
+
+ // --------------------------- Prepare output Tensors -----------------------------------------------------
+ const ov::OutputVector& outputs = model->outputs();
+ if (outputs.size() != 2 && outputs.size() != 3) {
+ throw std::logic_error("HPE AE model model wrapper supports topologies only with 2 or 3 outputs");
+ }
+
+ for (const auto& output : model->outputs()) {
+ const auto& outTensorName = output.get_any_name();
+ ppp.output(outTensorName).tensor().set_element_type(ov::element::f32);
+
+ for (const auto& name : output.get_names()) {
+ outputsNames.push_back(name);
+ }
+
+ const ov::Shape& outputShape = output.get_shape();
+ if (outputShape.size() != 4 && outputShape.size() != 5) {
+ throw std::logic_error("output tensors are expected to be 4-dimensional or 5-dimensional");
+ }
+ if (outputShape[ov::layout::batch_idx("NC...")] != 1 || outputShape[ov::layout::channels_idx("NC...")] != 17) {
+ throw std::logic_error("output tensors are expected to have 1 batch size and 17 channels");
+ }
+ }
+ model = ppp.build();
+
+ embeddingsTensorName = findTensorByName("embeddings", outputsNames);
+ heatmapsTensorName = findTensorByName("heatmaps", outputsNames);
+ try {
+ nmsHeatmapsTensorName = findTensorByName("nms_heatmaps", outputsNames);
+ } catch (const std::runtime_error&) { nmsHeatmapsTensorName = heatmapsTensorName; }
+
+ changeInputSize(model);
+}
+
+void HpeAssociativeEmbedding::changeInputSize(std::shared_ptr<ov::Model>& model) {
+ ov::Shape inputShape = model->input().get_shape();
+ const ov::Layout& layout = ov::layout::get_layout(model->input());
+ const auto batchId = ov::layout::batch_idx(layout);
+ const auto heightId = ov::layout::height_idx(layout);
+ const auto widthId = ov::layout::width_idx(layout);
+
+ if (!targetSize) {
+ targetSize = static_cast<int>(std::min(inputShape[heightId], inputShape[widthId]));
+ }
+ int inputHeight = aspectRatio >= 1.0 ? targetSize : static_cast<int>(std::round(targetSize / aspectRatio));
+ int inputWidth = aspectRatio >= 1.0 ? static_cast<int>(std::round(targetSize * aspectRatio)) : targetSize;
+ int height = static_cast<int>((inputHeight + stride - 1) / stride) * stride;
+ int width = static_cast<int>((inputWidth + stride - 1) / stride) * stride;
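+    // Round both sides up to the nearest multiple of the network stride; e.g. a 455-pixel width
+    // with a stride of 32 becomes 480 (the stride value here is whatever the model defines).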
+ inputShape[batchId] = 1;
+ inputShape[heightId] = height;
+ inputShape[widthId] = width;
+ inputLayerSize = cv::Size(width, height);
+
+ model->reshape(inputShape);
+}
+
+std::shared_ptr<InternalModelData> HpeAssociativeEmbedding::preprocess(const InputData& inputData,
+ ov::InferRequest& request) {
+ auto& image = inputData.asRef<ImageInputData>().inputImage;
+ cv::Rect roi;
+ auto paddedImage = resizeImageExt(image, inputLayerSize.width, inputLayerSize.height, resizeMode, interpolationMode, &roi);
+ if (inputLayerSize.height - stride >= roi.height || inputLayerSize.width - stride >= roi.width) {
+ slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
+ }
+ request.set_input_tensor(wrapMat2Tensor(paddedImage));
+
+ return std::make_shared<InternalScaleData>(paddedImage.cols,
+ paddedImage.rows,
+ image.size().width / static_cast<float>(roi.width),
+ image.size().height / static_cast<float>(roi.height));
+}
+
+std::unique_ptr<ResultBase> HpeAssociativeEmbedding::postprocess(InferenceResult& infResult) {
+ HumanPoseResult* result = new HumanPoseResult(infResult.frameId, infResult.metaData);
+
+ const auto& aembds = infResult.outputsData[embeddingsTensorName];
+ const ov::Shape& aembdsShape = aembds.get_shape();
+ float* const aembdsMapped = aembds.data<float>();
+ std::vector<cv::Mat> aembdsMaps = split(aembdsMapped, aembdsShape);
+
+ const auto& heats = infResult.outputsData[heatmapsTensorName];
+ const ov::Shape& heatMapsShape = heats.get_shape();
+ float* const heatMapsMapped = heats.data<float>();
+ std::vector<cv::Mat> heatMaps = split(heatMapsMapped, heatMapsShape);
+
+ std::vector<cv::Mat> nmsHeatMaps = heatMaps;
+ if (nmsHeatmapsTensorName != heatmapsTensorName) {
+ const auto& nmsHeats = infResult.outputsData[nmsHeatmapsTensorName];
+ const ov::Shape& nmsHeatMapsShape = nmsHeats.get_shape();
+ float* const nmsHeatMapsMapped = nmsHeats.data<float>();
+ nmsHeatMaps = split(nmsHeatMapsMapped, nmsHeatMapsShape);
+ }
+ std::vector<HumanPose> poses = extractPoses(heatMaps, aembdsMaps, nmsHeatMaps);
+
+ // Rescale poses to the original image
+ const auto& scale = infResult.internalModelData->asRef<InternalScaleData>();
+ const float outputScale = inputLayerSize.width / static_cast<float>(heatMapsShape[3]);
+ float shiftX = 0.0, shiftY = 0.0;
+ float scaleX = 1.0, scaleY = 1.0;
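+    // With letterbox resize a single uniform scale applies and the half-padding offset is
+    // folded into the shift; otherwise x and y scale independently.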
+
+ if (resizeMode == RESIZE_KEEP_ASPECT_LETTERBOX) {
+ scaleX = scaleY = std::min(scale.scaleX, scale.scaleY);
+ if (aspectRatio >= 1.0)
+ shiftX = static_cast<float>((targetSize * scaleX * aspectRatio - scale.inputImgWidth * scaleX) / 2);
+ else
+ shiftY = static_cast<float>((targetSize * scaleY / aspectRatio - scale.inputImgHeight * scaleY) / 2);
+ scaleX = scaleY *= outputScale;
+ } else {
+ scaleX = scale.scaleX * outputScale;
+ scaleY = scale.scaleY * outputScale;
+ }
+
+ for (auto& pose : poses) {
+ for (auto& keypoint : pose.keypoints) {
+ if (keypoint != cv::Point2f(-1, -1)) {
+ keypoint.x = keypoint.x * scaleX + shiftX;
+ keypoint.y = keypoint.y * scaleY + shiftY;
+ }
+ }
+ result->poses.push_back(pose);
+ }
+
+ return std::unique_ptr<ResultBase>(result);
+}
+
+std::string HpeAssociativeEmbedding::findTensorByName(const std::string& tensorName,
+ const std::vector<std::string>& outputsNames) {
+ std::vector<std::string> suitableLayers;
+ for (auto& outputName : outputsNames) {
+ if (outputName.rfind(tensorName, 0) == 0) {
+ suitableLayers.push_back(outputName);
+ }
+ }
+ if (suitableLayers.empty()) {
+ throw std::runtime_error("Suitable tensor for " + tensorName + " output is not found");
+ } else if (suitableLayers.size() > 1) {
+ throw std::runtime_error("More than 1 tensor matched to " + tensorName + " output");
+ }
+ return suitableLayers[0];
+}
+
+std::vector<cv::Mat> HpeAssociativeEmbedding::split(float* data, const ov::Shape& shape) {
+ std::vector<cv::Mat> flattenData(shape[1]);
+ for (size_t i = 0; i < flattenData.size(); i++) {
+ flattenData[i] = cv::Mat(shape[2], shape[3], CV_32FC1, data + i * shape[2] * shape[3]);
+ }
+ return flattenData;
+}
+
+std::vector<HumanPose> HpeAssociativeEmbedding::extractPoses(std::vector<cv::Mat>& heatMaps,
+ const std::vector<cv::Mat>& aembdsMaps,
+ const std::vector<cv::Mat>& nmsHeatMaps) const {
+ std::vector<std::vector<Peak>> allPeaks(numJoints);
+ for (int i = 0; i < numJoints; i++) {
+ findPeaks(nmsHeatMaps, aembdsMaps, allPeaks, i, maxNumPeople, detectionThreshold);
+ }
+ std::vector<Pose> allPoses = matchByTag(allPeaks, maxNumPeople, numJoints, tagThreshold);
+ // swap for all poses
+ for (auto& pose : allPoses) {
+ for (size_t j = 0; j < numJoints; j++) {
+ Peak& peak = pose.getPeak(j);
+ std::swap(peak.keypoint.x, peak.keypoint.y);
+ }
+ }
+ std::vector<HumanPose> poses;
+ for (size_t i = 0; i < allPoses.size(); i++) {
+ Pose& pose = allPoses[i];
+ // Filtering poses with low mean scores
+ if (pose.getMeanScore() <= confidenceThreshold) {
+ continue;
+ }
+ for (size_t j = 0; j < heatMaps.size(); j++) {
+ heatMaps[j] = cv::abs(heatMaps[j]);
+ }
+ adjustAndRefine(allPoses, heatMaps, aembdsMaps, i, delta);
+ std::vector<cv::Point2f> keypoints;
+ for (size_t j = 0; j < numJoints; j++) {
+ Peak& peak = pose.getPeak(j);
+ keypoints.push_back(peak.keypoint);
+ }
+ poses.push_back({keypoints, pose.getMeanScore()});
+ }
+ return poses;
+}
diff --git a/python/openvino/runtime/common/models/src/hpe_model_openpose.cpp b/python/openvino/runtime/common/models/src/hpe_model_openpose.cpp
new file mode 100644
index 0000000..d8b4cb6
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/hpe_model_openpose.cpp
@@ -0,0 +1,256 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/hpe_model_openpose.h"
+
+#include <algorithm>
+#include <cmath>
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <opencv2/imgproc.hpp>
+#include <openvino/openvino.hpp>
+
+#include <utils/image_utils.h>
+#include <utils/ocv_common.hpp>
+#include <utils/slog.hpp>
+
+#include "models/input_data.h"
+#include "models/internal_model_data.h"
+#include "models/openpose_decoder.h"
+#include "models/results.h"
+
+const cv::Vec3f HPEOpenPose::meanPixel = cv::Vec3f::all(128);
+const float HPEOpenPose::minPeaksDistance = 3.0f;
+const float HPEOpenPose::midPointsScoreThreshold = 0.05f;
+const float HPEOpenPose::foundMidPointsRatioThreshold = 0.8f;
+const float HPEOpenPose::minSubsetScore = 0.2f;
+
+HPEOpenPose::HPEOpenPose(const std::string& modelFileName,
+ double aspectRatio,
+ int targetSize,
+ float confidenceThreshold,
+ const std::string& layout)
+ : ImageModel(modelFileName, false, layout),
+ aspectRatio(aspectRatio),
+ targetSize(targetSize),
+ confidenceThreshold(confidenceThreshold) {
+ resizeMode = RESIZE_KEEP_ASPECT;
+ interpolationMode = cv::INTER_CUBIC;
+ }
+
+void HPEOpenPose::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare input ------------------------------------------------------
+ if (model->inputs().size() != 1) {
+ throw std::logic_error("HPE OpenPose model wrapper supports topologies with only 1 input");
+ }
+ inputsNames.push_back(model->input().get_any_name());
+ const ov::Shape& inputShape = model->input().get_shape();
+ const ov::Layout& inputLayout = getInputLayout(model->input());
+
+ if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
+ inputShape[ov::layout::channels_idx(inputLayout)] != 3)
+ throw std::logic_error("3-channel 4-dimensional model's input is expected");
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
+
+ ppp.input().model().set_layout(inputLayout);
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ const ov::OutputVector& outputs = model->outputs();
+ if (outputs.size() != 2) {
+ throw std::runtime_error("HPE OpenPose supports topologies with only 2 outputs");
+ }
+
+ const ov::Layout outputLayout("NCHW");
+ for (const auto& output : model->outputs()) {
+ const auto& outTensorName = output.get_any_name();
+ ppp.output(outTensorName).tensor().set_element_type(ov::element::f32).set_layout(outputLayout);
+ outputsNames.push_back(outTensorName);
+ }
+ model = ppp.build();
+
+ const size_t batchId = ov::layout::batch_idx(outputLayout);
+ const size_t channelsId = ov::layout::channels_idx(outputLayout);
+ const size_t widthId = ov::layout::width_idx(outputLayout);
+ const size_t heightId = ov::layout::height_idx(outputLayout);
+
+ ov::Shape heatmapsOutputShape = model->outputs().front().get_shape();
+ ov::Shape pafsOutputShape = model->outputs().back().get_shape();
+ if (heatmapsOutputShape[channelsId] > pafsOutputShape[channelsId]) {
+ std::swap(heatmapsOutputShape, pafsOutputShape);
+ std::swap(outputsNames[0], outputsNames[1]);
+ }
+
+ if (heatmapsOutputShape.size() != 4 || heatmapsOutputShape[batchId] != 1 ||
+ heatmapsOutputShape[ov::layout::channels_idx(outputLayout)] != keypointsNumber + 1) {
+ throw std::logic_error("1x" + std::to_string(keypointsNumber + 1) +
+ "xHFMxWFM dimension of model's heatmap is expected");
+ }
+ if (pafsOutputShape.size() != 4 || pafsOutputShape[batchId] != 1 ||
+ pafsOutputShape[channelsId] != 2 * (keypointsNumber + 1)) {
+ throw std::logic_error("1x" + std::to_string(2 * (keypointsNumber + 1)) +
+ "xHFMxWFM dimension of model's output is expected");
+ }
+ if (pafsOutputShape[heightId] != heatmapsOutputShape[heightId] ||
+ pafsOutputShape[widthId] != heatmapsOutputShape[widthId]) {
+ throw std::logic_error("output and heatmap are expected to have matching last two dimensions");
+ }
+
+ changeInputSize(model);
+}
+
+void HPEOpenPose::changeInputSize(std::shared_ptr<ov::Model>& model) {
+ ov::Shape inputShape = model->input().get_shape();
+ const ov::Layout& layout = ov::layout::get_layout(model->inputs().front());
+ const auto batchId = ov::layout::batch_idx(layout);
+ const auto heightId = ov::layout::height_idx(layout);
+ const auto widthId = ov::layout::width_idx(layout);
+
+ if (!targetSize) {
+ targetSize = inputShape[heightId];
+ }
+ int height = static_cast<int>((targetSize + stride - 1) / stride) * stride;
+ int inputWidth = static_cast<int>(std::round(targetSize * aspectRatio));
+ int width = static_cast<int>((inputWidth + stride - 1) / stride) * stride;
+ inputShape[batchId] = 1;
+ inputShape[heightId] = height;
+ inputShape[widthId] = width;
+ inputLayerSize = cv::Size(width, height);
+ model->reshape(inputShape);
+}
+
+std::shared_ptr<InternalModelData> HPEOpenPose::preprocess(const InputData& inputData, ov::InferRequest& request) {
+ auto& image = inputData.asRef<ImageInputData>().inputImage;
+ cv::Rect roi;
+ auto paddedImage =
+ resizeImageExt(image, inputLayerSize.width, inputLayerSize.height, resizeMode, interpolationMode, &roi);
+ if (inputLayerSize.width < roi.width)
+ throw std::runtime_error("The image aspect ratio doesn't fit current model shape");
+
+ if (inputLayerSize.width - stride >= roi.width) {
+ slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
+ }
+
+ request.set_input_tensor(wrapMat2Tensor(paddedImage));
+ return std::make_shared<InternalScaleData>(paddedImage.cols,
+ paddedImage.rows,
+ image.cols / static_cast<float>(roi.width),
+ image.rows / static_cast<float>(roi.height));
+}
+
+std::unique_ptr<ResultBase> HPEOpenPose::postprocess(InferenceResult& infResult) {
+ HumanPoseResult* result = new HumanPoseResult(infResult.frameId, infResult.metaData);
+
+ const auto& heatMapsMapped = infResult.outputsData[outputsNames[0]];
+ const auto& outputMapped = infResult.outputsData[outputsNames[1]];
+
+ const ov::Shape& outputShape = outputMapped.get_shape();
+ const ov::Shape& heatMapShape = heatMapsMapped.get_shape();
+
+ float* const predictions = outputMapped.data<float>();
+ float* const heats = heatMapsMapped.data<float>();
+
+ std::vector<cv::Mat> heatMaps(keypointsNumber);
+ for (size_t i = 0; i < heatMaps.size(); i++) {
+ heatMaps[i] =
+ cv::Mat(heatMapShape[2], heatMapShape[3], CV_32FC1, heats + i * heatMapShape[2] * heatMapShape[3]);
+ }
+ resizeFeatureMaps(heatMaps);
+
+ std::vector<cv::Mat> pafs(outputShape[1]);
+ for (size_t i = 0; i < pafs.size(); i++) {
+ pafs[i] =
+ cv::Mat(heatMapShape[2], heatMapShape[3], CV_32FC1, predictions + i * heatMapShape[2] * heatMapShape[3]);
+ }
+ resizeFeatureMaps(pafs);
+
+ std::vector<HumanPose> poses = extractPoses(heatMaps, pafs);
+
+ const auto& scale = infResult.internalModelData->asRef<InternalScaleData>();
+ float scaleX = stride / upsampleRatio * scale.scaleX;
+ float scaleY = stride / upsampleRatio * scale.scaleY;
+ for (auto& pose : poses) {
+ for (auto& keypoint : pose.keypoints) {
+ if (keypoint != cv::Point2f(-1, -1)) {
+ keypoint.x *= scaleX;
+ keypoint.y *= scaleY;
+ }
+ }
+ }
+ for (size_t i = 0; i < poses.size(); ++i) {
+ result->poses.push_back(poses[i]);
+ }
+
+ return std::unique_ptr<ResultBase>(result);
+}
+
+void HPEOpenPose::resizeFeatureMaps(std::vector<cv::Mat>& featureMaps) const {
+ for (auto& featureMap : featureMaps) {
+ cv::resize(featureMap, featureMap, cv::Size(), upsampleRatio, upsampleRatio, cv::INTER_CUBIC);
+ }
+}
+
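+// Functor for cv::parallel_for_: each worker extracts peaks from its own slice of heatmaps.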
+class FindPeaksBody : public cv::ParallelLoopBody {
+public:
+ FindPeaksBody(const std::vector<cv::Mat>& heatMaps,
+ float minPeaksDistance,
+ std::vector<std::vector<Peak>>& peaksFromHeatMap,
+ float confidenceThreshold)
+ : heatMaps(heatMaps),
+ minPeaksDistance(minPeaksDistance),
+ peaksFromHeatMap(peaksFromHeatMap),
+ confidenceThreshold(confidenceThreshold) {}
+
+ void operator()(const cv::Range& range) const override {
+ for (int i = range.start; i < range.end; i++) {
+ findPeaks(heatMaps, minPeaksDistance, peaksFromHeatMap, i, confidenceThreshold);
+ }
+ }
+
+private:
+ const std::vector<cv::Mat>& heatMaps;
+ float minPeaksDistance;
+ std::vector<std::vector<Peak>>& peaksFromHeatMap;
+ float confidenceThreshold;
+};
+
+std::vector<HumanPose> HPEOpenPose::extractPoses(const std::vector<cv::Mat>& heatMaps,
+ const std::vector<cv::Mat>& pafs) const {
+ std::vector<std::vector<Peak>> peaksFromHeatMap(heatMaps.size());
+ FindPeaksBody findPeaksBody(heatMaps, minPeaksDistance, peaksFromHeatMap, confidenceThreshold);
+ cv::parallel_for_(cv::Range(0, static_cast<int>(heatMaps.size())), findPeaksBody);
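+    // Make peak ids globally unique by offsetting each heatmap's ids by the count of peaks
+    // found in all preceding heatmaps.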
+ int peaksBefore = 0;
+ for (size_t heatmapId = 1; heatmapId < heatMaps.size(); heatmapId++) {
+ peaksBefore += static_cast<int>(peaksFromHeatMap[heatmapId - 1].size());
+ for (auto& peak : peaksFromHeatMap[heatmapId]) {
+ peak.id += peaksBefore;
+ }
+ }
+ std::vector<HumanPose> poses = groupPeaksToPoses(peaksFromHeatMap,
+ pafs,
+ keypointsNumber,
+ midPointsScoreThreshold,
+ foundMidPointsRatioThreshold,
+ minJointsNumber,
+ minSubsetScore);
+ return poses;
+}
diff --git a/python/openvino/runtime/common/models/src/image_model.cpp b/python/openvino/runtime/common/models/src/image_model.cpp
new file mode 100644
index 0000000..511faf3
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/image_model.cpp
@@ -0,0 +1,57 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/image_model.h"
+
+#include <stdexcept>
+#include <vector>
+
+#include <opencv2/core.hpp>
+#include <openvino/openvino.hpp>
+
+#include <utils/image_utils.h>
+#include <utils/ocv_common.hpp>
+
+#include "models/input_data.h"
+#include "models/internal_model_data.h"
+
+ImageModel::ImageModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout)
+ : ModelBase(modelFileName, layout),
+ useAutoResize(useAutoResize) {}
+
+std::shared_ptr<InternalModelData> ImageModel::preprocess(const InputData& inputData, ov::InferRequest& request) {
+ const auto& origImg = inputData.asRef<ImageInputData>().inputImage;
+ auto img = inputTransform(origImg);
+
+ if (!useAutoResize) {
+        /* Resize and copy data from the image to the input tensor */
+ const ov::Tensor& frameTensor = request.get_tensor(inputsNames[0]); // first input should be image
+ const ov::Shape& tensorShape = frameTensor.get_shape();
+ const ov::Layout layout("NHWC");
+ const size_t width = tensorShape[ov::layout::width_idx(layout)];
+ const size_t height = tensorShape[ov::layout::height_idx(layout)];
+ const size_t channels = tensorShape[ov::layout::channels_idx(layout)];
+ if (static_cast<size_t>(img.channels()) != channels) {
+ throw std::runtime_error("The number of channels for model input and image must match");
+ }
+ if (channels != 1 && channels != 3) {
+ throw std::runtime_error("Unsupported number of channels");
+ }
+ img = resizeImageExt(img, width, height, resizeMode, interpolationMode);
+ }
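+    // With useAutoResize the full-size image is wrapped as-is; resizing is expected to happen
+    // inside the model's own preprocessing graph (the wrappers enable this via
+    // set_spatial_dynamic_shape and a resize step in PrePostProcessor).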
+ request.set_tensor(inputsNames[0], wrapMat2Tensor(img));
+ return std::make_shared<InternalImageModelData>(origImg.cols, origImg.rows);
+}
diff --git a/python/openvino/runtime/common/models/src/jpeg_restoration_model.cpp b/python/openvino/runtime/common/models/src/jpeg_restoration_model.cpp
new file mode 100644
index 0000000..8eb3ae1
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/jpeg_restoration_model.cpp
@@ -0,0 +1,167 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <opencv2/core.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
+#include <openvino/openvino.hpp>
+
+#include <utils/ocv_common.hpp>
+#include <utils/slog.hpp>
+
+#include "models/image_model.h"
+#include "models/input_data.h"
+#include "models/internal_model_data.h"
+#include "models/jpeg_restoration_model.h"
+#include "models/results.h"
+
+JPEGRestorationModel::JPEGRestorationModel(const std::string& modelFileName,
+ const cv::Size& inputImgSize,
+ bool _jpegCompression,
+ const std::string& layout)
+ : ImageModel(modelFileName, false, layout) {
+ netInputHeight = inputImgSize.height;
+ netInputWidth = inputImgSize.width;
+ jpegCompression = _jpegCompression;
+}
+
+void JPEGRestorationModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output -------------------------------------------------
+ // --------------------------- Prepare input ------------------------------------------------------
+ if (model->inputs().size() != 1) {
+ throw std::logic_error("The JPEG Restoration model wrapper supports topologies with only 1 input");
+ }
+ inputsNames.push_back(model->input().get_any_name());
+
+ const ov::Shape& inputShape = model->input().get_shape();
+ const ov::Layout& inputLayout = getInputLayout(model->input());
+
+ if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
+ inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
+ throw std::logic_error("3-channel 4-dimensional model's input is expected");
+ }
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC");
+
+ ppp.input().model().set_layout(inputLayout);
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ const ov::OutputVector& outputs = model->outputs();
+ if (outputs.size() != 1) {
+ throw std::logic_error("The JPEG Restoration model wrapper supports topologies with only 1 output");
+ }
+ const ov::Shape& outputShape = model->output().get_shape();
+ const ov::Layout outputLayout{"NCHW"};
+ if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 ||
+ outputShape[ov::layout::channels_idx(outputLayout)] != 3) {
+ throw std::logic_error("3-channel 4-dimensional model's output is expected");
+ }
+
+ outputsNames.push_back(model->output().get_any_name());
+ ppp.output().tensor().set_element_type(ov::element::f32);
+ model = ppp.build();
+
+ changeInputSize(model);
+}
+
+void JPEGRestorationModel::changeInputSize(std::shared_ptr<ov::Model>& model) {
+ ov::Shape inputShape = model->input().get_shape();
+ const ov::Layout& layout = ov::layout::get_layout(model->input());
+
+ const auto batchId = ov::layout::batch_idx(layout);
+ const auto heightId = ov::layout::height_idx(layout);
+ const auto widthId = ov::layout::width_idx(layout);
+
+ if (inputShape[heightId] % stride || inputShape[widthId] % stride) {
+ throw std::logic_error("The shape of the model input must be divisible by stride");
+ }
+
+ netInputHeight = static_cast<int>((netInputHeight + stride - 1) / stride) * stride;
+ netInputWidth = static_cast<int>((netInputWidth + stride - 1) / stride) * stride;
+
+ inputShape[batchId] = 1;
+ inputShape[heightId] = netInputHeight;
+ inputShape[widthId] = netInputWidth;
+
+ model->reshape(inputShape);
+}
+
+std::shared_ptr<InternalModelData> JPEGRestorationModel::preprocess(const InputData& inputData,
+ ov::InferRequest& request) {
+ cv::Mat image = inputData.asRef<ImageInputData>().inputImage;
+ const size_t h = image.rows;
+ const size_t w = image.cols;
+ cv::Mat resizedImage;
+ if (jpegCompression) {
+ std::vector<uchar> encimg;
+ std::vector<int> params{cv::IMWRITE_JPEG_QUALITY, 40};
+ cv::imencode(".jpg", image, encimg, params);
+ image = cv::imdecode(cv::Mat(encimg), 3);
+ }
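+    // The re-encode at IMWRITE_JPEG_QUALITY 40 above presumably synthesizes the compression
+    // artifacts that the restoration model is meant to remove.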
+
+ if (netInputHeight - stride < h && h <= netInputHeight && netInputWidth - stride < w && w <= netInputWidth) {
+ int bottom = netInputHeight - h;
+ int right = netInputWidth - w;
+ cv::copyMakeBorder(image, resizedImage, 0, bottom, 0, right, cv::BORDER_CONSTANT, 0);
+ } else {
+ slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
+ cv::resize(image, resizedImage, cv::Size(netInputWidth, netInputHeight));
+ }
+ request.set_input_tensor(wrapMat2Tensor(resizedImage));
+
+ return std::make_shared<InternalImageModelData>(image.cols, image.rows);
+}
+
+std::unique_ptr<ResultBase> JPEGRestorationModel::postprocess(InferenceResult& infResult) {
+ ImageResult* result = new ImageResult;
+ *static_cast<ResultBase*>(result) = static_cast<ResultBase&>(infResult);
+
+ const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>();
+ const auto outputData = infResult.getFirstOutputTensor().data<float>();
+
+ std::vector<cv::Mat> imgPlanes;
+ const ov::Shape& outputShape = infResult.getFirstOutputTensor().get_shape();
+ const size_t outHeight = static_cast<int>(outputShape[2]);
+ const size_t outWidth = static_cast<int>(outputShape[3]);
+ const size_t numOfPixels = outWidth * outHeight;
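+    // Wrap the three CHW planes of the float output as cv::Mat headers and merge them into an
+    // interleaved image without copying the tensor data.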
+ imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])),
+ cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])),
+ cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))};
+ cv::Mat resultImg;
+ cv::merge(imgPlanes, resultImg);
+
+ if (netInputHeight - stride < static_cast<size_t>(inputImgSize.inputImgHeight) &&
+ static_cast<size_t>(inputImgSize.inputImgHeight) <= netInputHeight &&
+ netInputWidth - stride < static_cast<size_t>(inputImgSize.inputImgWidth) &&
+ static_cast<size_t>(inputImgSize.inputImgWidth) <= netInputWidth) {
+ result->resultImage = resultImg(cv::Rect(0, 0, inputImgSize.inputImgWidth, inputImgSize.inputImgHeight));
+ } else {
+ cv::resize(resultImg, result->resultImage, cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight));
+ }
+
+ result->resultImage.convertTo(result->resultImage, CV_8UC3, 255);
+
+ return std::unique_ptr<ResultBase>(result);
+}
diff --git a/python/openvino/runtime/common/models/src/model_base.cpp b/python/openvino/runtime/common/models/src/model_base.cpp
new file mode 100644
index 0000000..c2ebd1b
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/model_base.cpp
@@ -0,0 +1,67 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/model_base.h"
+
+#include <utility>
+
+#include <openvino/openvino.hpp>
+
+#include <utils/common.hpp>
+#include <utils/config_factory.h>
+#include <utils/ocv_common.hpp>
+#include <utils/slog.hpp>
+
+std::shared_ptr<ov::Model> ModelBase::prepareModel(ov::Core& core) {
+ // --------------------------- Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
+ /** Read model **/
+ slog::info << "Reading model " << modelFileName << slog::endl;
+ std::shared_ptr<ov::Model> model = core.read_model(modelFileName);
+ logBasicModelInfo(model);
+ // -------------------------- Reading all outputs names and customizing I/O tensors (in inherited classes)
+ prepareInputsOutputs(model);
+
+ /** Set batch size to 1 **/
+ ov::set_batch(model, 1);
+
+ return model;
+}
+
+ov::CompiledModel ModelBase::compileModel(const ModelConfig& config, ov::Core& core) {
+ this->config = config;
+ auto model = prepareModel(core);
+ compiledModel = core.compile_model(model, config.deviceName, config.compiledModelConfig);
+ logCompiledModelInfo(compiledModel, modelFileName, config.deviceName);
+ return compiledModel;
+}
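+
+// Illustrative call sequence (a sketch; the ModelConfig fields come from config_factory.h):
+//   ov::Core core;
+//   ov::CompiledModel compiled = wrapper->compileModel(config, core);  // read -> prepare I/O -> compile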
+
+ov::Layout ModelBase::getInputLayout(const ov::Output<ov::Node>& input) {
+ const ov::Shape& inputShape = input.get_shape();
+ ov::Layout layout = ov::layout::get_layout(input);
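+    // Resolution order: the layout stored in the model itself, then the user-supplied layout
+    // string, and finally a heuristic guess from the tensor shape.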
+ if (layout.empty()) {
+ if (inputsLayouts.empty()) {
+ layout = getLayoutFromShape(inputShape);
+ slog::warn << "Automatically detected layout '" << layout.to_string() << "' for input '"
+ << input.get_any_name() << "' will be used." << slog::endl;
+ } else if (inputsLayouts.size() == 1) {
+ layout = inputsLayouts.begin()->second;
+ } else {
+ layout = inputsLayouts[input.get_any_name()];
+ }
+ }
+
+ return layout;
+}
diff --git a/python/openvino/runtime/common/models/src/openpose_decoder.cpp b/python/openvino/runtime/common/models/src/openpose_decoder.cpp
new file mode 100644
index 0000000..6d51607
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/openpose_decoder.cpp
@@ -0,0 +1,345 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/openpose_decoder.h"
+
+#include <algorithm>
+#include <cmath>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include <utils/common.hpp>
+
+#include "models/results.h"
+
+Peak::Peak(const int id, const cv::Point2f& pos, const float score) : id(id), pos(pos), score(score) {}
+
+HumanPoseByPeaksIndices::HumanPoseByPeaksIndices(const int keypointsNumber)
+ : peaksIndices(std::vector<int>(keypointsNumber, -1)),
+ nJoints(0),
+ score(0.0f) {}
+
+TwoJointsConnection::TwoJointsConnection(const int firstJointIdx, const int secondJointIdx, const float score)
+ : firstJointIdx(firstJointIdx),
+ secondJointIdx(secondJointIdx),
+ score(score) {}
+
+void findPeaks(const std::vector<cv::Mat>& heatMaps,
+ const float minPeaksDistance,
+ std::vector<std::vector<Peak>>& allPeaks,
+ int heatMapId,
+ float confidenceThreshold) {
+ std::vector<cv::Point> peaks;
+ const cv::Mat& heatMap = heatMaps[heatMapId];
+ const float* heatMapData = heatMap.ptr<float>();
+ size_t heatMapStep = heatMap.step1();
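+    // A pixel becomes a candidate peak if it passes the confidence threshold and is strictly
+    // greater than its four neighbours; out-of-range neighbours are treated as zero.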
+ for (int y = -1; y < heatMap.rows + 1; y++) {
+ for (int x = -1; x < heatMap.cols + 1; x++) {
+ float val = 0;
+ if (x >= 0 && y >= 0 && x < heatMap.cols && y < heatMap.rows) {
+ val = heatMapData[y * heatMapStep + x];
+ val = val >= confidenceThreshold ? val : 0;
+ }
+
+ float left_val = 0;
+ if (y >= 0 && x < (heatMap.cols - 1) && y < heatMap.rows) {
+ left_val = heatMapData[y * heatMapStep + x + 1];
+ left_val = left_val >= confidenceThreshold ? left_val : 0;
+ }
+
+ float right_val = 0;
+ if (x > 0 && y >= 0 && y < heatMap.rows) {
+ right_val = heatMapData[y * heatMapStep + x - 1];
+ right_val = right_val >= confidenceThreshold ? right_val : 0;
+ }
+
+ float top_val = 0;
+ if (x >= 0 && x < heatMap.cols && y < (heatMap.rows - 1)) {
+ top_val = heatMapData[(y + 1) * heatMapStep + x];
+ top_val = top_val >= confidenceThreshold ? top_val : 0;
+ }
+
+ float bottom_val = 0;
+ if (x >= 0 && y > 0 && x < heatMap.cols) {
+ bottom_val = heatMapData[(y - 1) * heatMapStep + x];
+ bottom_val = bottom_val >= confidenceThreshold ? bottom_val : 0;
+ }
+
+ if ((val > left_val) && (val > right_val) && (val > top_val) && (val > bottom_val)) {
+ peaks.push_back(cv::Point(x, y));
+ }
+ }
+ }
+ std::sort(peaks.begin(), peaks.end(), [](const cv::Point& a, const cv::Point& b) {
+ return a.x < b.x;
+ });
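+    // Greedy suppression: peaks are scanned in ascending x order and any later peak closer than
+    // minPeaksDistance to an accepted one is dropped.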
+ std::vector<bool> isActualPeak(peaks.size(), true);
+ int peakCounter = 0;
+ std::vector<Peak>& peaksWithScoreAndID = allPeaks[heatMapId];
+ for (size_t i = 0; i < peaks.size(); i++) {
+ if (isActualPeak[i]) {
+ for (size_t j = i + 1; j < peaks.size(); j++) {
+ if (sqrt((peaks[i].x - peaks[j].x) * (peaks[i].x - peaks[j].x) +
+ (peaks[i].y - peaks[j].y) * (peaks[i].y - peaks[j].y)) < minPeaksDistance) {
+ isActualPeak[j] = false;
+ }
+ }
+ peaksWithScoreAndID.push_back(Peak(peakCounter++, peaks[i], heatMap.at<float>(peaks[i])));
+ }
+ }
+}
+
+std::vector<HumanPose> groupPeaksToPoses(const std::vector<std::vector<Peak>>& allPeaks,
+ const std::vector<cv::Mat>& pafs,
+ const size_t keypointsNumber,
+ const float midPointsScoreThreshold,
+ const float foundMidPointsRatioThreshold,
+ const int minJointsNumber,
+ const float minSubsetScore) {
+ static const std::pair<int, int> limbIdsHeatmap[] = {{2, 3},
+ {2, 6},
+ {3, 4},
+ {4, 5},
+ {6, 7},
+ {7, 8},
+ {2, 9},
+ {9, 10},
+ {10, 11},
+ {2, 12},
+ {12, 13},
+ {13, 14},
+ {2, 1},
+ {1, 15},
+ {15, 17},
+ {1, 16},
+ {16, 18},
+ {3, 17},
+ {6, 18}};
+ static const std::pair<int, int> limbIdsPaf[] = {{31, 32},
+ {39, 40},
+ {33, 34},
+ {35, 36},
+ {41, 42},
+ {43, 44},
+ {19, 20},
+ {21, 22},
+ {23, 24},
+ {25, 26},
+ {27, 28},
+ {29, 30},
+ {47, 48},
+ {49, 50},
+ {53, 54},
+ {51, 52},
+ {55, 56},
+ {37, 38},
+ {45, 46}};
+
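+    // limbIdsHeatmap pairs keypoint heatmap indices (1-based) for each limb; limbIdsPaf gives
+    // the matching pair of PAF channels, shifted below by mapIdxOffset = keypointsNumber + 1.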
+ std::vector<Peak> candidates;
+ for (const auto& peaks : allPeaks) {
+ candidates.insert(candidates.end(), peaks.begin(), peaks.end());
+ }
+ std::vector<HumanPoseByPeaksIndices> subset(0, HumanPoseByPeaksIndices(keypointsNumber));
+ for (size_t k = 0; k < arraySize(limbIdsPaf); k++) {
+ std::vector<TwoJointsConnection> connections;
+ const int mapIdxOffset = keypointsNumber + 1;
+ std::pair<cv::Mat, cv::Mat> scoreMid = {pafs[limbIdsPaf[k].first - mapIdxOffset],
+ pafs[limbIdsPaf[k].second - mapIdxOffset]};
+ const int idxJointA = limbIdsHeatmap[k].first - 1;
+ const int idxJointB = limbIdsHeatmap[k].second - 1;
+ const std::vector<Peak>& candA = allPeaks[idxJointA];
+ const std::vector<Peak>& candB = allPeaks[idxJointB];
+ const size_t nJointsA = candA.size();
+ const size_t nJointsB = candB.size();
+ if (nJointsA == 0 && nJointsB == 0) {
+ continue;
+ } else if (nJointsA == 0) {
+ for (size_t i = 0; i < nJointsB; i++) {
+ int num = 0;
+ for (size_t j = 0; j < subset.size(); j++) {
+ if (subset[j].peaksIndices[idxJointB] == candB[i].id) {
+ num++;
+ continue;
+ }
+ }
+ if (num == 0) {
+ HumanPoseByPeaksIndices personKeypoints(keypointsNumber);
+ personKeypoints.peaksIndices[idxJointB] = candB[i].id;
+ personKeypoints.nJoints = 1;
+ personKeypoints.score = candB[i].score;
+ subset.push_back(personKeypoints);
+ }
+ }
+ continue;
+ } else if (nJointsB == 0) {
+ for (size_t i = 0; i < nJointsA; i++) {
+ int num = 0;
+ for (size_t j = 0; j < subset.size(); j++) {
+ if (subset[j].peaksIndices[idxJointA] == candA[i].id) {
+ num++;
+ continue;
+ }
+ }
+ if (num == 0) {
+ HumanPoseByPeaksIndices personKeypoints(keypointsNumber);
+ personKeypoints.peaksIndices[idxJointA] = candA[i].id;
+ personKeypoints.nJoints = 1;
+ personKeypoints.score = candA[i].score;
+ subset.push_back(personKeypoints);
+ }
+ }
+ continue;
+ }
+
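+        // Score each candidate A-B limb with a line integral: sample mid_num points along the
+        // segment and accumulate the dot product of the PAF field with the unit direction;
+        // limbs longer than half the heatmap height receive a length penalty.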
+ std::vector<TwoJointsConnection> tempJointConnections;
+ for (size_t i = 0; i < nJointsA; i++) {
+ for (size_t j = 0; j < nJointsB; j++) {
+ cv::Point2f pt = candA[i].pos * 0.5 + candB[j].pos * 0.5;
+ cv::Point mid = cv::Point(cvRound(pt.x), cvRound(pt.y));
+ cv::Point2f vec = candB[j].pos - candA[i].pos;
+ double norm_vec = cv::norm(vec);
+ if (norm_vec == 0) {
+ continue;
+ }
+ vec /= norm_vec;
+ float score = vec.x * scoreMid.first.at<float>(mid) + vec.y * scoreMid.second.at<float>(mid);
+ int height_n = pafs[0].rows / 2;
+ float suc_ratio = 0.0f;
+ float mid_score = 0.0f;
+ const int mid_num = 10;
+ const float scoreThreshold = -100.0f;
+ if (score > scoreThreshold) {
+ float p_sum = 0;
+ int p_count = 0;
+ cv::Size2f step((candB[j].pos.x - candA[i].pos.x) / (mid_num - 1),
+ (candB[j].pos.y - candA[i].pos.y) / (mid_num - 1));
+ for (int n = 0; n < mid_num; n++) {
+ cv::Point midPoint(cvRound(candA[i].pos.x + n * step.width),
+ cvRound(candA[i].pos.y + n * step.height));
+ cv::Point2f pred(scoreMid.first.at<float>(midPoint), scoreMid.second.at<float>(midPoint));
+ score = vec.x * pred.x + vec.y * pred.y;
+ if (score > midPointsScoreThreshold) {
+ p_sum += score;
+ p_count++;
+ }
+ }
+                    suc_ratio = static_cast<float>(p_count) / mid_num;
+ float ratio = p_count > 0 ? p_sum / p_count : 0.0f;
+ mid_score = ratio + static_cast<float>(std::min(height_n / norm_vec - 1, 0.0));
+ }
+ if (mid_score > 0 && suc_ratio > foundMidPointsRatioThreshold) {
+ tempJointConnections.push_back(TwoJointsConnection(i, j, mid_score));
+ }
+ }
+ }
+ if (!tempJointConnections.empty()) {
+ std::sort(tempJointConnections.begin(),
+ tempJointConnections.end(),
+ [](const TwoJointsConnection& a, const TwoJointsConnection& b) {
+ return (a.score > b.score);
+ });
+ }
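+        // Greedy matching: walk connections in descending score order and accept one while each
+        // candidate joint is still unused, up to min(nJointsA, nJointsB) limbs.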
+ size_t num_limbs = std::min(nJointsA, nJointsB);
+ size_t cnt = 0;
+ std::vector<int> occurA(nJointsA, 0);
+ std::vector<int> occurB(nJointsB, 0);
+ for (size_t row = 0; row < tempJointConnections.size(); row++) {
+ if (cnt == num_limbs) {
+ break;
+ }
+ const int& indexA = tempJointConnections[row].firstJointIdx;
+ const int& indexB = tempJointConnections[row].secondJointIdx;
+ const float& score = tempJointConnections[row].score;
+ if (occurA[indexA] == 0 && occurB[indexB] == 0) {
+ connections.push_back(TwoJointsConnection(candA[indexA].id, candB[indexB].id, score));
+ cnt++;
+ occurA[indexA] = 1;
+ occurB[indexB] = 1;
+ }
+ }
+ if (connections.empty()) {
+ continue;
+ }
+
+ bool extraJointConnections = (k == 17 || k == 18);
+ if (k == 0) {
+ subset = std::vector<HumanPoseByPeaksIndices>(connections.size(), HumanPoseByPeaksIndices(keypointsNumber));
+ for (size_t i = 0; i < connections.size(); i++) {
+ const int& indexA = connections[i].firstJointIdx;
+ const int& indexB = connections[i].secondJointIdx;
+ subset[i].peaksIndices[idxJointA] = indexA;
+ subset[i].peaksIndices[idxJointB] = indexB;
+ subset[i].nJoints = 2;
+ subset[i].score = candidates[indexA].score + candidates[indexB].score + connections[i].score;
+ }
+ } else if (extraJointConnections) {
+ for (size_t i = 0; i < connections.size(); i++) {
+ const int& indexA = connections[i].firstJointIdx;
+ const int& indexB = connections[i].secondJointIdx;
+ for (size_t j = 0; j < subset.size(); j++) {
+ if (subset[j].peaksIndices[idxJointA] == indexA && subset[j].peaksIndices[idxJointB] == -1) {
+ subset[j].peaksIndices[idxJointB] = indexB;
+ } else if (subset[j].peaksIndices[idxJointB] == indexB && subset[j].peaksIndices[idxJointA] == -1) {
+ subset[j].peaksIndices[idxJointA] = indexA;
+ }
+ }
+ }
+ continue;
+ } else {
+ for (size_t i = 0; i < connections.size(); i++) {
+ const int& indexA = connections[i].firstJointIdx;
+ const int& indexB = connections[i].secondJointIdx;
+ bool num = false;
+ for (size_t j = 0; j < subset.size(); j++) {
+ if (subset[j].peaksIndices[idxJointA] == indexA) {
+ subset[j].peaksIndices[idxJointB] = indexB;
+ subset[j].nJoints++;
+ subset[j].score += candidates[indexB].score + connections[i].score;
+ num = true;
+ }
+ }
+ if (!num) {
+ HumanPoseByPeaksIndices hpWithScore(keypointsNumber);
+ hpWithScore.peaksIndices[idxJointA] = indexA;
+ hpWithScore.peaksIndices[idxJointB] = indexB;
+ hpWithScore.nJoints = 2;
+ hpWithScore.score = candidates[indexA].score + candidates[indexB].score + connections[i].score;
+ subset.push_back(hpWithScore);
+ }
+ }
+ }
+ }
+ std::vector<HumanPose> poses;
+ for (const auto& subsetI : subset) {
+ if (subsetI.nJoints < minJointsNumber || subsetI.score / subsetI.nJoints < minSubsetScore) {
+ continue;
+ }
+ int position = -1;
+ HumanPose pose{std::vector<cv::Point2f>(keypointsNumber, cv::Point2f(-1.0f, -1.0f)),
+ subsetI.score * std::max(0, subsetI.nJoints - 1)};
+ for (const auto& peakIdx : subsetI.peaksIndices) {
+ position++;
+ if (peakIdx >= 0) {
+ pose.keypoints[position] = candidates[peakIdx].pos;
+ pose.keypoints[position].x += 0.5;
+ pose.keypoints[position].y += 0.5;
+ }
+ }
+ poses.push_back(pose);
+ }
+ return poses;
+}
diff --git a/python/openvino/runtime/common/models/src/segmentation_model.cpp b/python/openvino/runtime/common/models/src/segmentation_model.cpp
new file mode 100644
index 0000000..82a153b
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/segmentation_model.cpp
@@ -0,0 +1,157 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/segmentation_model.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <fstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <opencv2/core.hpp>
+#include <opencv2/imgproc.hpp>
+#include <openvino/openvino.hpp>
+
+#include "models/internal_model_data.h"
+#include "models/results.h"
+
+SegmentationModel::SegmentationModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout)
+ : ImageModel(modelFileName, useAutoResize, layout) {}
+
+std::vector<std::string> SegmentationModel::loadLabels(const std::string& labelFilename) {
+ std::vector<std::string> labelsList;
+
+ /* Read labels (if any) */
+ if (!labelFilename.empty()) {
+ std::ifstream inputFile(labelFilename);
+ if (!inputFile.is_open())
+ throw std::runtime_error("Can't open the labels file: " + labelFilename);
+ std::string label;
+ while (std::getline(inputFile, label)) {
+ labelsList.push_back(label);
+ }
+ if (labelsList.empty())
+ throw std::logic_error("File is empty: " + labelFilename);
+ }
+
+ return labelsList;
+}
+
+void SegmentationModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output ---------------------------------------------
+ // --------------------------- Prepare input -----------------------------------------------------
+ if (model->inputs().size() != 1) {
+ throw std::logic_error("Segmentation model wrapper supports topologies with only 1 input");
+ }
+ const auto& input = model->input();
+ inputsNames.push_back(input.get_any_name());
+
+ const ov::Layout& inputLayout = getInputLayout(input);
+ const ov::Shape& inputShape = input.get_shape();
+ if (inputShape.size() != 4 || inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
+ throw std::logic_error("3-channel 4-dimensional model's input is expected");
+ }
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});
+
+ if (useAutoResize) {
+ ppp.input().tensor().set_spatial_dynamic_shape();
+
+ ppp.input()
+ .preprocess()
+ .convert_element_type(ov::element::f32)
+ .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
+ }
+
+ ppp.input().model().set_layout(inputLayout);
+ model = ppp.build();
+ // --------------------------- Prepare output -----------------------------------------------------
+ if (model->outputs().size() != 1) {
+ throw std::logic_error("Segmentation model wrapper supports topologies with only 1 output");
+ }
+
+ const auto& output = model->output();
+ outputsNames.push_back(output.get_any_name());
+
+ const ov::Shape& outputShape = output.get_shape();
+ ov::Layout outputLayout("");
+ switch (outputShape.size()) {
+ case 3:
+ outputLayout = "CHW";
+ outChannels = 1;
+ outHeight = static_cast<int>(outputShape[ov::layout::height_idx(outputLayout)]);
+ outWidth = static_cast<int>(outputShape[ov::layout::width_idx(outputLayout)]);
+ break;
+ case 4:
+ outputLayout = "NCHW";
+ outChannels = static_cast<int>(outputShape[ov::layout::channels_idx(outputLayout)]);
+ outHeight = static_cast<int>(outputShape[ov::layout::height_idx(outputLayout)]);
+ outWidth = static_cast<int>(outputShape[ov::layout::width_idx(outputLayout)]);
+ break;
+ default:
+ throw std::logic_error("Unexpected output tensor shape. Only 4D and 3D outputs are supported.");
+ }
+}
+
+std::unique_ptr<ResultBase> SegmentationModel::postprocess(InferenceResult& infResult) {
+ ImageResult* result = new ImageResult(infResult.frameId, infResult.metaData);
+ const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>();
+ const auto& outTensor = infResult.getFirstOutputTensor();
+
+ result->resultImage = cv::Mat(outHeight, outWidth, CV_8UC1);
+
+ if (outChannels == 1 && outTensor.get_element_type() == ov::element::i32) {
+ cv::Mat predictions(outHeight, outWidth, CV_32SC1, outTensor.data<int32_t>());
+ predictions.convertTo(result->resultImage, CV_8UC1);
+ } else if (outChannels == 1 && outTensor.get_element_type() == ov::element::i64) {
+ cv::Mat predictions(outHeight, outWidth, CV_32SC1);
+ const auto data = outTensor.data<int64_t>();
+ for (size_t i = 0; i < predictions.total(); ++i) {
+ reinterpret_cast<int32_t*>(predictions.data)[i] = int32_t(data[i]);
+ }
+ predictions.convertTo(result->resultImage, CV_8UC1);
+ } else if (outTensor.get_element_type() == ov::element::f32) {
+ const float* data = outTensor.data<float>();
+ for (int rowId = 0; rowId < outHeight; ++rowId) {
+ for (int colId = 0; colId < outWidth; ++colId) {
+ int classId = 0;
+ float maxProb = -1.0f;
+ for (int chId = 0; chId < outChannels; ++chId) {
+ float prob = data[chId * outHeight * outWidth + rowId * outWidth + colId];
+ if (prob > maxProb) {
+ classId = chId;
+ maxProb = prob;
+ }
+ } // nChannels
+
+ result->resultImage.at<uint8_t>(rowId, colId) = classId;
+ } // width
+ } // height
+ }
+
+ cv::resize(result->resultImage,
+ result->resultImage,
+ cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight),
+ 0,
+ 0,
+ cv::INTER_NEAREST);
+
+ return std::unique_ptr<ResultBase>(result);
+}
diff --git a/python/openvino/runtime/common/models/src/style_transfer_model.cpp b/python/openvino/runtime/common/models/src/style_transfer_model.cpp
new file mode 100644
index 0000000..53e8561
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/style_transfer_model.cpp
@@ -0,0 +1,107 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/style_transfer_model.h"
+
+#include <stddef.h>
+
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <opencv2/core.hpp>
+#include <opencv2/imgproc.hpp>
+#include <openvino/openvino.hpp>
+
+#include <utils/image_utils.h>
+#include <utils/ocv_common.hpp>
+
+#include "models/input_data.h"
+#include "models/internal_model_data.h"
+#include "models/results.h"
+
+StyleTransferModel::StyleTransferModel(const std::string& modelFileName, const std::string& layout)
+ : ImageModel(modelFileName, false, layout) {}
+
+void StyleTransferModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output ---------------------------------------------
+ // --------------------------- Prepare input --------------------------------------------------
+ if (model->inputs().size() != 1) {
+ throw std::logic_error("Style transfer model wrapper supports topologies with only 1 input");
+ }
+
+ inputsNames.push_back(model->input().get_any_name());
+
+ const ov::Shape& inputShape = model->input().get_shape();
+ ov::Layout inputLayout = getInputLayout(model->input());
+
+ if (inputShape.size() != 4 || inputShape[ov::layout::batch_idx(inputLayout)] != 1 ||
+ inputShape[ov::layout::channels_idx(inputLayout)] != 3) {
+ throw std::logic_error("3-channel 4-dimensional model's input is expected");
+ }
+
+ netInputWidth = inputShape[ov::layout::width_idx(inputLayout)];
+ netInputHeight = inputShape[ov::layout::height_idx(inputLayout)];
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ ppp.input().preprocess().convert_element_type(ov::element::f32);
+ ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC");
+
+ ppp.input().model().set_layout(inputLayout);
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ const ov::OutputVector& outputs = model->outputs();
+ if (outputs.size() != 1) {
+ throw std::logic_error("Style transfer model wrapper supports topologies with only 1 output");
+ }
+ outputsNames.push_back(model->output().get_any_name());
+
+ const ov::Shape& outputShape = model->output().get_shape();
+ ov::Layout outputLayout{"NCHW"};
+ if (outputShape.size() != 4 || outputShape[ov::layout::batch_idx(outputLayout)] != 1 ||
+ outputShape[ov::layout::channels_idx(outputLayout)] != 3) {
+ throw std::logic_error("3-channel 4-dimensional model's output is expected");
+ }
+
+ ppp.output().tensor().set_element_type(ov::element::f32);
+ model = ppp.build();
+}
+
+std::unique_ptr<ResultBase> StyleTransferModel::postprocess(InferenceResult& infResult) {
+ ImageResult* result = new ImageResult;
+ *static_cast<ResultBase*>(result) = static_cast<ResultBase&>(infResult);
+
+ const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>();
+ const auto outputData = infResult.getFirstOutputTensor().data<float>();
+
+ const ov::Shape& outputShape = infResult.getFirstOutputTensor().get_shape();
+ size_t outHeight = static_cast<int>(outputShape[2]);
+ size_t outWidth = static_cast<int>(outputShape[3]);
+ size_t numOfPixels = outWidth * outHeight;
+
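+    // The output tensor holds planar (CHW) float data. Each plane is wrapped in a cv::Mat without copying,
+    // and the planes are merged in reverse order so that a (presumably RGB) network output becomes a BGR image.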
+    std::vector<cv::Mat> imgPlanes{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2])),
+                                   cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])),
+                                   cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0]))};
+ cv::Mat resultImg;
+ cv::merge(imgPlanes, resultImg);
+ cv::resize(resultImg, result->resultImage, cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight));
+
+ result->resultImage.convertTo(result->resultImage, CV_8UC3);
+
+ return std::unique_ptr<ResultBase>(result);
+}
diff --git a/python/openvino/runtime/common/models/src/super_resolution_model.cpp b/python/openvino/runtime/common/models/src/super_resolution_model.cpp
new file mode 100644
index 0000000..164991a
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/super_resolution_model.cpp
@@ -0,0 +1,207 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/super_resolution_model.h"
+
+#include <stddef.h>
+
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <opencv2/imgproc.hpp>
+#include <openvino/openvino.hpp>
+
+#include <utils/image_utils.h>
+#include <utils/ocv_common.hpp>
+#include <utils/slog.hpp>
+
+#include "models/input_data.h"
+#include "models/internal_model_data.h"
+#include "models/results.h"
+
+SuperResolutionModel::SuperResolutionModel(const std::string& modelFileName,
+ const cv::Size& inputImgSize,
+ const std::string& layout)
+ : ImageModel(modelFileName, false, layout) {
+ netInputHeight = inputImgSize.height;
+ netInputWidth = inputImgSize.width;
+}
+
+void SuperResolutionModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output ---------------------------------------------
+ // --------------------------- Prepare input --------------------------------------------------
+ const ov::OutputVector& inputs = model->inputs();
+ if (inputs.size() != 1 && inputs.size() != 2) {
+ throw std::logic_error("Super resolution model wrapper supports topologies with 1 or 2 inputs only");
+ }
+ std::string lrInputTensorName = inputs.begin()->get_any_name();
+ inputsNames.push_back(lrInputTensorName);
+ ov::Shape lrShape = inputs.begin()->get_shape();
+ if (lrShape.size() != 4) {
+ throw std::logic_error("Number of dimensions for an input must be 4");
+ }
+ // in case of 2 inputs they have the same layouts
+ ov::Layout inputLayout = getInputLayout(model->inputs().front());
+
+ auto channelsId = ov::layout::channels_idx(inputLayout);
+ auto heightId = ov::layout::height_idx(inputLayout);
+ auto widthId = ov::layout::width_idx(inputLayout);
+
+ if (lrShape[channelsId] != 1 && lrShape[channelsId] != 3) {
+ throw std::logic_error("Input layer is expected to have 1 or 3 channels");
+ }
+
+    // A model like single-image-super-resolution-???? may take a bicubic interpolation of the input image as
+    // the second input
+ std::string bicInputTensorName;
+ if (inputs.size() == 2) {
+ bicInputTensorName = (++inputs.begin())->get_any_name();
+ inputsNames.push_back(bicInputTensorName);
+ ov::Shape bicShape = (++inputs.begin())->get_shape();
+ if (bicShape.size() != 4) {
+ throw std::logic_error("Number of dimensions for both inputs must be 4");
+ }
+ if (lrShape[widthId] >= bicShape[widthId] && lrShape[heightId] >= bicShape[heightId]) {
+ std::swap(bicShape, lrShape);
+ inputsNames[0].swap(inputsNames[1]);
+ } else if (!(lrShape[widthId] <= bicShape[widthId] && lrShape[heightId] <= bicShape[heightId])) {
+ throw std::logic_error("Each spatial dimension of one input must surpass or be equal to a spatial"
+ "dimension of another input");
+ }
+ }
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ for (const auto& input : inputs) {
+ ppp.input(input.get_any_name()).tensor().set_element_type(ov::element::u8).set_layout("NHWC");
+
+ ppp.input(input.get_any_name()).model().set_layout(inputLayout);
+ }
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ const ov::OutputVector& outputs = model->outputs();
+ if (outputs.size() != 1) {
+ throw std::logic_error("Super resolution model wrapper supports topologies with only 1 output");
+ }
+
+ outputsNames.push_back(outputs.begin()->get_any_name());
+ ppp.output().tensor().set_element_type(ov::element::f32);
+ model = ppp.build();
+
+ const ov::Shape& outShape = model->output().get_shape();
+
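+    // The upscale coefficient is derived from the ratio of the model's output width to its low-resolution
+    // input width; changeInputSize() then reshapes the inputs to the requested image size scaled by it.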
+ const ov::Layout outputLayout("NCHW");
+ const auto outWidth = outShape[ov::layout::width_idx(outputLayout)];
+ const auto inWidth = lrShape[ov::layout::width_idx(outputLayout)];
+ changeInputSize(model, static_cast<int>(outWidth / inWidth));
+}
+
+void SuperResolutionModel::changeInputSize(std::shared_ptr<ov::Model>& model, int coeff) {
+ std::map<std::string, ov::PartialShape> shapes;
+ const ov::Layout& layout = ov::layout::get_layout(model->inputs().front());
+ const auto batchId = ov::layout::batch_idx(layout);
+ const auto heightId = ov::layout::height_idx(layout);
+ const auto widthId = ov::layout::width_idx(layout);
+
+ const ov::OutputVector& inputs = model->inputs();
+ std::string lrInputTensorName = inputs.begin()->get_any_name();
+ ov::Shape lrShape = inputs.begin()->get_shape();
+
+ if (inputs.size() == 2) {
+ std::string bicInputTensorName = (++inputs.begin())->get_any_name();
+ ov::Shape bicShape = (++inputs.begin())->get_shape();
+ if (lrShape[heightId] >= bicShape[heightId] && lrShape[widthId] >= bicShape[widthId]) {
+ std::swap(bicShape, lrShape);
+ std::swap(bicInputTensorName, lrInputTensorName);
+ }
+ bicShape[batchId] = 1;
+ bicShape[heightId] = coeff * netInputHeight;
+ bicShape[widthId] = coeff * netInputWidth;
+ shapes[bicInputTensorName] = ov::PartialShape(bicShape);
+ }
+
+ lrShape[batchId] = 1;
+ lrShape[heightId] = netInputHeight;
+ lrShape[widthId] = netInputWidth;
+ shapes[lrInputTensorName] = ov::PartialShape(lrShape);
+
+ model->reshape(shapes);
+}
+
+std::shared_ptr<InternalModelData> SuperResolutionModel::preprocess(const InputData& inputData,
+ ov::InferRequest& request) {
+ auto imgData = inputData.asRef<ImageInputData>();
+ auto& img = imgData.inputImage;
+
+ const ov::Tensor lrInputTensor = request.get_tensor(inputsNames[0]);
+ const ov::Layout layout("NHWC");
+
+ if (img.channels() != static_cast<int>(lrInputTensor.get_shape()[ov::layout::channels_idx(layout)])) {
+ cv::cvtColor(img, img, cv::COLOR_BGR2GRAY);
+ }
+
+ if (static_cast<size_t>(img.cols) != netInputWidth || static_cast<size_t>(img.rows) != netInputHeight) {
+ slog::warn << "\tChosen model aspect ratio doesn't match image aspect ratio" << slog::endl;
+ }
+ const size_t height = lrInputTensor.get_shape()[ov::layout::height_idx(layout)];
+ const size_t width = lrInputTensor.get_shape()[ov::layout::width_idx(layout)];
+ img = resizeImageExt(img, width, height);
+ request.set_tensor(inputsNames[0], wrapMat2Tensor(img));
+
+ if (inputsNames.size() == 2) {
+ const ov::Tensor bicInputTensor = request.get_tensor(inputsNames[1]);
+ const int h = static_cast<int>(bicInputTensor.get_shape()[ov::layout::height_idx(layout)]);
+ const int w = static_cast<int>(bicInputTensor.get_shape()[ov::layout::width_idx(layout)]);
+ cv::Mat resized;
+ cv::resize(img, resized, cv::Size(w, h), 0, 0, cv::INTER_CUBIC);
+ request.set_tensor(inputsNames[1], wrapMat2Tensor(resized));
+ }
+
+ return std::make_shared<InternalImageModelData>(img.cols, img.rows);
+}
+
+std::unique_ptr<ResultBase> SuperResolutionModel::postprocess(InferenceResult& infResult) {
+ ImageResult* result = new ImageResult;
+ *static_cast<ResultBase*>(result) = static_cast<ResultBase&>(infResult);
+ const auto outputData = infResult.getFirstOutputTensor().data<float>();
+
+ std::vector<cv::Mat> imgPlanes;
+ const ov::Shape& outShape = infResult.getFirstOutputTensor().get_shape();
+    const size_t outChannels = outShape[1];
+    const size_t outHeight = outShape[2];
+    const size_t outWidth = outShape[3];
+ const size_t numOfPixels = outWidth * outHeight;
+ if (outChannels == 3) {
+ imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])),
+ cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])),
+ cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))};
+ } else {
+ imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0]))};
+ // Post-processing for text-image-super-resolution models
+ cv::threshold(imgPlanes[0], imgPlanes[0], 0.5f, 1.0f, cv::THRESH_BINARY);
+ }
+
+ for (auto& img : imgPlanes) {
+ img.convertTo(img, CV_8UC1, 255);
+ }
+ cv::Mat resultImg;
+ cv::merge(imgPlanes, resultImg);
+ result->resultImage = resultImg;
+
+ return std::unique_ptr<ResultBase>(result);
+}
diff --git a/python/openvino/runtime/common/monitors/CMakeLists.txt b/python/openvino/runtime/common/monitors/CMakeLists.txt
new file mode 100644
index 0000000..1bfe0b9
--- /dev/null
+++ b/python/openvino/runtime/common/monitors/CMakeLists.txt
@@ -0,0 +1,38 @@
+# Copyright (C) 2018-2019 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+find_package(OpenCV REQUIRED COMPONENTS core imgproc)
+
+set(SOURCES
+ src/cpu_monitor.cpp
+ src/memory_monitor.cpp
+ src/presenter.cpp)
+
+set(HEADERS
+ include/monitors/cpu_monitor.h
+ include/monitors/memory_monitor.h
+ include/monitors/presenter.h)
+
+if(WIN32)
+ list(APPEND SOURCES src/query_wrapper.cpp)
+ list(APPEND HEADERS include/monitors/query_wrapper.h)
+endif()
+# Create named folders for the sources within the .vcproj
+# Empty name lists them directly under the .vcproj
+source_group("src" FILES ${SOURCES})
+source_group("include" FILES ${HEADERS})
+
+add_library(monitors STATIC ${SOURCES} ${HEADERS})
+target_include_directories(monitors PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
+target_link_libraries(monitors PRIVATE opencv_core opencv_imgproc)
+if(WIN32)
+ target_link_libraries(monitors PRIVATE pdh)
+
+ target_compile_definitions(monitors PRIVATE
+ # Prevents Windows.h from adding unnecessary includes
+ WIN32_LEAN_AND_MEAN
+ # Prevents Windows.h from defining min/max as macros
+ NOMINMAX
+ )
+endif()
diff --git a/python/openvino/runtime/common/monitors/include/monitors/cpu_monitor.h b/python/openvino/runtime/common/monitors/include/monitors/cpu_monitor.h
new file mode 100644
index 0000000..38d2845
--- /dev/null
+++ b/python/openvino/runtime/common/monitors/include/monitors/cpu_monitor.h
@@ -0,0 +1,28 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <cstddef>
+#include <deque>
+#include <memory>
+#include <vector>
+
+class CpuMonitor {
+public:
+ CpuMonitor();
+ ~CpuMonitor();
+ void setHistorySize(std::size_t size);
+ std::size_t getHistorySize() const;
+ void collectData();
+ std::deque<std::vector<double>> getLastHistory() const;
+ std::vector<double> getMeanCpuLoad() const;
+
+private:
+ unsigned samplesNumber;
+ unsigned historySize;
+ std::vector<double> cpuLoadSum;
+ std::deque<std::vector<double>> cpuLoadHistory;
+ class PerformanceCounter;
+ std::unique_ptr<PerformanceCounter> performanceCounter;
+};
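+
+// Illustrative usage sketch (not part of this header; the one-second cadence is an assumption):
+//
+//     CpuMonitor monitor;
+//     monitor.setHistorySize(10);                          // non-zero size enables collection
+//     for (int i = 0; i < 10; ++i) {
+//         std::this_thread::sleep_for(std::chrono::seconds(1));
+//         monitor.collectData();                           // samples per-core load in [0, 1]
+//     }
+//     std::vector<double> means = monitor.getMeanCpuLoad();  // mean load per core over all samples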
diff --git a/python/openvino/runtime/common/monitors/include/monitors/memory_monitor.h b/python/openvino/runtime/common/monitors/include/monitors/memory_monitor.h
new file mode 100644
index 0000000..9eda10f
--- /dev/null
+++ b/python/openvino/runtime/common/monitors/include/monitors/memory_monitor.h
@@ -0,0 +1,34 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <cstddef>
+#include <deque>
+#include <memory>
+#include <utility>
+
+class MemoryMonitor {
+public:
+ MemoryMonitor();
+ ~MemoryMonitor();
+ void setHistorySize(std::size_t size);
+ std::size_t getHistorySize() const;
+ void collectData();
+ std::deque<std::pair<double, double>> getLastHistory() const;
+ double getMeanMem() const; // in GiB
+ double getMeanSwap() const;
+ double getMaxMem() const;
+ double getMaxSwap() const;
+ double getMemTotal() const;
+ double getMaxMemTotal() const; // a system may have hotpluggable memory
+private:
+ unsigned samplesNumber;
+ std::size_t historySize;
+ double memSum, swapSum;
+ double maxMem, maxSwap;
+ double memTotal;
+ double maxMemTotal;
+ std::deque<std::pair<double, double>> memSwapUsageHistory;
+ class PerformanceCounter;
+ std::unique_ptr<PerformanceCounter> performanceCounter;
+};
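+
+// Illustrative usage sketch (an assumption, mirroring CpuMonitor):
+//
+//     MemoryMonitor monitor;
+//     monitor.setHistorySize(10);        // non-zero size enables collection and initializes getMemTotal()
+//     monitor.collectData();             // samples memory and swap usage, reported in GiB
+//     double meanUsedGiB = monitor.getMeanMem();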
diff --git a/python/openvino/runtime/common/monitors/include/monitors/presenter.h b/python/openvino/runtime/common/monitors/include/monitors/presenter.h
new file mode 100644
index 0000000..c6587a0
--- /dev/null
+++ b/python/openvino/runtime/common/monitors/include/monitors/presenter.h
@@ -0,0 +1,44 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <chrono>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <opencv2/imgproc.hpp>
+
+#include "cpu_monitor.h"
+#include "memory_monitor.h"
+
+enum class MonitorType { CpuAverage, DistributionCpu, Memory };
+
+class Presenter {
+public:
+ explicit Presenter(std::set<MonitorType> enabledMonitors = {},
+ int yPos = 20,
+ cv::Size graphSize = {150, 60},
+ std::size_t historySize = 20);
+ explicit Presenter(const std::string& keys,
+ int yPos = 20,
+ cv::Size graphSize = {150, 60},
+ std::size_t historySize = 20);
+ void addRemoveMonitor(MonitorType monitor);
+ void handleKey(int key); // handles C, D, M, H keys
+ void drawGraphs(cv::Mat& frame);
+ std::vector<std::string> reportMeans() const;
+
+ const int yPos;
+ const cv::Size graphSize;
+ const int graphPadding;
+private:
+ std::chrono::steady_clock::time_point prevTimeStamp;
+ std::size_t historySize;
+ CpuMonitor cpuMonitor;
+ bool distributionCpuEnabled;
+ MemoryMonitor memoryMonitor;
+ std::ostringstream strStream;
+};
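+
+// Illustrative sketch of a demo's render loop (readFrame() and the "cdm" key string are assumptions):
+//
+//     Presenter presenter("cdm");          // enable CPU average, per-core distribution and memory graphs
+//     cv::Mat frame;
+//     while (readFrame(frame)) {
+//         presenter.drawGraphs(frame);     // collects data at most once per second and overlays the graphs
+//         cv::imshow("Demo", frame);
+//         presenter.handleKey(cv::waitKey(1));  // C/D/M toggle individual monitors, H toggles all of them
+//     }
+//     for (const std::string& line : presenter.reportMeans()) { std::cout << line << '\n'; }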
diff --git a/python/openvino/runtime/common/monitors/include/monitors/query_wrapper.h b/python/openvino/runtime/common/monitors/include/monitors/query_wrapper.h
new file mode 100644
index 0000000..d69f548
--- /dev/null
+++ b/python/openvino/runtime/common/monitors/include/monitors/query_wrapper.h
@@ -0,0 +1,17 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <Pdh.h>
+class QueryWrapper {
+public:
+ QueryWrapper();
+ ~QueryWrapper();
+ QueryWrapper(const QueryWrapper&) = delete;
+ QueryWrapper& operator=(const QueryWrapper&) = delete;
+ operator PDH_HQUERY() const;
+private:
+ PDH_HQUERY query;
+};
diff --git a/python/openvino/runtime/common/monitors/src/cpu_monitor.cpp b/python/openvino/runtime/common/monitors/src/cpu_monitor.cpp
new file mode 100644
index 0000000..e5172a2
--- /dev/null
+++ b/python/openvino/runtime/common/monitors/src/cpu_monitor.cpp
@@ -0,0 +1,206 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "monitors/cpu_monitor.h"
+
+#include <algorithm>
+#ifdef _WIN32
+#include "monitors/query_wrapper.h"
+#include <string>
+#include <system_error>
+#include <PdhMsg.h>
+#include <Windows.h>
+
+namespace {
+const std::size_t nCores = []() {
+ SYSTEM_INFO sysinfo;
+ GetSystemInfo(&sysinfo);
+ return sysinfo.dwNumberOfProcessors;
+}();
+}
+
+class CpuMonitor::PerformanceCounter {
+public:
+ PerformanceCounter() : coreTimeCounters(nCores) {
+ PDH_STATUS status;
+ for (std::size_t i = 0; i < nCores; ++i) {
+ std::wstring fullCounterPath{L"\\Processor(" + std::to_wstring(i) + L")\\% Processor Time"};
+ status = PdhAddCounterW(query, fullCounterPath.c_str(), 0, &coreTimeCounters[i]);
+ if (ERROR_SUCCESS != status) {
+ throw std::system_error(status, std::system_category(), "PdhAddCounterW() failed");
+ }
+ status = PdhSetCounterScaleFactor(coreTimeCounters[i], -2); // scale counter to [0, 1]
+ if (ERROR_SUCCESS != status) {
+ throw std::system_error(status, std::system_category(), "PdhSetCounterScaleFactor() failed");
+ }
+ }
+ status = PdhCollectQueryData(query);
+ if (ERROR_SUCCESS != status) {
+ throw std::system_error(status, std::system_category(), "PdhCollectQueryData() failed");
+ }
+ }
+
+ std::vector<double> getCpuLoad() {
+ PDH_STATUS status;
+ status = PdhCollectQueryData(query);
+ if (ERROR_SUCCESS != status) {
+ throw std::system_error(status, std::system_category(), "PdhCollectQueryData() failed");
+ }
+
+ PDH_FMT_COUNTERVALUE displayValue;
+ std::vector<double> cpuLoad(coreTimeCounters.size());
+ for (std::size_t i = 0; i < coreTimeCounters.size(); ++i) {
+ status = PdhGetFormattedCounterValue(coreTimeCounters[i], PDH_FMT_DOUBLE, NULL,
+ &displayValue);
+ switch (status) {
+ case ERROR_SUCCESS: break;
+            // PdhGetFormattedCounterValue() occasionally returns PDH_CALC_NEGATIVE_DENOMINATOR; drop the whole sample
+ case PDH_CALC_NEGATIVE_DENOMINATOR: return {};
+ default:
+ throw std::system_error(status, std::system_category(), "PdhGetFormattedCounterValue() failed");
+ }
+ if (PDH_CSTATUS_VALID_DATA != displayValue.CStatus && PDH_CSTATUS_NEW_DATA != displayValue.CStatus) {
+ throw std::runtime_error("Error in counter data");
+ }
+
+ cpuLoad[i] = displayValue.doubleValue;
+ }
+ return cpuLoad;
+ }
+
+private:
+ QueryWrapper query;
+ std::vector<PDH_HCOUNTER> coreTimeCounters;
+};
+
+#elif __linux__
+#include <chrono>
+#include <regex>
+#include <utility>
+#include <fstream>
+#include <unistd.h>
+
+namespace {
+const long clockTicks = sysconf(_SC_CLK_TCK);
+
+const std::size_t nCores = sysconf(_SC_NPROCESSORS_CONF);
+
+std::vector<unsigned long> getIdleCpuStat() {
+ std::vector<unsigned long> idleCpuStat(nCores);
+ std::ifstream procStat("/proc/stat");
+ std::string line;
+ std::smatch match;
+ std::regex coreJiffies("^cpu(\\d+)\\s+"
+ "(\\d+)\\s+"
+ "(\\d+)\\s+"
+ "(\\d+)\\s+"
+ "(\\d+)\\s+" // idle
+ "(\\d+)"); // iowait
+
+ while (std::getline(procStat, line)) {
+ if (std::regex_search(line, match, coreJiffies)) {
+            // Note: overflow of the sum and of the /proc/stat counters is not handled
+ unsigned long idleInfo = stoul(match[5]) + stoul(match[6]),
+ coreId = stoul(match[1]);
+ if (nCores <= coreId) {
+ throw std::runtime_error("The number of cores has changed");
+ }
+ idleCpuStat[coreId] = idleInfo;
+ }
+ }
+ return idleCpuStat;
+}
+}
+
+class CpuMonitor::PerformanceCounter {
+public:
+ PerformanceCounter() : prevIdleCpuStat{getIdleCpuStat()}, prevTimePoint{std::chrono::steady_clock::now()} {}
+
+ std::vector<double> getCpuLoad() {
+ std::vector<unsigned long> idleCpuStat = getIdleCpuStat();
+ auto timePoint = std::chrono::steady_clock::now();
+        // Don't update the data too frequently: a very short interval can produce negative cpuLoad
+        // values, e.g. when collectData() is called right after setHistorySize().
+ if (timePoint - prevTimePoint > std::chrono::milliseconds{100}) {
+ std::vector<double> cpuLoad(nCores);
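+            // Per-core load is 1 minus the idle fraction: idle jiffies are converted to seconds via
+            // clockTicks and divided by the elapsed wall-clock time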
+ for (std::size_t i = 0; i < idleCpuStat.size(); ++i) {
+ double idleDiff = idleCpuStat[i] - prevIdleCpuStat[i];
+ typedef std::chrono::duration<double, std::chrono::seconds::period> Sec;
+ cpuLoad[i] = 1.0
+ - idleDiff / clockTicks / std::chrono::duration_cast<Sec>(timePoint - prevTimePoint).count();
+ }
+ prevIdleCpuStat = std::move(idleCpuStat);
+ prevTimePoint = timePoint;
+ return cpuLoad;
+ }
+ return {};
+ }
+private:
+ std::vector<unsigned long> prevIdleCpuStat;
+ std::chrono::steady_clock::time_point prevTimePoint;
+};
+
+#else
+// not implemented
+namespace {
+const std::size_t nCores{0};
+}
+
+class CpuMonitor::PerformanceCounter {
+public:
+    std::vector<double> getCpuLoad() { return {}; }
+};
+#endif
+
+CpuMonitor::CpuMonitor() :
+ samplesNumber{0},
+ historySize{0},
+ cpuLoadSum(nCores, 0) {}
+
+// PerformanceCounter is incomplete in header and destructor can't be defined implicitly
+CpuMonitor::~CpuMonitor() = default;
+
+void CpuMonitor::setHistorySize(std::size_t size) {
+ if (0 == historySize && 0 != size) {
+ performanceCounter.reset(new PerformanceCounter);
+ } else if (0 != historySize && 0 == size) {
+ performanceCounter.reset();
+ }
+ historySize = size;
+ std::ptrdiff_t newSize = static_cast<std::ptrdiff_t>(std::min(size, cpuLoadHistory.size()));
+ cpuLoadHistory.erase(cpuLoadHistory.begin(), cpuLoadHistory.end() - newSize);
+}
+
+void CpuMonitor::collectData() {
+ std::vector<double> cpuLoad = performanceCounter->getCpuLoad();
+
+ if (!cpuLoad.empty()) {
+ for (std::size_t i = 0; i < cpuLoad.size(); ++i) {
+ cpuLoadSum[i] += cpuLoad[i];
+ }
+ ++samplesNumber;
+
+ cpuLoadHistory.push_back(std::move(cpuLoad));
+ if (cpuLoadHistory.size() > historySize) {
+ cpuLoadHistory.pop_front();
+ }
+ }
+}
+
+std::size_t CpuMonitor::getHistorySize() const {
+ return historySize;
+}
+
+std::deque<std::vector<double>> CpuMonitor::getLastHistory() const {
+ return cpuLoadHistory;
+}
+
+std::vector<double> CpuMonitor::getMeanCpuLoad() const {
+ std::vector<double> meanCpuLoad;
+ meanCpuLoad.reserve(cpuLoadSum.size());
+ for (double coreLoad : cpuLoadSum) {
+ meanCpuLoad.push_back(samplesNumber ? coreLoad / samplesNumber : 0);
+ }
+ return meanCpuLoad;
+}
diff --git a/python/openvino/runtime/common/monitors/src/memory_monitor.cpp b/python/openvino/runtime/common/monitors/src/memory_monitor.cpp
new file mode 100644
index 0000000..70879d6
--- /dev/null
+++ b/python/openvino/runtime/common/monitors/src/memory_monitor.cpp
@@ -0,0 +1,213 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "monitors/memory_monitor.h"
+
+struct MemState {
+ double memTotal, usedMem, usedSwap;
+};
+
+#ifdef _WIN32
+#include "monitors/query_wrapper.h"
+#include <algorithm>
+#define PSAPI_VERSION 2
+#include <system_error>
+#include <Windows.h>
+#include <PdhMsg.h>
+#include <Psapi.h>
+
+namespace {
+double getMemTotal() {
+ PERFORMANCE_INFORMATION performanceInformation;
+ if (!GetPerformanceInfo(&performanceInformation, sizeof(performanceInformation))) {
+ throw std::runtime_error("GetPerformanceInfo() failed");
+ }
+ return static_cast<double>(performanceInformation.PhysicalTotal * performanceInformation.PageSize)
+ / (1024 * 1024 * 1024);
+}
+}
+
+class MemoryMonitor::PerformanceCounter {
+public:
+ PerformanceCounter() {
+ PDH_STATUS status = PdhAddCounterW(query, L"\\Paging File(_Total)\\% Usage", 0, &pagingFileUsageCounter);
+ if (ERROR_SUCCESS != status) {
+ throw std::system_error(status, std::system_category(), "PdhAddCounterW() failed");
+ }
+ status = PdhSetCounterScaleFactor(pagingFileUsageCounter, -2); // scale counter to [0, 1]
+ if (ERROR_SUCCESS != status) {
+ throw std::system_error(status, std::system_category(), "PdhSetCounterScaleFactor() failed");
+ }
+ }
+
+ MemState getMemState() {
+ PERFORMANCE_INFORMATION performanceInformation;
+ if (!GetPerformanceInfo(&performanceInformation, sizeof(performanceInformation))) {
+ throw std::runtime_error("GetPerformanceInfo() failed");
+ }
+
+ PDH_STATUS status;
+ status = PdhCollectQueryData(query);
+ if (ERROR_SUCCESS != status) {
+ throw std::system_error(status, std::system_category(), "PdhCollectQueryData() failed");
+ }
+ PDH_FMT_COUNTERVALUE displayValue;
+ status = PdhGetFormattedCounterValue(pagingFileUsageCounter, PDH_FMT_DOUBLE, NULL, &displayValue);
+ if (ERROR_SUCCESS != status) {
+ throw std::system_error(status, std::system_category(), "PdhGetFormattedCounterValue() failed");
+ }
+ if (PDH_CSTATUS_VALID_DATA != displayValue.CStatus && PDH_CSTATUS_NEW_DATA != displayValue.CStatus) {
+ throw std::runtime_error("Error in counter data");
+ }
+
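+        // Swap capacity is approximated as CommitLimit minus physical RAM (both in pages, converted to GiB);
+        // used swap is that capacity multiplied by the "% Usage" counter scaled to [0, 1]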
+ double pagingFilesSize = static_cast<double>(
+ (performanceInformation.CommitLimit - performanceInformation.PhysicalTotal)
+ * performanceInformation.PageSize) / (1024 * 1024 * 1024);
+ return {static_cast<double>(performanceInformation.PhysicalTotal * performanceInformation.PageSize)
+ / (1024 * 1024 * 1024),
+ static_cast<double>(
+ (performanceInformation.PhysicalTotal - performanceInformation.PhysicalAvailable)
+ * performanceInformation.PageSize) / (1024 * 1024 * 1024),
+ pagingFilesSize * displayValue.doubleValue};
+ }
+private:
+ QueryWrapper query;
+ PDH_HCOUNTER pagingFileUsageCounter;
+};
+
+#elif __linux__
+#include <fstream>
+#include <utility>
+#include <vector>
+#include <regex>
+
+namespace {
+std::pair<std::pair<double, double>, std::pair<double, double>> getAvailableMemSwapTotalMemSwap() {
+ double memAvailable = 0, swapFree = 0, memTotal = 0, swapTotal = 0;
+ std::regex memRegex("^(.+):\\s+(\\d+) kB$");
+ std::string line;
+ std::smatch match;
+ std::ifstream meminfo("/proc/meminfo");
+ while (std::getline(meminfo, line)) {
+ if (std::regex_match(line, match, memRegex)) {
+ if ("MemAvailable" == match[1]) {
+ memAvailable = stod(match[2]) / (1024 * 1024);
+ } else if ("SwapFree" == match[1]) {
+ swapFree = stod(match[2]) / (1024 * 1024);
+ } else if ("MemTotal" == match[1]) {
+ memTotal = stod(match[2]) / (1024 * 1024);
+ } else if ("SwapTotal" == match[1]) {
+ swapTotal = stod(match[2]) / (1024 * 1024);
+ }
+ }
+ }
+ if (0 == memTotal) {
+ throw std::runtime_error("Can't get MemTotal");
+ }
+ return {{memAvailable, swapFree}, {memTotal, swapTotal}};
+}
+
+double getMemTotal() {
+ return getAvailableMemSwapTotalMemSwap().second.first;
+}
+}
+
+class MemoryMonitor::PerformanceCounter {
+public:
+ MemState getMemState() {
+ std::pair<std::pair<double, double>, std::pair<double, double>> availableMemSwapTotalMemSwap
+ = getAvailableMemSwapTotalMemSwap();
+ double memTotal = availableMemSwapTotalMemSwap.second.first;
+ double swapTotal = availableMemSwapTotalMemSwap.second.second;
+ return {memTotal, memTotal - availableMemSwapTotalMemSwap.first.first, swapTotal - availableMemSwapTotalMemSwap.first.second};
+ }
+};
+
+#else
+// not implemented
+namespace {
+double getMemTotal() {return 0.0;}
+}
+
+class MemoryMonitor::PerformanceCounter {
+public:
+ MemState getMemState() {return {0.0, 0.0, 0.0};}
+};
+#endif
+
+MemoryMonitor::MemoryMonitor() :
+ samplesNumber{0},
+ historySize{0},
+ memSum{0.0},
+ swapSum{0.0},
+ maxMem{0.0},
+ maxSwap{0.0},
+ memTotal{0.0},
+ maxMemTotal{0.0} {}
+
+// PerformanceCounter is incomplete in header and destructor can't be defined implicitly
+MemoryMonitor::~MemoryMonitor() = default;
+
+void MemoryMonitor::setHistorySize(std::size_t size) {
+ if (0 == historySize && 0 != size) {
+ performanceCounter.reset(new MemoryMonitor::PerformanceCounter);
+ // memTotal is not initialized in constructor because for linux its initialization involves constructing
+ // std::regex which is unimplemented and throws an exception for gcc 4.8.5 (default for CentOS 7.4).
+ // Delaying initialization triggers the error only when the monitor is used
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53631
+ memTotal = ::getMemTotal();
+ } else if (0 != historySize && 0 == size) {
+ performanceCounter.reset();
+ }
+ historySize = size;
+ std::size_t newSize = std::min(size, memSwapUsageHistory.size());
+ memSwapUsageHistory.erase(memSwapUsageHistory.begin(), memSwapUsageHistory.end() - newSize);
+}
+
+void MemoryMonitor::collectData() {
+ MemState memState = performanceCounter->getMemState();
+ maxMemTotal = std::max(maxMemTotal, memState.memTotal);
+ memSum += memState.usedMem;
+ swapSum += memState.usedSwap;
+ ++samplesNumber;
+ maxMem = std::max(maxMem, memState.usedMem);
+ maxSwap = std::max(maxSwap, memState.usedSwap);
+
+ memSwapUsageHistory.emplace_back(memState.usedMem, memState.usedSwap);
+ if (memSwapUsageHistory.size() > historySize) {
+ memSwapUsageHistory.pop_front();
+ }
+}
+
+std::size_t MemoryMonitor::getHistorySize() const {
+ return historySize;
+}
+
+std::deque<std::pair<double, double>> MemoryMonitor::getLastHistory() const {
+ return memSwapUsageHistory;
+}
+
+double MemoryMonitor::getMeanMem() const {
+ return samplesNumber ? memSum / samplesNumber : 0;
+}
+
+double MemoryMonitor::getMeanSwap() const {
+ return samplesNumber ? swapSum / samplesNumber : 0;
+}
+
+double MemoryMonitor::getMaxMem() const {
+ return maxMem;
+}
+
+double MemoryMonitor::getMaxSwap() const {
+ return maxSwap;
+}
+
+double MemoryMonitor::getMemTotal() const {
+ return memTotal;
+}
+
+double MemoryMonitor::getMaxMemTotal() const {
+ return maxMemTotal;
+}
diff --git a/python/openvino/runtime/common/monitors/src/presenter.cpp b/python/openvino/runtime/common/monitors/src/presenter.cpp
new file mode 100644
index 0000000..61f5e15
--- /dev/null
+++ b/python/openvino/runtime/common/monitors/src/presenter.cpp
@@ -0,0 +1,330 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <cctype>
+#include <chrono>
+#include <iomanip>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "monitors/presenter.h"
+
+namespace {
+const std::map<int, MonitorType> keyToMonitorType{
+ {'C', MonitorType::CpuAverage},
+ {'D', MonitorType::DistributionCpu},
+ {'M', MonitorType::Memory}};
+
+std::set<MonitorType> strKeysToMonitorSet(const std::string& keys) {
+ std::set<MonitorType> enabledMonitors;
+ if (keys == "h") {
+ return enabledMonitors;
+ }
+ for (unsigned char key: keys) {
+ if (key == 'h') {
+ throw std::runtime_error("Unacceptable combination of monitor types-can't show and hide info at the same time");
+ }
+ auto iter = keyToMonitorType.find(std::toupper(key));
+ if (keyToMonitorType.end() == iter) {
+ throw std::runtime_error("Unknown monitor type");
+ } else {
+ enabledMonitors.insert(iter->second);
+ }
+ }
+ return enabledMonitors;
+}
+}
+
+Presenter::Presenter(std::set<MonitorType> enabledMonitors,
+ int yPos,
+ cv::Size graphSize,
+ std::size_t historySize) :
+ yPos{yPos},
+ graphSize{graphSize},
+ graphPadding{std::max(1, static_cast<int>(graphSize.width * 0.05))},
+ historySize{historySize},
+ distributionCpuEnabled{false},
+ strStream{std::ios_base::app} {
+ for (MonitorType monitor : enabledMonitors) {
+ addRemoveMonitor(monitor);
+ }
+}
+
+Presenter::Presenter(const std::string& keys, int yPos, cv::Size graphSize, std::size_t historySize) :
+ Presenter{strKeysToMonitorSet(keys), yPos, graphSize, historySize} {}
+
+void Presenter::addRemoveMonitor(MonitorType monitor) {
+ unsigned updatedHistorySize = 1;
+ if (historySize > 1) {
+ int sampleStep = std::max(1, static_cast<int>(graphSize.width / (historySize - 1)));
+        // Round up so the samples cover the whole graph width, plus one extra element
+        // so that graphSize.width / sampleStep segments can be plotted
+ updatedHistorySize = (graphSize.width + sampleStep - 1) / sampleStep + 1;
+ }
+ switch(monitor) {
+ case MonitorType::CpuAverage: {
+ if (cpuMonitor.getHistorySize() > 1 && distributionCpuEnabled) {
+ cpuMonitor.setHistorySize(1);
+ } else if (cpuMonitor.getHistorySize() > 1 && !distributionCpuEnabled) {
+ cpuMonitor.setHistorySize(0);
+ } else { // cpuMonitor.getHistorySize() <= 1
+ cpuMonitor.setHistorySize(updatedHistorySize);
+ }
+ break;
+ }
+ case MonitorType::DistributionCpu: {
+ if (distributionCpuEnabled) {
+ distributionCpuEnabled = false;
+ if (1 == cpuMonitor.getHistorySize()) { // cpuMonitor was used only for DistributionCpu => disable it
+ cpuMonitor.setHistorySize(0);
+ }
+ } else {
+ distributionCpuEnabled = true;
+ cpuMonitor.setHistorySize(std::max(std::size_t{1}, cpuMonitor.getHistorySize()));
+ }
+ break;
+ }
+ case MonitorType::Memory: {
+ if (memoryMonitor.getHistorySize() > 1) {
+ memoryMonitor.setHistorySize(0);
+ } else {
+ memoryMonitor.setHistorySize(updatedHistorySize);
+ }
+ break;
+ }
+ }
+}
+
+void Presenter::handleKey(int key) {
+ key = std::toupper(key);
+ if ('H' == key) {
+ if (0 == cpuMonitor.getHistorySize() && memoryMonitor.getHistorySize() <= 1) {
+ addRemoveMonitor(MonitorType::CpuAverage);
+ addRemoveMonitor(MonitorType::DistributionCpu);
+ addRemoveMonitor(MonitorType::Memory);
+ } else {
+ cpuMonitor.setHistorySize(0);
+ distributionCpuEnabled = false;
+ memoryMonitor.setHistorySize(0);
+ }
+ } else {
+ auto iter = keyToMonitorType.find(key);
+ if (keyToMonitorType.end() != iter) {
+ addRemoveMonitor(iter->second);
+ }
+ }
+}
+
+void Presenter::drawGraphs(cv::Mat& frame) {
+ const std::chrono::steady_clock::time_point curTimeStamp = std::chrono::steady_clock::now();
+ if (curTimeStamp - prevTimeStamp >= std::chrono::milliseconds{1000}) {
+ prevTimeStamp = curTimeStamp;
+ if (0 != cpuMonitor.getHistorySize()) {
+ cpuMonitor.collectData();
+ }
+ if (memoryMonitor.getHistorySize() > 1) {
+ memoryMonitor.collectData();
+ }
+ }
+
+ int numberOfEnabledMonitors = (cpuMonitor.getHistorySize() > 1) + distributionCpuEnabled
+ + (memoryMonitor.getHistorySize() > 1);
+ int panelWidth = graphSize.width * numberOfEnabledMonitors
+ + std::max(0, numberOfEnabledMonitors - 1) * graphPadding;
+ while (panelWidth > frame.cols) {
+ panelWidth = std::max(0, panelWidth - graphSize.width - graphPadding);
+ --numberOfEnabledMonitors; // can't draw all monitors
+ }
+ int graphPos = std::max(0, (frame.cols - 1 - panelWidth) / 2);
+ int textGraphSplittingLine = graphSize.height / 5;
+ int graphRectHeight = graphSize.height - textGraphSplittingLine;
+ int sampleStep = 1;
+ unsigned possibleHistorySize = 1;
+ if (historySize > 1) {
+ sampleStep = std::max(1, static_cast<int>(graphSize.width / (historySize - 1)));
+ possibleHistorySize = (graphSize.width + sampleStep - 1) / sampleStep + 1;
+ }
+
+ if (cpuMonitor.getHistorySize() > 1 && possibleHistorySize > 1 && --numberOfEnabledMonitors >= 0) {
+ std::deque<std::vector<double>> lastHistory = cpuMonitor.getLastHistory();
+ cv::Rect intersection = cv::Rect{cv::Point(graphPos, yPos), graphSize} & cv::Rect{0, 0, frame.cols, frame.rows};
+ if (!intersection.area()) {
+ return;
+ }
+ cv::Mat graph = frame(intersection);
+ graph = graph / 2 + cv::Scalar{127, 127, 127};
+
+ int lineXPos = graph.cols - 1;
+ std::vector<cv::Point> averageLoad(lastHistory.size());
+
+ for (int i = lastHistory.size() - 1; i >= 0; --i) {
+ double mean = std::accumulate(lastHistory[i].begin(), lastHistory[i].end(), 0.0) / lastHistory[i].size();
+ averageLoad[i] = {lineXPos, graphSize.height - static_cast<int>(mean * graphRectHeight)};
+ lineXPos -= sampleStep;
+ }
+
+ cv::polylines(graph, averageLoad, false, {255, 0, 0}, 2);
+ cv::rectangle(frame, cv::Rect{
+ cv::Point{graphPos, yPos + textGraphSplittingLine},
+ cv::Size{graphSize.width, graphSize.height - textGraphSplittingLine}
+ }, {0, 0, 0});
+ strStream.str("CPU");
+ if (!lastHistory.empty()) {
+ strStream << ": " << std::fixed << std::setprecision(1)
+ << std::accumulate(lastHistory.back().begin(), lastHistory.back().end(), 0.0)
+ / lastHistory.back().size() * 100 << '%';
+ }
+ int baseline;
+ int textWidth = cv::getTextSize(strStream.str(),
+ cv::FONT_HERSHEY_SIMPLEX,
+ textGraphSplittingLine * 0.04,
+ 1,
+ &baseline).width;
+ cv::putText(graph,
+ strStream.str(),
+ cv::Point{(graphSize.width - textWidth) / 2, textGraphSplittingLine - 1},
+ cv::FONT_HERSHEY_SIMPLEX,
+ textGraphSplittingLine * 0.04,
+ {70, 0, 0},
+ 1);
+ graphPos += graphSize.width + graphPadding;
+ }
+
+ if (distributionCpuEnabled && --numberOfEnabledMonitors >= 0) {
+ std::deque<std::vector<double>> lastHistory = cpuMonitor.getLastHistory();
+ cv::Rect intersection = cv::Rect{cv::Point(graphPos, yPos), graphSize} & cv::Rect{0, 0, frame.cols, frame.rows};
+ if (!intersection.area()) {
+ return;
+ }
+ cv::Mat graph = frame(intersection);
+ graph = graph / 2 + cv::Scalar{127, 127, 127};
+
+ if (!lastHistory.empty()) {
+ int rectXPos = 0;
+ int step = (graph.cols + lastHistory.back().size() - 1) / lastHistory.back().size(); // round up
+ double sum = 0;
+ for (double coreLoad : lastHistory.back()) {
+ sum += coreLoad;
+ int height = static_cast<int>(graphRectHeight * coreLoad);
+ cv::Rect pillar{cv::Point{rectXPos, graph.rows - height}, cv::Size{step, height}};
+ cv::rectangle(graph, pillar, {255, 0, 0}, cv::FILLED);
+ cv::rectangle(graph, pillar, {0, 0, 0});
+ rectXPos += step;
+ }
+ sum /= lastHistory.back().size();
+ int yLine = graph.rows - static_cast<int>(graphRectHeight * sum);
+ cv::line(graph, cv::Point{0, yLine}, cv::Point{graph.cols, yLine}, {0, 255, 0}, 2);
+ }
+ cv::Rect border{cv::Point{graphPos, yPos + textGraphSplittingLine},
+ cv::Size{graphSize.width, graphSize.height - textGraphSplittingLine}};
+ cv::rectangle(frame, border, {0, 0, 0});
+ strStream.str("Core load");
+ if (!lastHistory.empty()) {
+ strStream << ": " << std::fixed << std::setprecision(1)
+ << std::accumulate(lastHistory.back().begin(), lastHistory.back().end(), 0.0)
+ / lastHistory.back().size() * 100 << '%';
+ }
+ int baseline;
+ int textWidth = cv::getTextSize(strStream.str(),
+ cv::FONT_HERSHEY_SIMPLEX,
+ textGraphSplittingLine * 0.04,
+ 1,
+ &baseline).width;
+ cv::putText(graph,
+ strStream.str(),
+ cv::Point{(graphSize.width - textWidth) / 2, textGraphSplittingLine - 1},
+ cv::FONT_HERSHEY_SIMPLEX,
+ textGraphSplittingLine * 0.04,
+ {0, 70, 0});
+ graphPos += graphSize.width + graphPadding;
+ }
+
+ if (memoryMonitor.getHistorySize() > 1 && possibleHistorySize > 1 && --numberOfEnabledMonitors >= 0) {
+ std::deque<std::pair<double, double>> lastHistory = memoryMonitor.getLastHistory();
+ cv::Rect intersection = cv::Rect{cv::Point(graphPos, yPos), graphSize} & cv::Rect{0, 0, frame.cols, frame.rows};
+ if (!intersection.area()) {
+ return;
+ }
+ cv::Mat graph = frame(intersection);
+ graph = graph / 2 + cv::Scalar{127, 127, 127};
+ int histxPos = graph.cols - 1;
+ double range = std::min(memoryMonitor.getMaxMemTotal() + memoryMonitor.getMaxSwap(),
+ (memoryMonitor.getMaxMem() + memoryMonitor.getMaxSwap()) * 1.2);
+ if (lastHistory.size() > 1) {
+ for (auto memUsageIt = lastHistory.rbegin(); memUsageIt != lastHistory.rend() - 1; ++memUsageIt) {
+ constexpr double SWAP_THRESHOLD = 10.0 / 1024; // 10 MiB
+ cv::Vec3b color =
+ (memoryMonitor.getMemTotal() * 0.95 > memUsageIt->first) || (memUsageIt->second < SWAP_THRESHOLD) ?
+ cv::Vec3b{0, 255, 255} :
+ cv::Vec3b{0, 0, 255};
+ cv::Point right{histxPos,
+ graph.rows - static_cast<int>(graphRectHeight * (memUsageIt->first + memUsageIt->second) / range)};
+ cv::Point left{histxPos - sampleStep,
+ graph.rows - static_cast<int>(
+ graphRectHeight * ((memUsageIt + 1)->first + (memUsageIt + 1)->second) / range)};
+ cv::line(graph, right, left, color, 2);
+ histxPos -= sampleStep;
+ }
+ }
+
+ cv::Rect border{cv::Point{graphPos, yPos + textGraphSplittingLine},
+ cv::Size{graphSize.width, graphSize.height - textGraphSplittingLine}};
+        cv::rectangle(frame, border, {0, 0, 0});
+ if (lastHistory.empty()) {
+ strStream.str("Memory");
+ } else {
+ strStream.str("");
+ strStream << std::fixed << std::setprecision(1) << lastHistory.back().first << " + "
+ << lastHistory.back().second << " GiB";
+ }
+ int baseline;
+ int textWidth = cv::getTextSize(strStream.str(),
+ cv::FONT_HERSHEY_SIMPLEX,
+ textGraphSplittingLine * 0.04,
+ 1,
+ &baseline).width;
+ cv::putText(graph,
+ strStream.str(),
+ cv::Point{(graphSize.width - textWidth) / 2, textGraphSplittingLine - 1},
+ cv::FONT_HERSHEY_SIMPLEX,
+ textGraphSplittingLine * 0.04,
+ {0, 35, 35});
+ }
+}
+
+std::vector<std::string> Presenter::reportMeans() const {
+ std::vector<std::string> collectedData;
+ if (cpuMonitor.getHistorySize() > 1 || distributionCpuEnabled || memoryMonitor.getHistorySize() > 1) {
+ collectedData.push_back("Resources usage:");
+ }
+ if (cpuMonitor.getHistorySize() > 1) {
+ std::ostringstream collectedDataStream;
+ collectedDataStream << std::fixed << std::setprecision(1);
+ collectedDataStream << "\tMean core utilization: ";
+ for (double mean : cpuMonitor.getMeanCpuLoad()) {
+ collectedDataStream << mean * 100 << "% ";
+ }
+ collectedData.push_back(collectedDataStream.str());
+ }
+ if (distributionCpuEnabled) {
+ std::ostringstream collectedDataStream;
+ collectedDataStream << std::fixed << std::setprecision(1);
+ std::vector<double> meanCpuLoad = cpuMonitor.getMeanCpuLoad();
+ double mean = std::accumulate(meanCpuLoad.begin(), meanCpuLoad.end(), 0.0) / meanCpuLoad.size();
+ collectedDataStream << "\tMean CPU utilization: " << mean * 100 << "%";
+ collectedData.push_back(collectedDataStream.str());
+ }
+ if (memoryMonitor.getHistorySize() > 1) {
+ std::ostringstream collectedDataStream;
+ collectedDataStream << std::fixed << std::setprecision(1);
+ collectedDataStream << "\tMemory mean usage: " << memoryMonitor.getMeanMem() << " GiB";
+ collectedData.push_back(collectedDataStream.str());
+ collectedDataStream.str("");
+ collectedDataStream << "\tMean swap usage: " << memoryMonitor.getMeanSwap() << " GiB";
+ collectedData.push_back(collectedDataStream.str());
+ }
+
+ return collectedData;
+}
diff --git a/python/openvino/runtime/common/monitors/src/query_wrapper.cpp b/python/openvino/runtime/common/monitors/src/query_wrapper.cpp
new file mode 100644
index 0000000..5c238d1
--- /dev/null
+++ b/python/openvino/runtime/common/monitors/src/query_wrapper.cpp
@@ -0,0 +1,22 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "monitors/query_wrapper.h"
+
+#include <Windows.h>
+#include <system_error>
+
+QueryWrapper::QueryWrapper() {
+ PDH_STATUS status = PdhOpenQuery(NULL, NULL, &query);
+ if (ERROR_SUCCESS != status) {
+ throw std::system_error(status, std::system_category(), "PdhOpenQuery() failed");
+ }
+}
+QueryWrapper::~QueryWrapper() {
+ PdhCloseQuery(query);
+}
+
+QueryWrapper::operator PDH_HQUERY() const {
+ return query;
+}
diff --git a/python/openvino/runtime/common/pipelines/CMakeLists.txt b/python/openvino/runtime/common/pipelines/CMakeLists.txt
new file mode 100644
index 0000000..b8b128a
--- /dev/null
+++ b/python/openvino/runtime/common/pipelines/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright (C) 2018-2019 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+file(GLOB SOURCES ./src/*.cpp)
+file(GLOB HEADERS ./include/pipelines/*.h)
+
+# Create named folders for the sources within the .vcproj
+# Empty name lists them directly under the .vcproj
+source_group("src" FILES ${SOURCES})
+source_group("include" FILES ${HEADERS})
+
+add_library(pipelines STATIC ${SOURCES} ${HEADERS})
+target_include_directories(pipelines PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
+target_link_libraries(pipelines PRIVATE openvino::runtime models utils opencv_core opencv_imgproc)
diff --git a/python/openvino/runtime/common/pipelines/include/pipelines/async_pipeline.h b/python/openvino/runtime/common/pipelines/include/pipelines/async_pipeline.h
new file mode 100644
index 0000000..6661c00
--- /dev/null
+++ b/python/openvino/runtime/common/pipelines/include/pipelines/async_pipeline.h
@@ -0,0 +1,121 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <stdint.h>
+
+#include <condition_variable>
+#include <exception>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+
+#include <openvino/openvino.hpp>
+
+#include <models/results.h>
+#include <utils/performance_metrics.hpp>
+
+#include "pipelines/requests_pool.h"
+
+class ModelBase;
+struct InputData;
+struct MetaData;
+struct ModelConfig;
+
+/// Base class for asynchronous pipelines.
+/// Derived classes should add functions for data submission and output processing.
+class AsyncPipeline {
+public:
+ /// Loads model and performs required initialization
+    /// @param modelInstance pointer to the model object. The object it points to must not be destroyed
+    /// manually after the pointer is passed to this function.
+ /// @param config - fine tuning configuration for model
+ /// @param core - reference to ov::Core instance to use.
+ /// If it is omitted, new instance of ov::Core will be created inside.
+ AsyncPipeline(std::unique_ptr<ModelBase>&& modelInstance, const ModelConfig& config, ov::Core& core);
+ virtual ~AsyncPipeline();
+
+    /// Waits until either output data becomes available or the pipeline is ready to accept more input data.
+ /// @param shouldKeepOrder if true, function will treat results as ready only if next sequential result (frame) is
+ /// ready (so results can be extracted in the same order as they were submitted). Otherwise, function will return if
+ /// any result is ready.
+ void waitForData(bool shouldKeepOrder = true);
+
+    /// @returns true if there are idle infer requests in the pool
+    /// and the next frame can be submitted for processing, false otherwise.
+ bool isReadyToProcess() {
+ return requestsPool->isIdleRequestAvailable();
+ }
+
+    /// Waits for all currently submitted requests to be completed.
+ void waitForTotalCompletion() {
+ if (requestsPool)
+ requestsPool->waitForTotalCompletion();
+ }
+
+ /// Submits data to the model for inference
+ /// @param inputData - input data to be submitted
+ /// @param metaData - shared pointer to metadata container.
+ /// Might be null. This pointer will be passed through pipeline and put to the final result structure.
+ /// @returns -1 if image cannot be scheduled for processing (there's no free InferRequest available).
+ /// Otherwise returns unique sequential frame ID for this particular request. Same frame ID will be written in the
+ /// result structure.
+ virtual int64_t submitData(const InputData& inputData, const std::shared_ptr<MetaData>& metaData);
+
+ /// Gets available data from the queue
+ /// @param shouldKeepOrder if true, function will treat results as ready only if next sequential result (frame) is
+ /// ready (so results can be extracted in the same order as they were submitted). Otherwise, function will return if
+ /// any result is ready.
+ virtual std::unique_ptr<ResultBase> getResult(bool shouldKeepOrder = true);
+
+    PerformanceMetrics getInferenceMetrics() {
+ return inferenceMetrics;
+ }
+ PerformanceMetrics getPreprocessMetrics() {
+ return preprocessMetrics;
+ }
+ PerformanceMetrics getPostprocessMetrics() {
+ return postprocessMetrics;
+ }
+
+protected:
+ /// Returns processed result, if available
+ /// @param shouldKeepOrder if true, function will return processed data sequentially,
+ /// keeping original frames order (as they were submitted). Otherwise, function will return processed data in random
+ /// order.
+    /// @returns InferenceResult with processed information, or an empty InferenceResult (with negative frameID)
+    /// if there are no results yet.
+ virtual InferenceResult getInferenceResult(bool shouldKeepOrder);
+
+ std::unique_ptr<RequestsPool> requestsPool;
+ std::unordered_map<int64_t, InferenceResult> completedInferenceResults;
+
+ ov::CompiledModel compiledModel;
+
+ std::mutex mtx;
+ std::condition_variable condVar;
+
+ int64_t inputFrameId = 0;
+ int64_t outputFrameId = 0;
+
+ std::exception_ptr callbackException = nullptr;
+
+ std::unique_ptr<ModelBase> model;
+ PerformanceMetrics inferenceMetrics;
+ PerformanceMetrics preprocessMetrics;
+ PerformanceMetrics postprocessMetrics;
+};
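+
+// Illustrative submit/poll loop (a sketch only; the model type, config, frame source and render() are
+// assumptions):
+//
+//     ov::Core core;
+//     AsyncPipeline pipeline(std::move(model), config, core);
+//     while (keepRunning) {
+//         if (pipeline.isReadyToProcess()) {
+//             cv::Mat frame = nextFrame();
+//             pipeline.submitData(ImageInputData(frame),
+//                                 std::make_shared<ImageMetaData>(frame, std::chrono::steady_clock::now()));
+//         }
+//         pipeline.waitForData();                          // blocks until a result or a free request appears
+//         while (auto result = pipeline.getResult()) {     // drain results in submission order
+//             render(result->asRef<ImageResult>());
+//         }
+//     }
+//     pipeline.waitForTotalCompletion();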
diff --git a/python/openvino/runtime/common/pipelines/include/pipelines/metadata.h b/python/openvino/runtime/common/pipelines/include/pipelines/metadata.h
new file mode 100644
index 0000000..aca18ee
--- /dev/null
+++ b/python/openvino/runtime/common/pipelines/include/pipelines/metadata.h
@@ -0,0 +1,51 @@
+/*
+// Copyright (C) 2018-2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+#include <utils/ocv_common.hpp>
+
+struct MetaData {
+ virtual ~MetaData() {}
+
+ template <class T>
+ T& asRef() {
+ return dynamic_cast<T&>(*this);
+ }
+
+ template <class T>
+ const T& asRef() const {
+ return dynamic_cast<const T&>(*this);
+ }
+};
+
+struct ImageMetaData : public MetaData {
+ cv::Mat img;
+ std::chrono::steady_clock::time_point timeStamp;
+
+ ImageMetaData() {}
+
+ ImageMetaData(cv::Mat img, std::chrono::steady_clock::time_point timeStamp) : img(img), timeStamp(timeStamp) {}
+};
+
+struct ClassificationImageMetaData : public ImageMetaData {
+ unsigned int groundTruthId;
+
+ ClassificationImageMetaData(cv::Mat img,
+ std::chrono::steady_clock::time_point timeStamp,
+ unsigned int groundTruthId)
+ : ImageMetaData(img, timeStamp),
+ groundTruthId(groundTruthId) {}
+};
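+
+// The asRef() helpers downcast via dynamic_cast, so requesting a mismatched type throws std::bad_cast.
+// Illustrative use (the result variable is an assumption):
+//
+//     auto timeStamp = result.metaData->asRef<ImageMetaData>().timeStamp;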
diff --git a/python/openvino/runtime/common/pipelines/include/pipelines/requests_pool.h b/python/openvino/runtime/common/pipelines/include/pipelines/requests_pool.h
new file mode 100644
index 0000000..d9b220e
--- /dev/null
+++ b/python/openvino/runtime/common/pipelines/include/pipelines/requests_pool.h
@@ -0,0 +1,67 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include <stddef.h>
+
+#include <mutex>
+#include <utility>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+/// This class stores the pool of infer requests for the asynchronous pipeline.
+///
+class RequestsPool {
+public:
+ RequestsPool(ov::CompiledModel& compiledModel, unsigned int size);
+ ~RequestsPool();
+
+    /// Returns an idle request from the pool. The returned request is automatically marked as in use (the status
+    /// is reset after the request completes processing). This function is thread safe as long as the request is
+    /// used only until the setRequestIdle() call.
+    /// @returns an idle request, or a default-constructed ov::InferRequest if all requests are in use.
+ ov::InferRequest getIdleRequest();
+
+    /// Sets a particular request to the idle state.
+    /// This function is thread safe as long as the provided request is not used after this call.
+    /// @param request - request to be returned to the idle state
+ void setRequestIdle(const ov::InferRequest& request);
+
+ /// Returns number of requests in use. This function is thread safe.
+ /// @returns number of requests in use
+ size_t getInUseRequestsCount();
+
+    /// Checks whether an idle request is available in the pool. This function is thread safe.
+    /// @returns true if at least one request is idle, false otherwise
+ bool isIdleRequestAvailable();
+
+    /// Waits for completion of every non-idle request in the pool.
+    /// getIdleRequest() should not be called together with this function or after it, to avoid a race condition
+    /// or an invalid state.
+ void waitForTotalCompletion();
+
+ /// Returns list of all infer requests in the pool.
+ /// @returns list of all infer requests in the pool.
+ std::vector<ov::InferRequest> getInferRequestsList();
+
+private:
+ std::vector<std::pair<ov::InferRequest, bool>> requests;
+ size_t numRequestsInUse;
+ std::mutex mtx;
+};
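+
+// Illustrative sketch of the intended request life cycle (an assumption based on the comments above):
+//
+//     ov::InferRequest request = pool.getIdleRequest();
+//     if (request) {  // a default-constructed request means the pool is exhausted
+//         request.set_callback([&pool, request](std::exception_ptr) mutable {
+//             pool.setRequestIdle(request);  // hand the request back once inference has finished
+//         });
+//         request.start_async();
+//     }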
diff --git a/python/openvino/runtime/common/pipelines/src/async_pipeline.cpp b/python/openvino/runtime/common/pipelines/src/async_pipeline.cpp
new file mode 100644
index 0000000..3259280
--- /dev/null
+++ b/python/openvino/runtime/common/pipelines/src/async_pipeline.cpp
@@ -0,0 +1,166 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "pipelines/async_pipeline.h"
+
+#include <chrono>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include <models/model_base.h>
+#include <models/results.h>
+#include <utils/config_factory.h>
+#include <utils/performance_metrics.hpp>
+#include <utils/slog.hpp>
+
+struct InputData;
+struct MetaData;
+
+AsyncPipeline::AsyncPipeline(std::unique_ptr<ModelBase>&& modelInstance, const ModelConfig& config, ov::Core& core)
+ : model(std::move(modelInstance)) {
+ compiledModel = model->compileModel(config, core);
+ // --------------------------- Create infer requests ------------------------------------------------
+ unsigned int nireq = config.maxAsyncRequests;
+ if (nireq == 0) {
+ try {
+ nireq = compiledModel.get_property(ov::optimal_number_of_infer_requests);
+ } catch (const ov::Exception& ex) {
+ throw std::runtime_error(
+ std::string("Every device used with the demo should support compiled model's property "
+ "'OPTIMAL_NUMBER_OF_INFER_REQUESTS'. Failed to query the property with error: ") +
+ ex.what());
+ }
+ }
+ slog::info << "\tNumber of inference requests: " << nireq << slog::endl;
+ requestsPool.reset(new RequestsPool(compiledModel, nireq));
+ // --------------------------- Call onLoadCompleted to complete initialization of model -------------
+ model->onLoadCompleted(requestsPool->getInferRequestsList());
+}
+
+AsyncPipeline::~AsyncPipeline() {
+ waitForTotalCompletion();
+}
+
+void AsyncPipeline::waitForData(bool shouldKeepOrder) {
+ std::unique_lock<std::mutex> lock(mtx);
+
+ condVar.wait(lock, [&]() {
+ return callbackException != nullptr || requestsPool->isIdleRequestAvailable() ||
+ (shouldKeepOrder ? completedInferenceResults.find(outputFrameId) != completedInferenceResults.end()
+ : !completedInferenceResults.empty());
+ });
+
+ if (callbackException) {
+ std::rethrow_exception(callbackException);
+ }
+}
+
+int64_t AsyncPipeline::submitData(const InputData& inputData, const std::shared_ptr<MetaData>& metaData) {
+ auto frameID = inputFrameId;
+
+ auto request = requestsPool->getIdleRequest();
+ if (!request) {
+ return -1;
+ }
+
+ auto startTime = std::chrono::steady_clock::now();
+ auto internalModelData = model->preprocess(inputData, request);
+ preprocessMetrics.update(startTime);
+
+ request.set_callback(
+ [this, request, frameID, internalModelData, metaData, startTime](std::exception_ptr ex) mutable {
+ {
+ const std::lock_guard<std::mutex> lock(mtx);
+ inferenceMetrics.update(startTime);
+ try {
+ if (ex) {
+ std::rethrow_exception(ex);
+ }
+ InferenceResult result;
+
+ result.frameId = frameID;
+ result.metaData = std::move(metaData);
+ result.internalModelData = std::move(internalModelData);
+
+ for (const auto& outName : model->getOutputsNames()) {
+ auto tensor = request.get_tensor(outName);
+ result.outputsData.emplace(outName, tensor);
+ }
+
+ completedInferenceResults.emplace(frameID, result);
+ requestsPool->setRequestIdle(request);
+ } catch (...) {
+ if (!callbackException) {
+ callbackException = std::current_exception();
+ }
+ }
+ }
+ condVar.notify_one();
+ });
+
+ inputFrameId++;
+ if (inputFrameId < 0)
+ inputFrameId = 0;
+
+ request.start_async();
+
+ return frameID;
+}
+
+std::unique_ptr<ResultBase> AsyncPipeline::getResult(bool shouldKeepOrder) {
+ auto infResult = AsyncPipeline::getInferenceResult(shouldKeepOrder);
+ if (infResult.IsEmpty()) {
+ return std::unique_ptr<ResultBase>();
+ }
+ auto startTime = std::chrono::steady_clock::now();
+ auto result = model->postprocess(infResult);
+ postprocessMetrics.update(startTime);
+
+ *result = static_cast<ResultBase&>(infResult);
+ return result;
+}
+
+InferenceResult AsyncPipeline::getInferenceResult(bool shouldKeepOrder) {
+ InferenceResult retVal;
+ {
+ const std::lock_guard<std::mutex> lock(mtx);
+
+ const auto& it =
+ shouldKeepOrder ? completedInferenceResults.find(outputFrameId) : completedInferenceResults.begin();
+
+ if (it != completedInferenceResults.end()) {
+ retVal = std::move(it->second);
+ completedInferenceResults.erase(it);
+ }
+ }
+
+ if (!retVal.IsEmpty()) {
+ outputFrameId = retVal.frameId;
+ outputFrameId++;
+ if (outputFrameId < 0) {
+ outputFrameId = 0;
+ }
+ }
+
+ return retVal;
+}
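+
+// Editorial note: a minimal usage sketch for AsyncPipeline, hedged: `MyModel`
+// (a ModelBase subclass), `haveFrames()`, `nextFrame()` and `render()` are
+// hypothetical placeholders, not part of this patch.
+//
+//   ov::Core core;
+//   AsyncPipeline pipeline(std::unique_ptr<ModelBase>(new MyModel("model.xml")),
+//                          ModelConfig{}, core);
+//   while (haveFrames()) {
+//       // submitData() returns -1 when no infer request is currently idle
+//       if (pipeline.submitData(nextFrame(), std::make_shared<MetaData>()) < 0) {
+//           pipeline.waitForData();  // block until a result or an idle request
+//       }
+//       while (std::unique_ptr<ResultBase> result = pipeline.getResult()) {
+//           render(*result);
+//       }
+//   }
+//   // ~AsyncPipeline() calls waitForTotalCompletion() for outstanding requests.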
diff --git a/python/openvino/runtime/common/pipelines/src/requests_pool.cpp b/python/openvino/runtime/common/pipelines/src/requests_pool.cpp
new file mode 100644
index 0000000..93230c9
--- /dev/null
+++ b/python/openvino/runtime/common/pipelines/src/requests_pool.cpp
@@ -0,0 +1,94 @@
+/*
+// Copyright (C) 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "pipelines/requests_pool.h"
+
+#include <algorithm>
+#include <exception>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+RequestsPool::RequestsPool(ov::CompiledModel& compiledModel, unsigned int size) : numRequestsInUse(0) {
+ for (unsigned int infReqId = 0; infReqId < size; ++infReqId) {
+ requests.emplace_back(compiledModel.create_infer_request(), false);
+ }
+}
+
+RequestsPool::~RequestsPool() {
+ // Setting empty callback to free resources allocated for previously assigned lambdas
+ for (auto& pair : requests) {
+ pair.first.set_callback([](std::exception_ptr) {});
+ }
+}
+
+ov::InferRequest RequestsPool::getIdleRequest() {
+ std::lock_guard<std::mutex> lock(mtx);
+
+ const auto& it = std::find_if(requests.begin(), requests.end(), [](const std::pair<ov::InferRequest, bool>& x) {
+ return !x.second;
+ });
+ if (it == requests.end()) {
+ return ov::InferRequest();
+ } else {
+ it->second = true;
+ numRequestsInUse++;
+ return it->first;
+ }
+}
+
+void RequestsPool::setRequestIdle(const ov::InferRequest& request) {
+ std::lock_guard<std::mutex> lock(mtx);
+ const auto& it = std::find_if(this->requests.begin(),
+ this->requests.end(),
+ [&request](const std::pair<ov::InferRequest, bool>& x) {
+ return x.first == request;
+ });
+ it->second = false;
+ numRequestsInUse--;
+}
+
+size_t RequestsPool::getInUseRequestsCount() {
+ std::lock_guard<std::mutex> lock(mtx);
+ return numRequestsInUse;
+}
+
+bool RequestsPool::isIdleRequestAvailable() {
+ std::lock_guard<std::mutex> lock(mtx);
+ return numRequestsInUse < requests.size();
+}
+
+void RequestsPool::waitForTotalCompletion() {
+ // Do not synchronize here to avoid deadlock (despite synchronization in other functions)
+ // Request status will be changed to idle in callback,
+ // upon completion of request we're waiting for. Synchronization is applied there
+ for (auto pair : requests) {
+ if (pair.second) {
+ pair.first.wait();
+ }
+ }
+}
+
+std::vector<ov::InferRequest> RequestsPool::getInferRequestsList() {
+ std::lock_guard<std::mutex> lock(mtx);
+ std::vector<ov::InferRequest> retVal;
+ retVal.reserve(requests.size());
+ for (auto& pair : requests) {
+ retVal.push_back(pair.first);
+ }
+
+ return retVal;
+}
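+
+// Editorial note: a hedged sketch of the acquire/release protocol RequestsPool
+// expects from its callers (the callback body is illustrative only):
+//
+//   RequestsPool pool(compiledModel, 4);
+//   ov::InferRequest req = pool.getIdleRequest();  // default-constructed if none is idle
+//   if (req) {
+//       req.set_callback([&pool, req](std::exception_ptr) mutable {
+//           pool.setRequestIdle(req);              // hand the request back to the pool
+//       });
+//       req.start_async();
+//   }
+//   pool.waitForTotalCompletion();                 // drain before shutdown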
diff --git a/python/openvino/runtime/common/utils/CMakeLists.txt b/python/openvino/runtime/common/utils/CMakeLists.txt
new file mode 100644
index 0000000..e1e7293
--- /dev/null
+++ b/python/openvino/runtime/common/utils/CMakeLists.txt
@@ -0,0 +1,61 @@
+# Copyright (C) 2018-2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+set(TARGET_NAME "ie_samples_utils")
+
+file(GLOB_RECURSE SOURCES "*.cpp" "*.hpp" "*.h")
+source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SOURCES})
+
+add_library(${TARGET_NAME} STATIC EXCLUDE_FROM_ALL ${SOURCES})
+set_target_properties(${TARGET_NAME} PROPERTIES FOLDER "src")
+
+target_include_directories(${TARGET_NAME}
+ PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
+
+find_package(OpenVINO REQUIRED COMPONENTS Runtime)
+
+if(TARGET gflags)
+ set(GFLAGS_TARGET gflags)
+else()
+ if(EXISTS /etc/debian_version)
+ set(gflags_component nothreads_static)
+ else()
+ find_package(gflags QUIET OPTIONAL_COMPONENTS nothreads_static)
+ if(NOT gflags_FOUND)
+ set(gflags_component shared)
+ else()
+ set(gflags_component nothreads_static)
+ endif()
+ endif()
+ find_package(gflags QUIET OPTIONAL_COMPONENTS ${gflags_component})
+ if(gflags_FOUND)
+ if(TARGET ${GFLAGS_TARGET})
+ # nothing
+ elseif(TARGET gflags_nothreads-static)
+ # Debian 9: gflags_component is ignored
+ set(GFLAGS_TARGET gflags_nothreads-static)
+ elseif(TARGET gflags-shared)
+ # gflags shared case for CentOS / RHEL / Fedora
+ set(GFLAGS_TARGET gflags-shared)
+ else()
+ message(FATAL_ERROR "Internal error: failed to find imported target 'gflags' using '${gflags_component}' component")
+ endif()
+
+ message(STATUS "gflags (${gflags_VERSION}) is found at ${gflags_DIR} using '${gflags_component}' component")
+ endif()
+
+ if(NOT gflags_FOUND)
+ if(EXISTS "$ENV{INTEL_OPENVINO_DIR}/samples/cpp/thirdparty/gflags")
+ add_subdirectory("$ENV{INTEL_OPENVINO_DIR}/samples/cpp/thirdparty/gflags" "${CMAKE_CURRENT_BINARY_DIR}/gflag")
+ set(GFLAGS_TARGET gflags_nothreads_static)
+ else()
+ message(FATAL_ERROR "Failed to find 'gflags' library using '${gflags_component}' component")
+ endif()
+ endif()
+endif()
+
+target_link_libraries(${TARGET_NAME} PUBLIC openvino::runtime ${GFLAGS_TARGET})
+
+if(COMMAND add_clang_format_target)
+ add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME})
+endif()
diff --git a/python/openvino/runtime/common/utils/include/samples/args_helper.hpp b/python/openvino/runtime/common/utils/include/samples/args_helper.hpp
new file mode 100644
index 0000000..6626140
--- /dev/null
+++ b/python/openvino/runtime/common/utils/include/samples/args_helper.hpp
@@ -0,0 +1,112 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief a header file with common samples functionality
+ * @file args_helper.hpp
+ */
+
+#pragma once
+
+// clang-format off
+#include <map>
+#include <string>
+#include <vector>
+
+#include "openvino/openvino.hpp"
+
+#include "samples/slog.hpp"
+// clang-format on
+
+/**
+ * @brief This function checks the input args and the existence of the specified files in a given folder
+ * @param files updated vector of verified input files (output parameter)
+ * @param arg path to a file or folder to be checked for existence
+ */
+void readInputFilesArguments(std::vector<std::string>& files, const std::string& arg);
+
+/**
+ * @brief This function finds the -i/--images key among the input args.
+ * It is necessary to process multiple values for a single key.
+ * @param files updated vector of verified input files (output parameter)
+ */
+void parseInputFilesArguments(std::vector<std::string>& files);
+std::map<std::string, std::string> parseArgMap(std::string argMap);
+
+void printInputAndOutputsInfo(const ov::Model& network);
+
+void configurePrePostProcessing(std::shared_ptr<ov::Model>& function,
+ const std::string& ip,
+ const std::string& op,
+ const std::string& iop,
+ const std::string& il,
+ const std::string& ol,
+ const std::string& iol,
+ const std::string& iml,
+ const std::string& oml,
+ const std::string& ioml);
+
+ov::element::Type getPrecision2(const std::string& value);
+
+template <class T>
+void printInputAndOutputsInfoShort(const T& network) {
+ slog::info << "Network inputs:" << slog::endl;
+ for (auto&& input : network.inputs()) {
+ std::string in_name;
+ std::string node_name;
+
+ // Workaround for "tensor has no name" issue
+ try {
+ for (const auto& name : input.get_names()) {
+ in_name += name + " , ";
+ }
+ in_name = in_name.substr(0, in_name.size() - 3);
+ } catch (const ov::Exception&) {
+ }
+
+ try {
+ node_name = input.get_node()->get_friendly_name();
+ } catch (const ov::Exception&) {
+ }
+
+ if (in_name == "") {
+ in_name = "***NO_NAME***";
+ }
+ if (node_name == "") {
+ node_name = "***NO_NAME***";
+ }
+
+ slog::info << " " << in_name << " (node: " << node_name << ") : " << input.get_element_type() << " / "
+ << ov::layout::get_layout(input).to_string() << " / " << input.get_partial_shape() << slog::endl;
+ }
+
+ slog::info << "Network outputs:" << slog::endl;
+ for (auto&& output : network.outputs()) {
+ std::string out_name;
+ std::string node_name;
+
+ // Workaround for "tensor has no name" issue
+ try {
+ for (const auto& name : output.get_names()) {
+ out_name += name + " , ";
+ }
+ out_name = out_name.substr(0, out_name.size() - 3);
+ } catch (const ov::Exception&) {
+ }
+ try {
+ node_name = output.get_node()->get_input_node_ptr(0)->get_friendly_name();
+ } catch (const ov::Exception&) {
+ }
+
+ if (out_name == "") {
+ out_name = "***NO_NAME***";
+ }
+ if (node_name == "") {
+ node_name = "***NO_NAME***";
+ }
+
+ slog::info << " " << out_name << " (node: " << node_name << ") : " << output.get_element_type() << " / "
+ << ov::layout::get_layout(output).to_string() << " / " << output.get_partial_shape() << slog::endl;
+ }
+}
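+
+// Editorial note: a hedged example of the helpers above; "model.xml" is a
+// placeholder path:
+//
+//   ov::Core core;
+//   std::shared_ptr<ov::Model> model = core.read_model("model.xml");
+//   printInputAndOutputsInfoShort(*model);  // names / element types / layouts / shapes
+//   std::vector<std::string> files;
+//   parseInputFilesArguments(files);        // gathers the -i/--images values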
diff --git a/python/openvino/runtime/common/utils/include/samples/classification_results.h b/python/openvino/runtime/common/utils/include/samples/classification_results.h
new file mode 100644
index 0000000..e1bc20f
--- /dev/null
+++ b/python/openvino/runtime/common/utils/include/samples/classification_results.h
@@ -0,0 +1,205 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief a header file with output classification results
+ * @file classification_results.h
+ */
+#pragma once
+
+#include <algorithm>
+#include <iomanip>
+#include <iostream>
+#include <numeric>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "openvino/openvino.hpp"
+
+/**
+ * @class ClassificationResult
+ * @brief A ClassificationResult creates an output table with results
+ */
+class ClassificationResult {
+private:
+ const std::string _classidStr = "classid";
+ const std::string _probabilityStr = "probability";
+ const std::string _labelStr = "label";
+ size_t _nTop;
+ ov::Tensor _outTensor;
+ const std::vector<std::string> _labels;
+ const std::vector<std::string> _imageNames;
+ const size_t _batchSize;
+ std::vector<unsigned> _results;
+
+ void printHeader() {
+ std::cout << _classidStr << " " << _probabilityStr;
+ if (!_labels.empty())
+ std::cout << " " << _labelStr;
+ std::string classidColumn(_classidStr.length(), '-');
+ std::string probabilityColumn(_probabilityStr.length(), '-');
+ std::string labelColumn(_labelStr.length(), '-');
+ std::cout << std::endl << classidColumn << " " << probabilityColumn;
+ if (!_labels.empty())
+ std::cout << " " << labelColumn;
+ std::cout << std::endl;
+ }
+
+ /**
+ * @brief Gets the top n results from a tensor
+ *
+ * @param n Top n count
+ * @param input 1D tensor that contains probabilities
+ * @param output Vector of indexes for the top n places
+ */
+ template <class T>
+ void topResults(unsigned int n, const ov::Tensor& input, std::vector<unsigned>& output) {
+ ov::Shape shape = input.get_shape();
+ size_t input_rank = shape.size();
+ OPENVINO_ASSERT(input_rank != 0 && shape[0] != 0, "Input tensor has incorrect dimensions!");
+ size_t batchSize = shape[0];
+ std::vector<unsigned> indexes(input.get_size() / batchSize);
+
+ n = static_cast<unsigned>(std::min<size_t>((size_t)n, input.get_size()));
+ output.resize(n * batchSize);
+
+ for (size_t i = 0; i < batchSize; i++) {
+ const size_t offset = i * (input.get_size() / batchSize);
+ const T* batchData = input.data<const T>();
+ batchData += offset;
+
+ std::iota(std::begin(indexes), std::end(indexes), 0);
+ std::partial_sort(std::begin(indexes),
+ std::begin(indexes) + n,
+ std::end(indexes),
+ [&batchData](unsigned l, unsigned r) {
+ return batchData[l] > batchData[r];
+ });
+ for (unsigned j = 0; j < n; j++) {
+ output.at(i * n + j) = indexes.at(j);
+ }
+ }
+ }
+
+ /**
+ * @brief Gets the top n results from a tensor of any supported element type
+ *
+ * @param n Top n count
+ * @param input 1D tensor that contains probabilities
+ * @param output Vector of indexes for the top n places
+ */
+ void topResults(unsigned int n, const ov::Tensor& input, std::vector<unsigned>& output) {
+#define TENSOR_TOP_RESULT(elem_type) \
+ case ov::element::Type_t::elem_type: { \
+ using tensor_type = ov::fundamental_type_for<ov::element::Type_t::elem_type>; \
+ topResults<tensor_type>(n, input, output); \
+ break; \
+ }
+
+ switch (input.get_element_type()) {
+ TENSOR_TOP_RESULT(f32);
+ TENSOR_TOP_RESULT(f64);
+ TENSOR_TOP_RESULT(f16);
+ TENSOR_TOP_RESULT(i16);
+ TENSOR_TOP_RESULT(u8);
+ TENSOR_TOP_RESULT(i8);
+ TENSOR_TOP_RESULT(u16);
+ TENSOR_TOP_RESULT(i32);
+ TENSOR_TOP_RESULT(u32);
+ TENSOR_TOP_RESULT(i64);
+ TENSOR_TOP_RESULT(u64);
+ default:
+ OPENVINO_ASSERT(false, "Unsupported tensor element type: ", input.get_element_type());
+ }
+
+#undef TENSOR_TOP_RESULT
+ }
+
+public:
+ explicit ClassificationResult(const ov::Tensor& output_tensor,
+ const std::vector<std::string>& image_names = {},
+ size_t batch_size = 1,
+ size_t num_of_top = 10,
+ const std::vector<std::string>& labels = {})
+ : _nTop(num_of_top),
+ _outTensor(output_tensor),
+ _labels(labels),
+ _imageNames(image_names),
+ _batchSize(batch_size),
+ _results() {
+ OPENVINO_ASSERT(_imageNames.size() == _batchSize, "Batch size should be equal to the number of images.");
+
+ topResults(_nTop, _outTensor, _results);
+ }
+
+ /**
+ * @brief prints formatted classification results
+ */
+ void show() {
+ /** Print the result iterating over each batch **/
+ std::ios::fmtflags fmt(std::cout.flags());
+ std::cout << std::endl << "Top " << _nTop << " results:" << std::endl << std::endl;
+ for (size_t image_id = 0; image_id < _batchSize; ++image_id) {
+ std::string out(_imageNames[image_id].begin(), _imageNames[image_id].end());
+ std::cout << "Image " << out;
+ std::cout.flush();
+ std::cout.clear();
+ std::cout << std::endl << std::endl;
+ printHeader();
+
+ for (size_t id = image_id * _nTop, cnt = 0; id < (image_id + 1) * _nTop; ++cnt, ++id) {
+ std::cout.precision(7);
+ // Getting probability for resulting class
+ const auto index = _results.at(id) + image_id * (_outTensor.get_size() / _batchSize);
+ const auto result = _outTensor.data<const float>()[index];
+
+ std::cout << std::setw(static_cast<int>(_classidStr.length())) << std::left << _results.at(id) << " ";
+ std::cout << std::left << std::setw(static_cast<int>(_probabilityStr.length())) << std::fixed << result;
+
+ if (!_labels.empty()) {
+ std::cout << " " + _labels[_results.at(id)];
+ }
+ std::cout << std::endl;
+ }
+ std::cout << std::endl;
+ }
+ std::cout.flags(fmt);
+ }
+
+ void print() {
+ /** Print the result iterating over each batch **/
+ std::ios::fmtflags fmt(std::cout.flags());
+ std::cout << std::endl << "Top " << _nTop << " results:" << std::endl << std::endl;
+ for (size_t image_id = 0; image_id < _batchSize; ++image_id) {
+ std::string out(_imageNames[image_id].begin(), _imageNames[image_id].end());
+ std::cout << "Image " << out;
+ std::cout.flush();
+ std::cout.clear();
+ std::cout << std::endl << std::endl;
+ printHeader();
+
+ for (size_t id = image_id * _nTop, cnt = 0; id < (image_id + 1) * _nTop; ++cnt, ++id) {
+ std::cout.precision(7);
+ // Getting probability for resulting class
+ const auto index = _results.at(id) + image_id * (_outTensor.get_size() / _batchSize);
+ const auto result = _outTensor.data<const float>()[index];
+ std::cout << std::setw(static_cast<int>(_classidStr.length())) << std::left << _results.at(id) << " ";
+ std::cout << std::left << std::setw(static_cast<int>(_probabilityStr.length())) << std::fixed << result;
+
+ if (!_labels.empty()) {
+ std::cout << " " + _labels[_results.at(id)];
+ }
+ std::cout << std::endl;
+ }
+ std::cout << std::endl;
+ }
+ std::cout.flags(fmt);
+ }
+
+ /**
+ * @brief returns the classification results in a vector
+ */
+ std::vector<unsigned> getResults() {
+ return _results;
+ }
+};
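+
+// Editorial note: a hedged usage sketch; `request` is assumed to be a completed
+// ov::InferRequest of a single-output classification model:
+//
+//   ov::Tensor output = request.get_output_tensor();
+//   ClassificationResult res(output,
+//                            {"cat.bmp"},  // one image name per batch element
+//                            1,            // batch size
+//                            5);           // number of top results
+//   res.show();
+//   std::vector<unsigned> topIds = res.getResults();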
diff --git a/python/openvino/runtime/common/utils/include/samples/common.hpp b/python/openvino/runtime/common/utils/include/samples/common.hpp
new file mode 100644
index 0000000..448fd96
--- /dev/null
+++ b/python/openvino/runtime/common/utils/include/samples/common.hpp
@@ -0,0 +1,1429 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief a header file with common samples functionality
+ * @file common.hpp
+ */
+
+#pragma once
+
+#include <algorithm>
+#include <cctype>
+#include <ctime>
+#include <fstream>
+#include <functional>
+#include <iomanip>
+#include <iostream>
+#include <limits>
+#include <list>
+#include <map>
+#include <random>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+using std::setprecision;
+
+// clang-format off
+#include <inference_engine.hpp>
+#include "openvino/openvino.hpp"
+#include "slog.hpp"
+// clang-format on
+
+// @brief performance counters sort
+static constexpr char pcSort[] = "sort";
+static constexpr char pcNoSort[] = "no_sort";
+static constexpr char pcSimpleSort[] = "simple_sort";
+
+#ifndef UNUSED
+# if defined(_MSC_VER) && !defined(__clang__)
+# define UNUSED
+# else
+# define UNUSED __attribute__((unused))
+# endif
+#endif
+
+/**
+ * @brief Unicode string wrappers
+ */
+#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
+# define tchar wchar_t
+# define tstring std::wstring
+# define tmain wmain
+# define TSTRING2STRING(tstr) wstring2string(tstr)
+#else
+# define tchar char
+# define tstring std::string
+# define tmain main
+# define TSTRING2STRING(tstr) tstr
+#endif
+
+#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
+
+/**
+ * @brief Convert wstring to string
+ * @param ref on wstring
+ * @return string
+ */
+inline std::string wstring2string(const std::wstring& wstr) {
+ std::string str;
+ for (auto&& wc : wstr)
+ str += static_cast<char>(wc);
+ return str;
+}
+#endif
+
+/**
+ * @brief trim from start (in place)
+ * @param s - string to trim
+ */
+inline void ltrim(std::string& s) {
+ s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c) {
+ return !std::isspace(c);
+ }));
+}
+
+/**
+ * @brief trim from end (in place)
+ * @param s - string to trim
+ */
+inline void rtrim(std::string& s) {
+ s.erase(std::find_if(s.rbegin(),
+ s.rend(),
+ [](int c) {
+ return !std::isspace(c);
+ })
+ .base(),
+ s.end());
+}
+
+/**
+ * @brief trim from both ends (in place)
+ * @param s - string to trim
+ */
+inline std::string& trim(std::string& s) {
+ ltrim(s);
+ rtrim(s);
+ return s;
+}
+/**
+ * @brief Gets filename without extension
+ * @param filepath - full file name
+ * @return filename without extension
+ */
+inline std::string fileNameNoExt(const std::string& filepath) {
+ auto pos = filepath.rfind('.');
+ if (pos == std::string::npos)
+ return filepath;
+ return filepath.substr(0, pos);
+}
+
+/**
+ * @brief Get extension from filename
+ * @param filename - name of the file which extension should be extracted
+ * @return string with extracted file extension
+ */
+inline std::string fileExt(const std::string& filename) {
+ auto pos = filename.rfind('.');
+ if (pos == std::string::npos)
+ return "";
+ return filename.substr(pos + 1);
+}
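+
+// Editorial note: behaviour of the two helpers above, for reference:
+//   fileNameNoExt("dir/img.bmp") == "dir/img"    fileExt("dir/img.bmp") == "bmp"
+//   fileNameNoExt("noext")       == "noext"      fileExt("noext")       == ""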
+
+inline slog::LogStream& operator<<(slog::LogStream& os, const ov::Version& version) {
+ os << "Build ................................. ";
+ os << version.buildNumber << slog::endl;
+
+ return os;
+}
+
+inline slog::LogStream& operator<<(slog::LogStream& os, const std::map<std::string, ov::Version>& versions) {
+ for (auto&& version : versions) {
+ os << version.first << slog::endl;
+ os << version.second << slog::endl;
+ }
+
+ return os;
+}
+
+/**
+ * @class Color
+ * @brief A Color class stores channels of a given color
+ */
+class Color {
+private:
+ unsigned char _r;
+ unsigned char _g;
+ unsigned char _b;
+
+public:
+ /**
+ * A default constructor.
+ * @param r - value for red channel
+ * @param g - value for green channel
+ * @param b - value for blue channel
+ */
+ Color(unsigned char r, unsigned char g, unsigned char b) : _r(r), _g(g), _b(b) {}
+
+ inline unsigned char red() {
+ return _r;
+ }
+
+ inline unsigned char blue() {
+ return _b;
+ }
+
+ inline unsigned char green() {
+ return _g;
+ }
+};
+
+// TODO : keep only one version of writeOutputBMP
+
+/**
+ * @brief Writes output data to a BMP image
+ * @param data - output data (one class index per pixel)
+ * @param classesNum - the number of classes
+ * @param outFile - output stream to write the image to
+ */
+static UNUSED void writeOutputBmp(std::vector<std::vector<size_t>> data, size_t classesNum, std::ostream& outFile) {
+ unsigned int seed = (unsigned int)time(NULL);
+ // Known colors for training classes from Cityscape dataset
+ static std::vector<Color> colors = {
+ {128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190}, {153, 153, 153},
+ {30, 170, 250}, {0, 220, 220}, {35, 142, 107}, {152, 251, 152}, {180, 130, 70}, {60, 20, 220},
+ {0, 0, 255}, {142, 0, 0}, {70, 0, 0}, {100, 60, 0}, {90, 0, 0}, {230, 0, 0},
+ {32, 11, 119}, {0, 74, 111}, {81, 0, 81}};
+
+ while (classesNum > colors.size()) {
+ static std::mt19937 rng(seed);
+ std::uniform_int_distribution<int> dist(0, 255);
+ Color color(dist(rng), dist(rng), dist(rng));
+ colors.push_back(color);
+ }
+
+ unsigned char file[14] = {
+ 'B',
+ 'M', // magic
+ 0,
+ 0,
+ 0,
+ 0, // size in bytes
+ 0,
+ 0, // app data
+ 0,
+ 0, // app data
+ 40 + 14,
+ 0,
+ 0,
+ 0 // start of data offset
+ };
+ unsigned char info[40] = {
+ 40, 0, 0, 0, // info hd size
+ 0, 0, 0, 0, // width
+ 0, 0, 0, 0, // height
+ 1, 0, // number color planes
+ 24, 0, // bits per pixel
+ 0, 0, 0, 0, // compression is none
+ 0, 0, 0, 0, // image bits size
+ 0x13, 0x0B, 0, 0, // horz resolution in pixel / m
+ 0x13, 0x0B, 0, 0, // vert resolution (0x03C3 = 96 dpi, 0x0B13 = 72 dpi)
+ 0, 0, 0, 0, // #colors in palette
+ 0, 0, 0, 0, // #important colors
+ };
+
+ auto height = data.size();
+ auto width = data.at(0).size();
+
+ OPENVINO_ASSERT(
+ height < (size_t)std::numeric_limits<int32_t>::max() && width < (size_t)std::numeric_limits<int32_t>::max(),
+ "File size is too big: ",
+ height,
+ " X ",
+ width);
+
+ int padSize = static_cast<int>(4 - (width * 3) % 4) % 4;
+ int sizeData = static_cast<int>(width * height * 3 + height * padSize);
+ int sizeAll = sizeData + sizeof(file) + sizeof(info);
+
+ file[2] = (unsigned char)(sizeAll);
+ file[3] = (unsigned char)(sizeAll >> 8);
+ file[4] = (unsigned char)(sizeAll >> 16);
+ file[5] = (unsigned char)(sizeAll >> 24);
+
+ info[4] = (unsigned char)(width);
+ info[5] = (unsigned char)(width >> 8);
+ info[6] = (unsigned char)(width >> 16);
+ info[7] = (unsigned char)(width >> 24);
+
+ int32_t negativeHeight = -(int32_t)height;
+ info[8] = (unsigned char)(negativeHeight);
+ info[9] = (unsigned char)(negativeHeight >> 8);
+ info[10] = (unsigned char)(negativeHeight >> 16);
+ info[11] = (unsigned char)(negativeHeight >> 24);
+
+ info[20] = (unsigned char)(sizeData);
+ info[21] = (unsigned char)(sizeData >> 8);
+ info[22] = (unsigned char)(sizeData >> 16);
+ info[23] = (unsigned char)(sizeData >> 24);
+
+ outFile.write(reinterpret_cast<char*>(file), sizeof(file));
+ outFile.write(reinterpret_cast<char*>(info), sizeof(info));
+
+ unsigned char pad[3] = {0, 0, 0};
+
+ for (size_t y = 0; y < height; y++) {
+ for (size_t x = 0; x < width; x++) {
+ unsigned char pixel[3];
+ size_t index = data.at(y).at(x);
+ pixel[0] = colors.at(index).red();
+ pixel[1] = colors.at(index).green();
+ pixel[2] = colors.at(index).blue();
+ outFile.write(reinterpret_cast<char*>(pixel), 3);
+ }
+ outFile.write(reinterpret_cast<char*>(pad), padSize);
+ }
+}
+
+/**
+ * @brief Writes output data to BMP image
+ * @param name - image name
+ * @param data - output data
+ * @param height - height of the target image
+ * @param width - width of the target image
+ * @return false if error else true
+ */
+static UNUSED bool writeOutputBmp(std::string name, unsigned char* data, size_t height, size_t width) {
+ std::ofstream outFile;
+ outFile.open(name, std::ofstream::binary);
+ if (!outFile.is_open()) {
+ return false;
+ }
+
+ unsigned char file[14] = {
+ 'B',
+ 'M', // magic
+ 0,
+ 0,
+ 0,
+ 0, // size in bytes
+ 0,
+ 0, // app data
+ 0,
+ 0, // app data
+ 40 + 14,
+ 0,
+ 0,
+ 0 // start of data offset
+ };
+ unsigned char info[40] = {
+ 40, 0, 0, 0, // info hd size
+ 0, 0, 0, 0, // width
+ 0, 0, 0, 0, // height
+ 1, 0, // number color planes
+ 24, 0, // bits per pixel
+ 0, 0, 0, 0, // compression is none
+ 0, 0, 0, 0, // image bits size
+ 0x13, 0x0B, 0, 0, // horz resolution in pixel / m
+ 0x13, 0x0B, 0, 0, // vert resolution (0x03C3 = 96 dpi, 0x0B13 = 72 dpi)
+ 0, 0, 0, 0, // #colors in palette
+ 0, 0, 0, 0, // #important colors
+ };
+
+ OPENVINO_ASSERT(
+ height < (size_t)std::numeric_limits<int32_t>::max() && width < (size_t)std::numeric_limits<int32_t>::max(),
+ "File size is too big: ",
+ height,
+ " X ",
+ width);
+
+ int padSize = static_cast<int>(4 - (width * 3) % 4) % 4;
+ int sizeData = static_cast<int>(width * height * 3 + height * padSize);
+ int sizeAll = sizeData + sizeof(file) + sizeof(info);
+
+ file[2] = (unsigned char)(sizeAll);
+ file[3] = (unsigned char)(sizeAll >> 8);
+ file[4] = (unsigned char)(sizeAll >> 16);
+ file[5] = (unsigned char)(sizeAll >> 24);
+
+ info[4] = (unsigned char)(width);
+ info[5] = (unsigned char)(width >> 8);
+ info[6] = (unsigned char)(width >> 16);
+ info[7] = (unsigned char)(width >> 24);
+
+ int32_t negativeHeight = -(int32_t)height;
+ info[8] = (unsigned char)(negativeHeight);
+ info[9] = (unsigned char)(negativeHeight >> 8);
+ info[10] = (unsigned char)(negativeHeight >> 16);
+ info[11] = (unsigned char)(negativeHeight >> 24);
+
+ info[20] = (unsigned char)(sizeData);
+ info[21] = (unsigned char)(sizeData >> 8);
+ info[22] = (unsigned char)(sizeData >> 16);
+ info[23] = (unsigned char)(sizeData >> 24);
+
+ outFile.write(reinterpret_cast<char*>(file), sizeof(file));
+ outFile.write(reinterpret_cast<char*>(info), sizeof(info));
+
+ unsigned char pad[3] = {0, 0, 0};
+
+ for (size_t y = 0; y < height; y++) {
+ for (size_t x = 0; x < width; x++) {
+ unsigned char pixel[3];
+ pixel[0] = data[y * width * 3 + x * 3];
+ pixel[1] = data[y * width * 3 + x * 3 + 1];
+ pixel[2] = data[y * width * 3 + x * 3 + 2];
+
+ outFile.write(reinterpret_cast<char*>(pixel), 3);
+ }
+ outFile.write(reinterpret_cast<char*>(pad), padSize);
+ }
+ return true;
+}
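+
+// Editorial note: a hedged example of dumping an interleaved BGR buffer with
+// the overload above (filling the buffer is the caller's responsibility):
+//
+//   std::vector<unsigned char> bgr(height * width * 3);
+//   if (!writeOutputBmp("out.bmp", bgr.data(), height, width)) {
+//       slog::err << "Failed to write out.bmp" << slog::endl;
+//   }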
+
+/**
+ * @brief Adds colored rectangles to the image
+ * @param data - data where rectangles are put
+ * @param height - height of the image
+ * @param width - width of the image
+ * @param rectangles - vector points for the rectangle, should be 4x compared to num classes
+ * @param classes - vector of classes
+ * @param thickness - thickness of a line (in pixels) to be used for bounding boxes
+ */
+static UNUSED void addRectangles(unsigned char* data,
+ size_t height,
+ size_t width,
+ std::vector<int> rectangles,
+ std::vector<int> classes,
+ int thickness = 1) {
+ std::vector<Color> colors = {// colors to be used for bounding boxes
+ {128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190},
+ {153, 153, 153}, {30, 170, 250}, {0, 220, 220}, {35, 142, 107}, {152, 251, 152},
+ {180, 130, 70}, {60, 20, 220}, {0, 0, 255}, {142, 0, 0}, {70, 0, 0},
+ {100, 60, 0}, {90, 0, 0}, {230, 0, 0}, {32, 11, 119}, {0, 74, 111},
+ {81, 0, 81}};
+
+ if (rectangles.size() % 4 != 0 || rectangles.size() / 4 != classes.size()) {
+ return;
+ }
+
+ for (size_t i = 0; i < classes.size(); i++) {
+ int x = rectangles.at(i * 4);
+ int y = rectangles.at(i * 4 + 1);
+ int w = rectangles.at(i * 4 + 2);
+ int h = rectangles.at(i * 4 + 3);
+
+ int cls = classes.at(i) % colors.size(); // color of a bounding box line
+
+ if (x < 0)
+ x = 0;
+ if (y < 0)
+ y = 0;
+ if (w < 0)
+ w = 0;
+ if (h < 0)
+ h = 0;
+
+ if (static_cast<std::size_t>(x) >= width) {
+ x = static_cast<int>(width - 1);
+ w = 0;
+ thickness = 1;
+ }
+ if (static_cast<std::size_t>(y) >= height) {
+ y = static_cast<int>(height - 1);
+ h = 0;
+ thickness = 1;
+ }
+
+ if (static_cast<std::size_t>(x + w) >= width) {
+ w = static_cast<int>(width - x - 1);
+ }
+ if (static_cast<std::size_t>(y + h) >= height) {
+ h = static_cast<int>(height - y - 1);
+ }
+
+ thickness = std::min(std::min(thickness, w / 2 + 1), h / 2 + 1);
+
+ size_t shift_first;
+ size_t shift_second;
+ for (int t = 0; t < thickness; t++) {
+ shift_first = (y + t) * width * 3;
+ shift_second = (y + h - t) * width * 3;
+ for (int ii = x; ii < x + w + 1; ii++) {
+ data[shift_first + ii * 3] = colors.at(cls).red();
+ data[shift_first + ii * 3 + 1] = colors.at(cls).green();
+ data[shift_first + ii * 3 + 2] = colors.at(cls).blue();
+ data[shift_second + ii * 3] = colors.at(cls).red();
+ data[shift_second + ii * 3 + 1] = colors.at(cls).green();
+ data[shift_second + ii * 3 + 2] = colors.at(cls).blue();
+ }
+ }
+
+ for (int t = 0; t < thickness; t++) {
+ shift_first = (x + t) * 3;
+ shift_second = (x + w - t) * 3;
+ for (int ii = y; ii < y + h + 1; ii++) {
+ data[shift_first + ii * width * 3] = colors.at(cls).red();
+ data[shift_first + ii * width * 3 + 1] = colors.at(cls).green();
+ data[shift_first + ii * width * 3 + 2] = colors.at(cls).blue();
+ data[shift_second + ii * width * 3] = colors.at(cls).red();
+ data[shift_second + ii * width * 3 + 1] = colors.at(cls).green();
+ data[shift_second + ii * width * 3 + 2] = colors.at(cls).blue();
+ }
+ }
+ }
+}
+
+// DLA PATCH BEGIN - Re-implement functions needed for dla_benchmark that was removed from OPENVINO 2022.3.0
+inline std::size_t getTensorWidth(const InferenceEngine::TensorDesc& desc) {
+ const auto& layout = desc.getLayout();
+ const auto& dims = desc.getDims();
+ const auto& size = dims.size();
+ if ((size >= 2) && (layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC || layout == InferenceEngine::Layout::NCDHW ||
+ layout == InferenceEngine::Layout::NDHWC || layout == InferenceEngine::Layout::OIHW || layout == InferenceEngine::Layout::GOIHW ||
+ layout == InferenceEngine::Layout::OIDHW || layout == InferenceEngine::Layout::GOIDHW || layout == InferenceEngine::Layout::CHW ||
+ layout == InferenceEngine::Layout::HW)) {
+ // Regardless of layout, dimensions are stored in fixed order
+ return dims.back();
+ } else {
+ IE_THROW() << "Tensor does not have width dimension";
+ }
+ return 0;
+}
+
+inline std::size_t getTensorHeight(const InferenceEngine::TensorDesc& desc) {
+ const auto& layout = desc.getLayout();
+ const auto& dims = desc.getDims();
+ const auto& size = dims.size();
+ if ((size >= 2) && (layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC || layout == InferenceEngine::Layout::NCDHW ||
+ layout == InferenceEngine::Layout::NDHWC || layout == InferenceEngine::Layout::OIHW || layout == InferenceEngine::Layout::GOIHW ||
+ layout == InferenceEngine::Layout::OIDHW || layout == InferenceEngine::Layout::GOIDHW || layout == InferenceEngine::Layout::CHW ||
+ layout == InferenceEngine::Layout::HW)) {
+ // Regardless of layout, dimensions are stored in fixed order
+ return dims.at(size - 2);
+ } else {
+ IE_THROW() << "Tensor does not have height dimension";
+ }
+ return 0;
+}
+
+inline std::size_t getTensorChannels(const InferenceEngine::TensorDesc& desc) {
+ const auto& layout = desc.getLayout();
+ if (layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC || layout == InferenceEngine::Layout::NCDHW ||
+ layout == InferenceEngine::Layout::NDHWC || layout == InferenceEngine::Layout::C || layout == InferenceEngine::Layout::CHW ||
+ layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::CN) {
+ // Regardless of layout, dimensions are stored in fixed order
+ const auto& dims = desc.getDims();
+ switch (desc.getLayoutByDims(dims)) {
+ case InferenceEngine::Layout::C:
+ return dims.at(0);
+ case InferenceEngine::Layout::NC:
+ return dims.at(1);
+ case InferenceEngine::Layout::CHW:
+ return dims.at(0);
+ case InferenceEngine::Layout::NCHW:
+ return dims.at(1);
+ case InferenceEngine::Layout::NCDHW:
+ return dims.at(1);
+ case InferenceEngine::Layout::SCALAR: // [[fallthrough]]
+ case InferenceEngine::Layout::BLOCKED: // [[fallthrough]]
+ default:
+ IE_THROW() << "Tensor does not have channels dimension";
+ }
+ } else {
+ IE_THROW() << "Tensor does not have channels dimension";
+ }
+ return 0;
+}
+
+inline std::size_t getTensorBatch(const InferenceEngine::TensorDesc& desc) {
+ const auto& layout = desc.getLayout();
+ if (layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC || layout == InferenceEngine::Layout::NCDHW ||
+ layout == InferenceEngine::Layout::NDHWC || layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::CN) {
+ // Regardless of layout, dimensions are stored in fixed order
+ const auto& dims = desc.getDims();
+ switch (desc.getLayoutByDims(dims)) {
+ case InferenceEngine::Layout::NC:
+ return dims.at(0);
+ case InferenceEngine::Layout::NCHW:
+ return dims.at(0);
+ case InferenceEngine::Layout::NCDHW:
+ return dims.at(0);
+ case InferenceEngine::Layout::CHW: // [[fallthrough]]
+ case InferenceEngine::Layout::C: // [[fallthrough]]
+ case InferenceEngine::Layout::SCALAR: // [[fallthrough]]
+ case InferenceEngine::Layout::BLOCKED: // [[fallthrough]]
+ default:
+ IE_THROW() << "Tensor does not have channels dimension";
+ }
+ } else {
+ IE_THROW() << "Tensor does not have channels dimension";
+ }
+ return 0;
+}
+
+// DLA PATCH END
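+
+// Editorial note: a hedged example for the re-implemented helpers above, using
+// the legacy InferenceEngine::TensorDesc API:
+//
+//   InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32,
+//                                    {1, 3, 224, 224},
+//                                    InferenceEngine::Layout::NCHW);
+//   getTensorBatch(desc);     // 1
+//   getTensorChannels(desc);  // 3
+//   getTensorHeight(desc);    // 224
+//   getTensorWidth(desc);     // 224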
+
+/**
+ * @brief Writes output data to a BMP image
+ * @param data - output data (interleaved BGR pixels)
+ * @param height - height of the target image
+ * @param width - width of the target image
+ * @param outFile - output stream to write the image to
+ * @return false if error else true
+ */
+
+static UNUSED bool writeOutputBmp(unsigned char* data, size_t height, size_t width, std::ostream& outFile) {
+ unsigned char file[14] = {
+ 'B',
+ 'M', // magic
+ 0,
+ 0,
+ 0,
+ 0, // size in bytes
+ 0,
+ 0, // app data
+ 0,
+ 0, // app data
+ 40 + 14,
+ 0,
+ 0,
+ 0 // start of data offset
+ };
+ unsigned char info[40] = {
+ 40, 0, 0, 0, // info hd size
+ 0, 0, 0, 0, // width
+ 0, 0, 0, 0, // height
+ 1, 0, // number color planes
+ 24, 0, // bits per pixel
+ 0, 0, 0, 0, // compression is none
+ 0, 0, 0, 0, // image bits size
+ 0x13, 0x0B, 0, 0, // horz resolution in pixel / m
+ 0x13, 0x0B, 0, 0, // vert resolution (0x03C3 = 96 dpi, 0x0B13 = 72 dpi)
+ 0, 0, 0, 0, // #colors in palette
+ 0, 0, 0, 0, // #important colors
+ };
+
+ OPENVINO_ASSERT(
+ height < (size_t)std::numeric_limits<int32_t>::max() && width < (size_t)std::numeric_limits<int32_t>::max(),
+ "File size is too big: ",
+ height,
+ " X ",
+ width);
+
+ int padSize = static_cast<int>(4 - (width * 3) % 4) % 4;
+ int sizeData = static_cast<int>(width * height * 3 + height * padSize);
+ int sizeAll = sizeData + sizeof(file) + sizeof(info);
+
+ file[2] = (unsigned char)(sizeAll);
+ file[3] = (unsigned char)(sizeAll >> 8);
+ file[4] = (unsigned char)(sizeAll >> 16);
+ file[5] = (unsigned char)(sizeAll >> 24);
+
+ info[4] = (unsigned char)(width);
+ info[5] = (unsigned char)(width >> 8);
+ info[6] = (unsigned char)(width >> 16);
+ info[7] = (unsigned char)(width >> 24);
+
+ int32_t negativeHeight = -(int32_t)height;
+ info[8] = (unsigned char)(negativeHeight);
+ info[9] = (unsigned char)(negativeHeight >> 8);
+ info[10] = (unsigned char)(negativeHeight >> 16);
+ info[11] = (unsigned char)(negativeHeight >> 24);
+
+ info[20] = (unsigned char)(sizeData);
+ info[21] = (unsigned char)(sizeData >> 8);
+ info[22] = (unsigned char)(sizeData >> 16);
+ info[23] = (unsigned char)(sizeData >> 24);
+
+ outFile.write(reinterpret_cast<char*>(file), sizeof(file));
+ outFile.write(reinterpret_cast<char*>(info), sizeof(info));
+
+ unsigned char pad[3] = {0, 0, 0};
+
+ for (size_t y = 0; y < height; y++) {
+ for (size_t x = 0; x < width; x++) {
+ unsigned char pixel[3];
+ pixel[0] = data[y * width * 3 + x * 3];
+ pixel[1] = data[y * width * 3 + x * 3 + 1];
+ pixel[2] = data[y * width * 3 + x * 3 + 2];
+ outFile.write(reinterpret_cast<char*>(pixel), 3);
+ }
+ outFile.write(reinterpret_cast<char*>(pad), padSize);
+ }
+
+ return true;
+}
+
+static UNUSED void printPerformanceCounts(const std::map<std::string, ov::ProfilingInfo>& performanceMap,
+ std::ostream& stream,
+ std::string deviceName,
+ bool bshowHeader = true) {
+ std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
+ // Print performance counts
+ if (bshowHeader) {
+ stream << std::endl << "performance counts:" << std::endl << std::endl;
+ }
+ std::ios::fmtflags fmt(std::cout.flags());
+
+ for (const auto& it : performanceMap) {
+ std::string toPrint(it.first);
+ const int maxLayerName = 30;
+
+ if (it.first.length() >= maxLayerName) {
+ toPrint = it.first.substr(0, maxLayerName - 4);
+ toPrint += "...";
+ }
+
+ stream << std::setw(maxLayerName) << std::left << toPrint;
+ switch (it.second.status) {
+ case ov::ProfilingInfo::Status::EXECUTED:
+ stream << std::setw(15) << std::left << "EXECUTED";
+ break;
+ case ov::ProfilingInfo::Status::NOT_RUN:
+ stream << std::setw(15) << std::left << "NOT_RUN";
+ break;
+ case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
+ stream << std::setw(15) << std::left << "OPTIMIZED_OUT";
+ break;
+ }
+ stream << std::setw(30) << std::left << "layerType: " + std::string(it.second.node_type) + " ";
+ stream << std::setw(20) << std::left << "realTime: " + std::to_string(it.second.real_time.count());
+ stream << std::setw(20) << std::left << "cpu: " + std::to_string(it.second.cpu_time.count());
+ stream << " execType: " << it.second.exec_type << std::endl;
+ if (it.second.real_time.count() > 0) {
+ totalTime += it.second.real_time;
+ }
+ }
+ stream << std::setw(20) << std::left << "Total time: " + std::to_string(totalTime.count()) << " microseconds"
+ << std::endl;
+ std::cout << std::endl;
+ std::cout << "Full device name: " << deviceName << std::endl;
+ std::cout << std::endl;
+ std::cout.flags(fmt);
+}
+
+/**
+ * @brief This class represents an object that is found by an object detection net
+ */
+class DetectedObject {
+public:
+ int objectType;
+ float xmin, xmax, ymin, ymax, prob;
+ bool difficult;
+
+ DetectedObject(int _objectType,
+ float _xmin,
+ float _ymin,
+ float _xmax,
+ float _ymax,
+ float _prob,
+ bool _difficult = false)
+ : objectType(_objectType),
+ xmin(_xmin),
+ xmax(_xmax),
+ ymin(_ymin),
+ ymax(_ymax),
+ prob(_prob),
+ difficult(_difficult) {}
+
+ DetectedObject(const DetectedObject& other) = default;
+
+ static float ioU(const DetectedObject& detectedObject1_, const DetectedObject& detectedObject2_) {
+ // A small margin could be added here to eliminate degenerate (empty) boxes;
+ // it is currently disabled:
+ float epsilon = 0; // 1e-5f;
+
+ DetectedObject detectedObject1(detectedObject1_.objectType,
+ (detectedObject1_.xmin - epsilon),
+ (detectedObject1_.ymin - epsilon),
+ (detectedObject1_.xmax - epsilon),
+ (detectedObject1_.ymax - epsilon),
+ detectedObject1_.prob);
+ DetectedObject detectedObject2(detectedObject2_.objectType,
+ (detectedObject2_.xmin + epsilon),
+ (detectedObject2_.ymin + epsilon),
+ (detectedObject2_.xmax),
+ (detectedObject2_.ymax),
+ detectedObject2_.prob);
+
+ if (detectedObject1.objectType != detectedObject2.objectType) {
+ // objects are different, so the result is 0
+ return 0.0f;
+ }
+
+ if (detectedObject1.xmax < detectedObject1.xmin)
+ return 0.0;
+ if (detectedObject1.ymax < detectedObject1.ymin)
+ return 0.0;
+ if (detectedObject2.xmax < detectedObject2.xmin)
+ return 0.0;
+ if (detectedObject2.ymax < detectedObject2.ymin)
+ return 0.0;
+
+ float xmin = (std::max)(detectedObject1.xmin, detectedObject2.xmin);
+ float ymin = (std::max)(detectedObject1.ymin, detectedObject2.ymin);
+ float xmax = (std::min)(detectedObject1.xmax, detectedObject2.xmax);
+ float ymax = (std::min)(detectedObject1.ymax, detectedObject2.ymax);
+
+ // Caffe adds 1 to every length if the box isn't normalized. So do we...
+ float addendum;
+ if (xmax > 1 || ymax > 1)
+ addendum = 1;
+ else
+ addendum = 0;
+
+ // intersection
+ float intr;
+ if ((xmax >= xmin) && (ymax >= ymin)) {
+ intr = (addendum + xmax - xmin) * (addendum + ymax - ymin);
+ } else {
+ intr = 0.0f;
+ }
+
+ // union
+ float square1 = (addendum + detectedObject1.xmax - detectedObject1.xmin) *
+ (addendum + detectedObject1.ymax - detectedObject1.ymin);
+ float square2 = (addendum + detectedObject2.xmax - detectedObject2.xmin) *
+ (addendum + detectedObject2.ymax - detectedObject2.ymin);
+
+ float unn = square1 + square2 - intr;
+
+ return static_cast<float>(intr) / unn;
+ }
+
+ DetectedObject scale(float scale_x, float scale_y) const {
+ return DetectedObject(objectType,
+ xmin * scale_x,
+ ymin * scale_y,
+ xmax * scale_x,
+ ymax * scale_y,
+ prob,
+ difficult);
+ }
+};
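+
+// Editorial note: a worked ioU example for the normalized case (addendum = 0):
+//
+//   DetectedObject a(0, 0.00f, 0.00f, 0.50f, 0.50f, 0.9f);
+//   DetectedObject b(0, 0.25f, 0.25f, 0.75f, 0.75f, 0.8f);
+//   // intersection = 0.25 * 0.25          = 0.0625
+//   // union        = 0.25 + 0.25 - 0.0625 = 0.4375
+//   DetectedObject::ioU(a, b);              // 0.0625 / 0.4375 = ~0.142857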
+
+class ImageDescription {
+public:
+ const std::list<DetectedObject> alist;
+ const bool check_probs;
+
+ explicit ImageDescription(const std::list<DetectedObject>& _alist, bool _check_probs = false)
+ : alist(_alist),
+ check_probs(_check_probs) {}
+
+ static float ioUMultiple(const ImageDescription& detectedObjects, const ImageDescription& desiredObjects) {
+ const ImageDescription *detectedObjectsSmall, *detectedObjectsBig;
+ bool check_probs = desiredObjects.check_probs;
+
+ if (detectedObjects.alist.size() < desiredObjects.alist.size()) {
+ detectedObjectsSmall = &detectedObjects;
+ detectedObjectsBig = &desiredObjects;
+ } else {
+ detectedObjectsSmall = &desiredObjects;
+ detectedObjectsBig = &detectedObjects;
+ }
+
+ std::list<DetectedObject> doS = detectedObjectsSmall->alist;
+ std::list<DetectedObject> doB = detectedObjectsBig->alist;
+
+ float fullScore = 0.0f;
+ while (doS.size() > 0) {
+ float score = 0.0f;
+ std::list<DetectedObject>::iterator bestJ = doB.end();
+ for (auto j = doB.begin(); j != doB.end(); j++) {
+ float curscore = DetectedObject::ioU(*doS.begin(), *j);
+ if (score < curscore) {
+ score = curscore;
+ bestJ = j;
+ }
+ }
+
+ float coeff = 1.0;
+ if (check_probs) {
+ if (bestJ != doB.end()) {
+ float mn = std::min((*bestJ).prob, (*doS.begin()).prob);
+ float mx = std::max((*bestJ).prob, (*doS.begin()).prob);
+
+ coeff = mn / mx;
+ }
+ }
+
+ doS.pop_front();
+ if (bestJ != doB.end())
+ doB.erase(bestJ);
+ fullScore += coeff * score;
+ }
+ fullScore /= detectedObjectsBig->alist.size();
+
+ return fullScore;
+ }
+
+ ImageDescription scale(float scale_x, float scale_y) const {
+ std::list<DetectedObject> slist;
+ for (auto& dob : alist) {
+ slist.push_back(dob.scale(scale_x, scale_y));
+ }
+ return ImageDescription(slist, check_probs);
+ }
+};
+
+struct AveragePrecisionCalculator {
+private:
+ enum MatchKind { TruePositive, FalsePositive };
+
+ /**
+ * Here we count all TP and FP matches for all the classes in all the images.
+ */
+ std::map<int, std::vector<std::pair<double, MatchKind>>> matches;
+
+ std::map<int, int> N;
+
+ double threshold;
+
+ static bool SortBBoxDescend(const DetectedObject& bbox1, const DetectedObject& bbox2) {
+ return bbox1.prob > bbox2.prob;
+ }
+
+ static bool SortPairDescend(const std::pair<double, MatchKind>& p1, const std::pair<double, MatchKind>& p2) {
+ return p1.first > p2.first;
+ }
+
+public:
+ explicit AveragePrecisionCalculator(double _threshold) : threshold(_threshold) {}
+
+ // Naming convention: desiredObjects are the ground-truth boxes,
+ // detectedObjects are the detections to be scored.
+
+ void consumeImage(const ImageDescription& detectedObjects, const ImageDescription& desiredObjects) {
+ // Collecting IoU values
+ std::vector<bool> visited(desiredObjects.alist.size(), false);
+ std::vector<DetectedObject> bboxes{std::begin(detectedObjects.alist), std::end(detectedObjects.alist)};
+ std::sort(bboxes.begin(), bboxes.end(), SortBBoxDescend);
+
+ for (auto&& detObj : bboxes) {
+ // Searching for the best match to this detection
+ // Searching for desired object
+ float overlap_max = -1;
+ int jmax = -1;
+ auto desmax = desiredObjects.alist.end();
+
+ int j = 0;
+ for (auto desObj = desiredObjects.alist.begin(); desObj != desiredObjects.alist.end(); desObj++, j++) {
+ double iou = DetectedObject::ioU(detObj, *desObj);
+ if (iou > overlap_max) {
+ overlap_max = static_cast<float>(iou);
+ jmax = j;
+ desmax = desObj;
+ }
+ }
+
+ MatchKind mk;
+ if (overlap_max >= threshold) {
+ if (!desmax->difficult) {
+ if (!visited[jmax]) {
+ mk = TruePositive;
+ visited[jmax] = true;
+ } else {
+ mk = FalsePositive;
+ }
+ matches[detObj.objectType].push_back(std::make_pair(detObj.prob, mk));
+ }
+ } else {
+ mk = FalsePositive;
+ matches[detObj.objectType].push_back(std::make_pair(detObj.prob, mk));
+ }
+ }
+
+ for (auto desObj = desiredObjects.alist.begin(); desObj != desiredObjects.alist.end(); desObj++) {
+ if (!desObj->difficult) {
+ N[desObj->objectType]++;
+ }
+ }
+ }
+
+ std::map<int, double> calculateAveragePrecisionPerClass() const {
+ /**
+ * Precision-to-TP curve per class (a variation of precision-to-recall curve without
+ * dividing into N)
+ */
+ std::map<int, std::map<int, double>> precisionToTP;
+
+ std::map<int, double> res;
+
+ for (auto m : matches) {
+ // Sorting
+ std::sort(m.second.begin(), m.second.end(), SortPairDescend);
+
+ int clazz = m.first;
+ int TP = 0, FP = 0;
+
+ std::vector<double> prec;
+ std::vector<double> rec;
+
+ for (auto mm : m.second) {
+ // Here we are descending in a probability value
+ MatchKind mk = mm.second;
+ if (mk == TruePositive)
+ TP++;
+ else if (mk == FalsePositive)
+ FP++;
+
+ double precision = static_cast<double>(TP) / (TP + FP);
+ double recall = 0;
+ if (N.find(clazz) != N.end()) {
+ recall = static_cast<double>(TP) / N.at(clazz);
+ }
+
+ prec.push_back(precision);
+ rec.push_back(recall);
+ }
+
+ int num = static_cast<int>(rec.size());
+
+ // 11point from Caffe
+ double ap = 0;
+ std::vector<float> max_precs(11, 0.);
+ int start_idx = num - 1;
+ for (int j = 10; j >= 0; --j) {
+ for (int i = start_idx; i >= 0; --i) {
+ if (rec[i] < j / 10.) {
+ start_idx = i;
+ if (j > 0) {
+ max_precs[j - 1] = max_precs[j];
+ }
+ break;
+ } else {
+ if (max_precs[j] < prec[i]) {
+ max_precs[j] = static_cast<float>(prec[i]);
+ }
+ }
+ }
+ }
+ for (int j = 10; j >= 0; --j) {
+ ap += max_precs[j] / 11;
+ }
+ res[clazz] = ap;
+ }
+
+ return res;
+ }
+};
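+
+// Editorial note: calculateAveragePrecisionPerClass() above follows the
+// 11-point PASCAL VOC / Caffe interpolation:
+//
+//   AP = (1/11) * sum over r in {0.0, 0.1, ..., 1.0} of max{ p(r') : r' >= r }
+//
+// i.e. for each of the 11 recall anchors, take the maximum precision achieved
+// at any recall level not below that anchor, then average.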
+
+/**
+ * @brief Adds colored rectangles to the image
+ * @param data - data where rectangles are put
+ * @param height - height of the rectangle
+ * @param width - width of the rectangle
+ * @param detectedObjects - vector of detected objects
+ */
+static UNUSED void addRectangles(unsigned char* data,
+ size_t height,
+ size_t width,
+ std::vector<DetectedObject> detectedObjects) {
+ std::vector<Color> colors = {{128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190},
+ {153, 153, 153}, {30, 170, 250}, {0, 220, 220}, {35, 142, 107}, {152, 251, 152},
+ {180, 130, 70}, {60, 20, 220}, {0, 0, 255}, {142, 0, 0}, {70, 0, 0},
+ {100, 60, 0}, {90, 0, 0}, {230, 0, 0}, {32, 11, 119}, {0, 74, 111},
+ {81, 0, 81}};
+
+ for (size_t i = 0; i < detectedObjects.size(); i++) {
+ int cls = detectedObjects[i].objectType % colors.size();
+
+ int xmin = static_cast<int>(detectedObjects[i].xmin * width);
+ int xmax = static_cast<int>(detectedObjects[i].xmax * width);
+ int ymin = static_cast<int>(detectedObjects[i].ymin * height);
+ int ymax = static_cast<int>(detectedObjects[i].ymax * height);
+
+ size_t shift_first = ymin * width * 3;
+ size_t shift_second = ymax * width * 3;
+ for (int x = xmin; x < xmax; x++) {
+ data[shift_first + x * 3] = colors.at(cls).red();
+ data[shift_first + x * 3 + 1] = colors.at(cls).green();
+ data[shift_first + x * 3 + 2] = colors.at(cls).blue();
+ data[shift_second + x * 3] = colors.at(cls).red();
+ data[shift_second + x * 3 + 1] = colors.at(cls).green();
+ data[shift_second + x * 3 + 2] = colors.at(cls).blue();
+ }
+
+ shift_first = xmin * 3;
+ shift_second = xmax * 3;
+ for (int y = ymin; y < ymax; y++) {
+ data[shift_first + y * width * 3] = colors.at(cls).red();
+ data[shift_first + y * width * 3 + 1] = colors.at(cls).green();
+ data[shift_first + y * width * 3 + 2] = colors.at(cls).blue();
+ data[shift_second + y * width * 3] = colors.at(cls).red();
+ data[shift_second + y * width * 3 + 1] = colors.at(cls).green();
+ data[shift_second + y * width * 3 + 2] = colors.at(cls).blue();
+ }
+ }
+}
+
+inline void showAvailableDevices() {
+ ov::Core core;
+ std::vector<std::string> devices = core.get_available_devices();
+
+ std::cout << std::endl;
+ std::cout << "Available target devices:";
+ for (const auto& device : devices) {
+ std::cout << " " << device;
+ }
+ std::cout << std::endl;
+}
+
+/**
+ * @brief Parses a text config file. The file must have the following format (with a space as the delimiter):
+ * CONFIG_NAME1 CONFIG_VALUE1
+ * CONFIG_NAME2 CONFIG_VALUE2
+ *
+ * @param configName - filename for a file with config options
+ * @param comment - lines starting with symbol `comment` are skipped
+ */
+std::map<std::string, std::string> parseConfig(const std::string& configName, char comment = '#');
+
+inline std::string getFullDeviceName(ov::Core& core, std::string device) {
+ try {
+ return core.get_property(device, ov::device::full_name);
+ } catch (ov::Exception&) {
+ return {};
+ }
+}
+
+static UNUSED void printPerformanceCounts(std::vector<ov::ProfilingInfo> performanceData,
+ std::ostream& stream,
+ std::string deviceName,
+ bool bshowHeader = true) {
+ std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
+ // Print performance counts
+ if (bshowHeader) {
+ stream << std::endl << "performance counts:" << std::endl << std::endl;
+ }
+ std::ios::fmtflags fmt(std::cout.flags());
+ for (const auto& it : performanceData) {
+ std::string toPrint(it.node_name);
+ const int maxLayerName = 30;
+
+ if (it.node_name.length() >= maxLayerName) {
+ toPrint = it.node_name.substr(0, maxLayerName - 5);
+ toPrint += "...";
+ }
+
+ stream << std::setw(maxLayerName) << std::left << toPrint << " ";
+ switch (it.status) {
+ case ov::ProfilingInfo::Status::EXECUTED:
+ stream << std::setw(15) << std::left << "EXECUTED ";
+ break;
+ case ov::ProfilingInfo::Status::NOT_RUN:
+ stream << std::setw(15) << std::left << "NOT_RUN ";
+ break;
+ case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
+ stream << std::setw(15) << std::left << "OPTIMIZED_OUT ";
+ break;
+ }
+ stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
+ stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
+ stream << std::setw(25) << std::left << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
+ stream << std::setw(25) << std::left << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
+ stream << std::endl;
+ if (it.real_time.count() > 0) {
+ totalTime += it.real_time;
+ }
+ }
+ stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
+ << " milliseconds" << std::endl;
+ std::cout << std::endl;
+ std::cout << "Full device name: " << deviceName << std::endl;
+ std::cout << std::endl;
+ std::cout.flags(fmt);
+}
+
+static UNUSED void printPerformanceCounts(ov::InferRequest request,
+ std::ostream& stream,
+ std::string deviceName,
+ bool bshowHeader = true) {
+ auto performanceMap = request.get_profiling_info();
+ printPerformanceCounts(performanceMap, stream, deviceName, bshowHeader);
+}
+
+static inline std::string double_to_string(const double number) {
+ std::stringstream ss;
+ ss << std::fixed << std::setprecision(2) << number;
+ return ss.str();
+}
+
+template <typename T>
+using uniformDistribution = typename std::conditional<
+ std::is_floating_point<T>::value,
+ std::uniform_real_distribution<T>,
+ typename std::conditional<std::is_integral<T>::value, std::uniform_int_distribution<T>, void>::type>::type;
+
+template <typename T, typename T2>
+static inline void fill_random(ov::Tensor& tensor,
+ T rand_min = std::numeric_limits<uint8_t>::min(),
+ T rand_max = std::numeric_limits<uint8_t>::max()) {
+ std::mt19937 gen(0);
+ size_t tensor_size = tensor.get_size();
+ if (0 == tensor_size) {
+ throw std::runtime_error(
+ "Models with dynamic shapes aren't supported. Input tensors must have specific shapes before inference");
+ }
+ T* data = tensor.data<T>();
+ uniformDistribution<T2> distribution(rand_min, rand_max);
+ for (size_t i = 0; i < tensor_size; i++) {
+ data[i] = static_cast<T>(distribution(gen));
+ }
+}
+
+static inline void fill_tensor_random(ov::Tensor tensor) {
+ switch (tensor.get_element_type()) {
+ case ov::element::f32:
+ fill_random<float, float>(tensor);
+ break;
+ case ov::element::f64:
+ fill_random<double, double>(tensor);
+ break;
+ case ov::element::f16:
+ fill_random<short, short>(tensor);
+ break;
+ case ov::element::i32:
+ fill_random<int32_t, int32_t>(tensor);
+ break;
+ case ov::element::i64:
+ fill_random<int64_t, int64_t>(tensor);
+ break;
+ case ov::element::u8:
+ // uniform_int_distribution<uint8_t> is not allowed in the C++17
+ // standard and vs2017/19
+ fill_random<uint8_t, uint32_t>(tensor);
+ break;
+ case ov::element::i8:
+ // uniform_int_distribution<int8_t> is not allowed in the C++17 standard
+ // and vs2017/19
+ fill_random<int8_t, int32_t>(tensor, std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());
+ break;
+ case ov::element::u16:
+ fill_random<uint16_t, uint16_t>(tensor);
+ break;
+ case ov::element::i16:
+ fill_random<int16_t, int16_t>(tensor);
+ break;
+ case ov::element::boolean:
+ fill_random<uint8_t, uint32_t>(tensor, 0, 1);
+ break;
+ default:
+ throw ov::Exception("Input type is not supported for a tensor");
+ }
+}
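+
+// Editorial note: a hedged example of randomizing every input of a request
+// before a dry run (`compiledModel` and `request` as created elsewhere):
+//
+//   for (const auto& input : compiledModel.inputs()) {
+//       ov::Tensor tensor = request.get_tensor(input);
+//       fill_tensor_random(tensor);
+//   }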
+
+static UNUSED void printPerformanceCountsNoSort(std::vector<ov::ProfilingInfo> performanceData,
+ std::ostream& stream,
+ std::string deviceName,
+ bool bshowHeader = true) {
+ std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
+ // Print performance counts
+ if (bshowHeader) {
+ stream << std::endl << "performance counts:" << std::endl << std::endl;
+ }
+ std::ios::fmtflags fmt(std::cout.flags());
+
+ for (const auto& it : performanceData) {
+ if (it.real_time.count() > 0) {
+ totalTime += it.real_time;
+ }
+ }
+ if (totalTime.count() != 0) {
+ for (const auto& it : performanceData) {
+ std::string toPrint(it.node_name);
+ const int maxLayerName = 30;
+
+ if (it.node_name.length() >= maxLayerName) {
+ toPrint = it.node_name.substr(0, maxLayerName - 5);
+ toPrint += "...";
+ }
+
+ stream << std::setw(maxLayerName) << std::left << toPrint << " ";
+ switch (it.status) {
+ case ov::ProfilingInfo::Status::EXECUTED:
+ stream << std::setw(15) << std::left << "EXECUTED ";
+ break;
+ case ov::ProfilingInfo::Status::NOT_RUN:
+ stream << std::setw(15) << std::left << "NOT_RUN ";
+ break;
+ case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
+ stream << std::setw(15) << std::left << "OPTIMIZED_OUT ";
+ break;
+ }
+ stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
+ stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
+ stream << std::setw(25) << std::left
+ << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
+ stream << std::setw(25) << std::left
+ << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
+
+ double opt_proportion = it.real_time.count() * 100.0 / totalTime.count();
+ std::stringstream opt_proportion_ss;
+ opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion;
+ std::string opt_proportion_str = opt_proportion_ss.str();
+ if (opt_proportion_str == "0.00") {
+ opt_proportion_str = "N/A";
+ }
+ stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%";
+
+ stream << std::endl;
+ }
+ }
+ stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
+ << " milliseconds" << std::endl;
+ std::cout << std::endl;
+ std::cout << "Full device name: " << deviceName << std::endl;
+ std::cout << std::endl;
+ std::cout.flags(fmt);
+}
+
+static UNUSED bool sort_pc_descend(const ov::ProfilingInfo& profiling1, const ov::ProfilingInfo& profiling2) {
+ return profiling1.real_time > profiling2.real_time;
+}
+
+static UNUSED void printPerformanceCountsDescendSort(std::vector<ov::ProfilingInfo> performanceData,
+ std::ostream& stream,
+ std::string deviceName,
+ bool bshowHeader = true) {
+ std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
+ // Print performance counts
+ if (bshowHeader) {
+ stream << std::endl << "performance counts:" << std::endl << std::endl;
+ }
+ std::ios::fmtflags fmt(std::cout.flags());
+
+ for (const auto& it : performanceData) {
+ if (it.real_time.count() > 0) {
+ totalTime += it.real_time;
+ }
+ }
+ if (totalTime.count() != 0) {
+ // sort perfcounter
+ std::vector<ov::ProfilingInfo> sortPerfCounts{std::begin(performanceData), std::end(performanceData)};
+ std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend);
+
+ for (const auto& it : sortPerfCounts) {
+ std::string toPrint(it.node_name);
+ const int maxLayerName = 30;
+
+ if (it.node_name.length() >= maxLayerName) {
+ toPrint = it.node_name.substr(0, maxLayerName - 5);
+ toPrint += "...";
+ }
+
+ stream << std::setw(maxLayerName) << std::left << toPrint << " ";
+ switch (it.status) {
+ case ov::ProfilingInfo::Status::EXECUTED:
+ stream << std::setw(15) << std::left << "EXECUTED ";
+ break;
+ case ov::ProfilingInfo::Status::NOT_RUN:
+ stream << std::setw(15) << std::left << "NOT_RUN ";
+ break;
+ case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
+ stream << std::setw(15) << std::left << "OPTIMIZED_OUT ";
+ break;
+ }
+ stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
+ stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
+ stream << std::setw(25) << std::left
+ << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
+ stream << std::setw(25) << std::left
+ << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
+
+ double opt_proportion = it.real_time.count() * 100.0 / totalTime.count();
+ std::stringstream opt_proportion_ss;
+ opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion;
+ std::string opt_proportion_str = opt_proportion_ss.str();
+ if (opt_proportion_str == "0.00") {
+ opt_proportion_str = "N/A";
+ }
+ stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%";
+
+ stream << std::endl;
+ }
+ }
+ stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
+ << " milliseconds" << std::endl;
+ std::cout << std::endl;
+ std::cout << "Full device name: " << deviceName << std::endl;
+ std::cout << std::endl;
+ std::cout.flags(fmt);
+}
+
+static UNUSED void printPerformanceCountsSimpleSort(std::vector<ov::ProfilingInfo> performanceData,
+ std::ostream& stream,
+ std::string deviceName,
+ bool bshowHeader = true) {
+ std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
+ // Print performance counts
+ if (bshowHeader) {
+ stream << std::endl << "performance counts:" << std::endl << std::endl;
+ }
+ std::ios::fmtflags fmt(std::cout.flags());
+
+ for (const auto& it : performanceData) {
+ if (it.real_time.count() > 0) {
+ totalTime += it.real_time;
+ }
+ }
+ if (totalTime.count() != 0) {
+ // sort perfcounter
+ std::vector<ov::ProfilingInfo> sortPerfCounts{std::begin(performanceData), std::end(performanceData)};
+ std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend);
+
+ for (const auto& it : sortPerfCounts) {
+ if (it.status == ov::ProfilingInfo::Status::EXECUTED) {
+ std::string toPrint(it.node_name);
+ const int maxLayerName = 30;
+
+ if (it.node_name.length() >= maxLayerName) {
+ toPrint = it.node_name.substr(0, maxLayerName - 5);
+ toPrint += "...";
+ }
+
+ stream << std::setw(maxLayerName) << std::left << toPrint << " ";
+ stream << std::setw(15) << std::left << "EXECUTED ";
+ stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
+ stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
+ stream << std::setw(25) << std::left
+ << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
+ stream << std::setw(25) << std::left
+ << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
+
+ double opt_proportion = it.real_time.count() * 100.0 / totalTime.count();
+ std::stringstream opt_proportion_ss;
+ opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion;
+ std::string opt_proportion_str = opt_proportion_ss.str();
+ if (opt_proportion_str == "0.00") {
+ opt_proportion_str = "N/A";
+ }
+ stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%";
+
+ stream << std::endl;
+ }
+ }
+ }
+ stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
+ << " milliseconds" << std::endl;
+ std::cout << std::endl;
+ std::cout << "Full device name: " << deviceName << std::endl;
+ std::cout << std::endl;
+ std::cout.flags(fmt);
+}
+
+static UNUSED void printPerformanceCountsSort(std::vector<ov::ProfilingInfo> performanceData,
+ std::ostream& stream,
+ std::string deviceName,
+ std::string sorttype,
+ bool bshowHeader = true) {
+ if (sorttype == pcNoSort) {
+ printPerformanceCountsNoSort(performanceData, stream, deviceName, bshowHeader);
+ } else if (sorttype == pcSort) {
+ printPerformanceCountsDescendSort(performanceData, stream, deviceName, bshowHeader);
+ } else if (sorttype == pcSimpleSort) {
+ printPerformanceCountsSimpleSort(performanceData, stream, deviceName, bshowHeader);
+ }
+}
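+
+// Usage sketch (illustrative only; `infer_request` is an assumed ov::InferRequest,
+// not part of this header). After inference, the profiling data can be fed to the
+// dispatcher above:
+//
+//     std::vector<ov::ProfilingInfo> perf = infer_request.get_profiling_info();
+//     printPerformanceCountsSort(perf, std::cout, "CPU", pcSort);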
diff --git a/python/openvino/runtime/common/utils/include/samples/console_progress.hpp b/python/openvino/runtime/common/utils/include/samples/console_progress.hpp
new file mode 100644
index 0000000..f62aeed
--- /dev/null
+++ b/python/openvino/runtime/common/utils/include/samples/console_progress.hpp
@@ -0,0 +1,107 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <cstdio>
+#include <iomanip>
+#include <sstream>
+
+/**
+ * @class ConsoleProgress
+ * @brief The ConsoleProgress class prints a progress bar that is updated in the console
+ */
+class ConsoleProgress {
+ static const size_t DEFAULT_DETALIZATION = 20;
+ static const size_t DEFAULT_PERCENT_TO_UPDATE_PROGRESS = 1;
+
+ size_t total;
+ size_t cur_progress = 0;
+ size_t prev_progress = 0;
+ bool stream_output;
+ size_t detalization;
+ size_t percent_to_update;
+
+public:
+ /**
+     * @brief A constructor of the ConsoleProgress class
+     * @param _total - maximum value that corresponds to 100%
+     * @param _stream_output - true to print each update on a new line instead of redrawing in place
+     * @param _percent_to_update - minimum percent change required to redraw the progress bar
+     * @param _detalization - number of symbols (.) used to represent progress
+ */
+ explicit ConsoleProgress(size_t _total,
+ bool _stream_output = false,
+ size_t _percent_to_update = DEFAULT_PERCENT_TO_UPDATE_PROGRESS,
+ size_t _detalization = DEFAULT_DETALIZATION)
+ : total(_total),
+ detalization(_detalization),
+ percent_to_update(_percent_to_update) {
+ stream_output = _stream_output;
+ if (total == 0) {
+ total = 1;
+ }
+ }
+
+ /**
+ * @brief Shows progress with current data. Progress is shown from the beginning of the current
+ * line.
+ */
+ void showProgress() const {
+ std::stringstream strm;
+ if (!stream_output) {
+ strm << '\r';
+ }
+ strm << "Progress: [";
+ size_t i = 0;
+ for (; i < detalization * cur_progress / total; i++) {
+ strm << ".";
+ }
+ for (; i < detalization; i++) {
+ strm << " ";
+ }
+ strm << "] " << std::setw(3) << 100 * cur_progress / total << "% done";
+ if (stream_output) {
+ strm << std::endl;
+ }
+ std::fputs(strm.str().c_str(), stdout);
+ std::fflush(stdout);
+ }
+
+ /**
+ * @brief Updates current value and progressbar
+ */
+ void updateProgress() {
+ if (cur_progress > total)
+ cur_progress = total;
+ size_t prev_percent = 100 * prev_progress / total;
+ size_t cur_percent = 100 * cur_progress / total;
+
+ if (prev_progress == 0 || cur_progress == total || prev_percent + percent_to_update <= cur_percent) {
+ showProgress();
+ prev_progress = cur_progress;
+ }
+ }
+
+ /**
+     * @brief Adds a value to the current progress and redraws the progress bar
+ * @param add - value to add
+ */
+ void addProgress(int add) {
+ if (add < 0 && -add > static_cast<int>(cur_progress)) {
+ add = -static_cast<int>(cur_progress);
+ }
+ cur_progress += add;
+ updateProgress();
+ }
+
+ /**
+     * @brief Outputs an end line and flushes stdout.
+ */
+ void finish() {
+ std::stringstream strm;
+ strm << std::endl;
+ std::fputs(strm.str().c_str(), stdout);
+ std::fflush(stdout);
+ }
+};
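+
+// Usage sketch (illustrative; `totalFrames` and `processFrame` are assumptions):
+//
+//     ConsoleProgress progress(totalFrames);
+//     for (size_t i = 0; i < totalFrames; ++i) {
+//         processFrame(i);          // hypothetical per-item work
+//         progress.addProgress(1);  // advance and redraw when the percent changes
+//     }
+//     progress.finish();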
diff --git a/python/openvino/runtime/common/utils/include/samples/csv_dumper.hpp b/python/openvino/runtime/common/utils/include/samples/csv_dumper.hpp
new file mode 100644
index 0000000..5c80134
--- /dev/null
+++ b/python/openvino/runtime/common/utils/include/samples/csv_dumper.hpp
@@ -0,0 +1,98 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ctime>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+#include "samples/slog.hpp"
+
+/**
+ * @class CsvDumper
+ * @brief The CsvDumper class dumps values to CSV files
+ */
+class CsvDumper {
+ std::ofstream file;
+ std::string filename;
+ bool canDump = true;
+ char delimiter = ';';
+
+ std::string generateFilename() {
+ std::stringstream filename;
+ filename << "dumpfile-";
+ filename << time(nullptr);
+ filename << ".csv";
+ return filename.str();
+ }
+
+public:
+ /**
+     * @brief A constructor. Disables dumping if the dump file cannot be created.
+     * @param enabled - true if dumping is enabled by default.
+     * @param name - name of the file to dump to. The file is not created if the first parameter is false.
+ */
+ explicit CsvDumper(bool enabled = true, const std::string& name = "") : canDump(enabled) {
+ if (!canDump) {
+ return;
+ }
+ filename = (name == "" ? generateFilename() : name);
+ file.open(filename, std::ios::out);
+ if (!file) {
+ slog::warn << "Cannot create dump file! Disabling dump." << slog::endl;
+ canDump = false;
+ }
+ }
+
+ /**
+     * @brief Sets the delimiter to use in the CSV file
+     * @param c - delimiter character
+ */
+ void setDelimiter(char c) {
+ delimiter = c;
+ }
+
+ /**
+     * @brief Overloads the stream operator to write values to the file. Does nothing if dumping is
+     * disabled. Appends the delimiter after the value provided.
+     * @param add - value to add to the dump
+     * @return reference to the same object
+ */
+ template <class T>
+ CsvDumper& operator<<(const T& add) {
+ if (canDump) {
+ file << add << delimiter;
+ }
+ return *this;
+ }
+
+ /**
+ * @brief Finishes line in dump file. Does nothing if dumping is disabled
+ */
+ void endLine() {
+ if (canDump) {
+ file << "\n";
+ }
+ }
+
+ /**
+     * @brief Tells whether dumping is enabled.
+     * @return true if dumping is enabled and the file was successfully created
+ */
+ bool dumpEnabled() {
+ return canDump;
+ }
+
+ /**
+ * @brief Gets name of a dump file
+ * @return name of a dump file
+ */
+ std::string getFilename() const {
+ return filename;
+ }
+};
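+
+// Usage sketch (illustrative; the file name and the values streamed are assumptions):
+//
+//     CsvDumper dumper(true, "latency.csv");
+//     dumper.setDelimiter(',');
+//     dumper << "iteration" << "latency_ms";
+//     dumper.endLine();
+//     dumper << 0 << 12.34;
+//     dumper.endLine();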
diff --git a/python/openvino/runtime/common/utils/include/samples/latency_metrics.hpp b/python/openvino/runtime/common/utils/include/samples/latency_metrics.hpp
new file mode 100644
index 0000000..bca39d0
--- /dev/null
+++ b/python/openvino/runtime/common/utils/include/samples/latency_metrics.hpp
@@ -0,0 +1,42 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+
+// clang-format off
+#include "samples/common.hpp"
+#include "samples/slog.hpp"
+// clang-format on
+
+/// @brief Responsible for calculating different latency metrics
+class LatencyMetrics {
+public:
+ LatencyMetrics() {}
+
+ LatencyMetrics(const std::vector<double>& latencies,
+ const std::string& data_shape = "",
+ size_t percentile_boundary = 50)
+ : data_shape(data_shape),
+ percentile_boundary(percentile_boundary) {
+ fill_data(latencies, percentile_boundary);
+ }
+
+ void write_to_stream(std::ostream& stream) const;
+ void write_to_slog() const;
+
+ double median_or_percentile = 0;
+ double avg = 0;
+ double min = 0;
+ double max = 0;
+ std::string data_shape;
+
+private:
+ void fill_data(std::vector<double> latencies, size_t percentile_boundary);
+ size_t percentile_boundary = 50;
+};
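+
+// Usage sketch (illustrative; the latency values and shape string are assumptions):
+//
+//     std::vector<double> latencies = {10.2, 11.5, 9.8, 14.1};  // per-request milliseconds
+//     LatencyMetrics metrics(latencies, "[1,3,224,224]", 90);   // report the 90th percentile
+//     metrics.write_to_slog();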
diff --git a/python/openvino/runtime/common/utils/include/samples/ocv_common.hpp b/python/openvino/runtime/common/utils/include/samples/ocv_common.hpp
new file mode 100644
index 0000000..94f3b1f
--- /dev/null
+++ b/python/openvino/runtime/common/utils/include/samples/ocv_common.hpp
@@ -0,0 +1,92 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief a header file with common samples functionality using OpenCV
+ * @file ocv_common.hpp
+ */
+
+#pragma once
+
+#include <opencv2/opencv.hpp>
+
+#include "openvino/openvino.hpp"
+#include "samples/common.hpp"
+
+/**
+ * @brief Copies image data stored in a cv::Mat object into a given Blob object.
+ * @param orig_image - given cv::Mat object with image data.
+ * @param blob - Blob object to be filled with the image data.
+ * @param batchIndex - batch index of the image inside the blob.
+ */
+template <typename T>
+void matU8ToBlob(const cv::Mat& orig_image, InferenceEngine::Blob::Ptr& blob, int batchIndex = 0) {
+ InferenceEngine::SizeVector blobSize = blob->getTensorDesc().getDims();
+ const size_t width = blobSize[3];
+ const size_t height = blobSize[2];
+ const size_t channels = blobSize[1];
+ InferenceEngine::MemoryBlob::Ptr mblob = InferenceEngine::as<InferenceEngine::MemoryBlob>(blob);
+ OPENVINO_ASSERT(mblob,
+ "We expect blob to be inherited from MemoryBlob in matU8ToBlob, "
+ "but by fact we were not able to cast inputBlob to MemoryBlob");
+ // locked memory holder should be alive all time while access to its buffer happens
+ auto mblobHolder = mblob->wmap();
+
+ T* blob_data = mblobHolder.as<T*>();
+
+ cv::Mat resized_image(orig_image);
+ if (static_cast<int>(width) != orig_image.size().width || static_cast<int>(height) != orig_image.size().height) {
+ cv::resize(orig_image, resized_image, cv::Size(width, height));
+ }
+
+ int batchOffset = batchIndex * width * height * channels;
+
+ for (size_t c = 0; c < channels; c++) {
+ for (size_t h = 0; h < height; h++) {
+ for (size_t w = 0; w < width; w++) {
+ blob_data[batchOffset + c * width * height + h * width + w] = resized_image.at<cv::Vec3b>(h, w)[c];
+ }
+ }
+ }
+}
+
+/**
+ * @brief Wraps the data stored inside a passed cv::Mat object with a new Blob pointer.
+ * @note No memory allocation happens. The blob just points to the existing
+ * cv::Mat data.
+ * @param mat - given cv::Mat object with image data.
+ * @return resulting Blob pointer.
+ */
+static UNUSED InferenceEngine::Blob::Ptr wrapMat2Blob(const cv::Mat& mat) {
+ size_t channels = mat.channels();
+ size_t height = mat.size().height;
+ size_t width = mat.size().width;
+
+ size_t strideH = mat.step.buf[0];
+ size_t strideW = mat.step.buf[1];
+
+ bool is_dense = strideW == channels && strideH == channels * width;
+
+    OPENVINO_ASSERT(is_dense, "Conversion from a non-dense cv::Mat is not supported");
+
+ InferenceEngine::TensorDesc tDesc(InferenceEngine::Precision::U8,
+ {1, channels, height, width},
+ InferenceEngine::Layout::NHWC);
+
+ return InferenceEngine::make_shared_blob<uint8_t>(tDesc, mat.data);
+}
+
+static UNUSED ov::Tensor wrapMat2Tensor(const cv::Mat& mat) {
+ const size_t channels = mat.channels();
+ const size_t height = mat.size().height;
+ const size_t width = mat.size().width;
+
+ const size_t strideH = mat.step.buf[0];
+ const size_t strideW = mat.step.buf[1];
+
+ const bool is_dense = strideW == channels && strideH == channels * width;
+    OPENVINO_ASSERT(is_dense, "Conversion from a non-dense cv::Mat is not supported");
+
+ return ov::Tensor(ov::element::u8, ov::Shape{1, height, width, channels}, mat.data);
+}
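+
+// Usage sketch (illustrative; the image path and `infer_request` are assumptions).
+// The wrapped tensor shares memory with the cv::Mat, so the Mat must outlive it:
+//
+//     cv::Mat image = cv::imread("input.jpg");   // dense BGR u8 data
+//     ov::Tensor input = wrapMat2Tensor(image);  // zero-copy NHWC view
+//     infer_request.set_input_tensor(input);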
diff --git a/python/openvino/runtime/common/utils/include/samples/os/windows/w_dirent.h b/python/openvino/runtime/common/utils/include/samples/os/windows/w_dirent.h
new file mode 100644
index 0000000..40d1c5b
--- /dev/null
+++ b/python/openvino/runtime/common/utils/include/samples/os/windows/w_dirent.h
@@ -0,0 +1,176 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#if defined(_WIN32)
+
+# ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN_UNDEF
+# endif
+
+# ifndef NOMINMAX
+# define NOMINMAX
+# define NOMINMAX_UNDEF
+# endif
+
+# if defined(_M_IX86) && !defined(_X86_) && !defined(_AMD64_)
+# define _X86_
+# endif
+
+# if defined(_M_X64) && !defined(_X86_) && !defined(_AMD64_)
+# define _AMD64_
+# endif
+
+# if defined(_M_ARM) && !defined(_ARM_) && !defined(_ARM64_)
+# define _ARM_
+# endif
+
+# if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_)
+# define _ARM64_
+# endif
+
+// clang-format off
+ #include <string.h>
+ #include <windef.h>
+ #include <fileapi.h>
+ #include <Winbase.h>
+ #include <sys/stat.h>
+// clang-format on
+
+// Copied from linux libc sys/stat.h:
+# define S_ISREG(m) (((m)&S_IFMT) == S_IFREG)
+# define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
+
+/// @brief structure to store directory names
+struct dirent {
+ char* d_name;
+
+ explicit dirent(const wchar_t* wsFilePath) {
+ size_t i;
+ auto slen = wcslen(wsFilePath);
+ d_name = static_cast<char*>(malloc(slen + 1));
+ wcstombs_s(&i, d_name, slen + 1, wsFilePath, slen);
+ }
+ ~dirent() {
+ free(d_name);
+ }
+};
+
+/// @brief class to store directory data (files meta)
+class DIR {
+ WIN32_FIND_DATAA FindFileData;
+ HANDLE hFind;
+ dirent* next;
+
+ static inline bool endsWith(const std::string& src, const char* with) {
+ int wl = static_cast<int>(strlen(with));
+ int so = static_cast<int>(src.length()) - wl;
+ if (so < 0)
+ return false;
+ return 0 == strncmp(with, &src[so], wl);
+ }
+
+public:
+ DIR(const DIR& other) = delete;
+ DIR(DIR&& other) = delete;
+ DIR& operator=(const DIR& other) = delete;
+ DIR& operator=(DIR&& other) = delete;
+
+ explicit DIR(const char* dirPath) : next(nullptr) {
+ std::string ws = dirPath;
+ if (endsWith(ws, "\\"))
+ ws += "*";
+ else
+ ws += "\\*";
+ hFind = FindFirstFileA(ws.c_str(), &FindFileData);
+ FindFileData.dwReserved0 = hFind != INVALID_HANDLE_VALUE;
+ }
+
+ ~DIR() {
+        delete next;  // deleting a null pointer is a no-op
+        next = nullptr;
+ FindClose(hFind);
+ }
+
+ /**
+     * @brief Checks that the find handle is valid
+     * @return true on success, false on failure
+ */
+ bool isValid() const {
+ return (hFind != INVALID_HANDLE_VALUE && FindFileData.dwReserved0);
+ }
+
+ /**
+     * @brief Advances to the next directory entry
+     * @return pointer to the next dirent, or nullptr when iteration is finished
+ */
+ dirent* nextEnt() {
+ if (next != nullptr)
+ delete next;
+ next = nullptr;
+
+ if (!FindFileData.dwReserved0)
+ return nullptr;
+
+ wchar_t wbuf[4096];
+
+ size_t outSize;
+ mbstowcs_s(&outSize, wbuf, 4094, FindFileData.cFileName, 4094);
+ next = new dirent(wbuf);
+ FindFileData.dwReserved0 = FindNextFileA(hFind, &FindFileData);
+ return next;
+ }
+};
+
+/**
+ * @brief Opens a directory for iteration
+ * @param dirPath directory path
+ * @return pointer to a DIR object, or nullptr if the directory cannot be opened
+ */
+static DIR* opendir(const char* dirPath) {
+ auto dp = new DIR(dirPath);
+ if (!dp->isValid()) {
+ delete dp;
+ return nullptr;
+ }
+ return dp;
+}
+
+/**
+ * @brief Walks through the directory entries
+ * @param dp pointer to an open DIR object
+ * @return pointer to the next directory entry, or nullptr at the end
+ */
+static struct dirent* readdir(DIR* dp) {
+ return dp->nextEnt();
+}
+
+/**
+ * @brief Closes the directory and releases its data
+ * @param dp pointer to an open DIR object
+ */
+static void closedir(DIR* dp) {
+ delete dp;
+}
+
+# ifdef WIN32_LEAN_AND_MEAN_UNDEF
+# undef WIN32_LEAN_AND_MEAN
+# undef WIN32_LEAN_AND_MEAN_UNDEF
+# endif
+
+# ifdef NOMINMAX_UNDEF
+# undef NOMINMAX_UNDEF
+# undef NOMINMAX
+# endif
+
+#else
+
+# include <dirent.h>
+# include <sys/types.h>
+
+#endif
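+
+// Usage sketch (illustrative; `path` is an assumption). The shim mirrors the POSIX
+// pattern, so the same loop compiles on both Windows and Linux:
+//
+//     DIR* dir = opendir(path);
+//     if (dir) {
+//         while (struct dirent* entry = readdir(dir)) {
+//             printf("%s\n", entry->d_name);
+//         }
+//         closedir(dir);
+//     }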
diff --git a/python/openvino/runtime/common/utils/include/samples/slog.hpp b/python/openvino/runtime/common/utils/include/samples/slog.hpp
new file mode 100644
index 0000000..3f237e5
--- /dev/null
+++ b/python/openvino/runtime/common/utils/include/samples/slog.hpp
@@ -0,0 +1,102 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief a header file with a logging facility for common samples
+ * @file slog.hpp
+ */
+
+#pragma once
+
+#include <ostream>
+#include <string>
+#include <vector>
+
+namespace slog {
+/**
+ * @class LogStreamEndLine
+ * @brief The LogStreamEndLine class implements an end line marker for a log stream
+ */
+class LogStreamEndLine {};
+
+static constexpr LogStreamEndLine endl;
+
+/**
+ * @class LogStreamBoolAlpha
+ * @brief The LogStreamBoolAlpha class implements bool printing for a log stream
+ */
+class LogStreamBoolAlpha {};
+
+static constexpr LogStreamBoolAlpha boolalpha;
+
+/**
+ * @class LogStreamFlush
+ * @brief The LogStreamFlush class implements flushing for a log stream
+ */
+class LogStreamFlush {};
+
+static constexpr LogStreamFlush flush;
+
+/**
+ * @class LogStream
+ * @brief The LogStream class implements a stream for sample logging
+ */
+class LogStream {
+ std::string _prefix;
+ std::ostream* _log_stream;
+ bool _new_line;
+
+public:
+ /**
+ * @brief A constructor. Creates an LogStream object
+ * @param prefix The prefix to print
+ */
+ LogStream(const std::string& prefix, std::ostream& log_stream);
+
+ /**
+ * @brief A stream output operator to be used within the logger
+ * @param arg Object for serialization in the logger message
+ */
+ template <class T>
+ LogStream& operator<<(const T& arg) {
+ if (_new_line) {
+ (*_log_stream) << "[ " << _prefix << " ] ";
+ _new_line = false;
+ }
+
+ (*_log_stream) << arg;
+ return *this;
+ }
+
+ /**
+ * @brief Overload output stream operator to print vectors in pretty form
+ * [value1, value2, ...]
+ */
+ template <typename T>
+ LogStream& operator<<(const std::vector<T>& v) {
+ (*_log_stream) << "[ ";
+
+ for (auto&& value : v)
+ (*_log_stream) << value << " ";
+
+ (*_log_stream) << "]";
+
+ return *this;
+ }
+
+ // Specializing for LogStreamEndLine to support slog::endl
+ LogStream& operator<<(const LogStreamEndLine&);
+
+ // Specializing for LogStreamBoolAlpha to support slog::boolalpha
+ LogStream& operator<<(const LogStreamBoolAlpha&);
+
+ // Specializing for LogStreamFlush to support slog::flush
+ LogStream& operator<<(const LogStreamFlush&);
+};
+
+extern LogStream info;
+extern LogStream warn;
+extern LogStream err;
+
+} // namespace slog
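+
+// Usage sketch (illustrative; `model_path` is an assumption):
+//
+//     slog::info << "Loading model: " << model_path << slog::endl;
+//     slog::warn << "Falling back to CPU" << slog::endl;
+//     slog::err << "Cannot open file" << slog::endl;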
diff --git a/python/openvino/runtime/common/utils/include/samples/vpu/vpu_tools_common.hpp b/python/openvino/runtime/common/utils/include/samples/vpu/vpu_tools_common.hpp
new file mode 100644
index 0000000..ba0665f
--- /dev/null
+++ b/python/openvino/runtime/common/utils/include/samples/vpu/vpu_tools_common.hpp
@@ -0,0 +1,28 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <fstream>
+#include <map>
+#include <string>
+
+static std::map<std::string, std::string> parseConfig(const std::string& configName, char comment = '#') {
+ std::map<std::string, std::string> config = {};
+
+ std::ifstream file(configName);
+ if (!file.is_open()) {
+ return config;
+ }
+
+ std::string key, value;
+ while (file >> key >> value) {
+ if (key.empty() || key[0] == comment) {
+ continue;
+ }
+ config[key] = value;
+ }
+
+ return config;
+}
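+
+// Usage sketch (illustrative; "vpu.conf" and the LOG_LEVEL key are assumptions).
+// The expected file format is whitespace-separated key/value pairs, one per line:
+//
+//     std::map<std::string, std::string> config = parseConfig("vpu.conf");
+//     auto it = config.find("LOG_LEVEL");
+//     if (it != config.end()) {
+//         // apply it->second
+//     }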
diff --git a/python/openvino/runtime/common/utils/src/args_helper.cpp b/python/openvino/runtime/common/utils/src/args_helper.cpp
new file mode 100644
index 0000000..ae7fa67
--- /dev/null
+++ b/python/openvino/runtime/common/utils/src/args_helper.cpp
@@ -0,0 +1,390 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// clang-format off
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <iostream>
+
+#ifdef _WIN32
+# include "samples/os/windows/w_dirent.h"
+#else
+# include <dirent.h>
+# include <unistd.h>
+#endif
+
+#include "openvino/openvino.hpp"
+
+#include "gflags/gflags.h"
+#include "samples/args_helper.hpp"
+#include "samples/slog.hpp"
+// clang-format on
+
+/**
+ * @brief Checks an input file argument and adds it to the files vector; if the
+ * argument is a directory, every file inside it is added
+ * @param files reference to a vector that stores the file names
+ * @param arg file or folder name
+ */
+void readInputFilesArguments(std::vector<std::string>& files, const std::string& arg) {
+ struct stat sb;
+
+#if defined(_WIN32)
+ FILE* fd = fopen(arg.c_str(), "r");
+ if (!fd) {
+ slog::warn << "File " << arg << " cannot be opened!" << slog::endl;
+ return;
+ }
+
+ if (fstat(fileno(fd), &sb) != 0) {
+ fclose(fd);
+ slog::warn << "File " << arg << " cannot be opened!" << slog::endl;
+ return;
+ }
+ fclose(fd);
+#else
+ int fd = open(arg.c_str(), O_RDONLY);
+ if (fd == -1) {
+ slog::warn << "File " << arg << " cannot be opened!" << slog::endl;
+ return;
+ }
+
+ if (fstat(fd, &sb) != 0) {
+ close(fd);
+ slog::warn << "File " << arg << " cannot be opened!" << slog::endl;
+ return;
+ }
+ close(fd);
+#endif
+
+ if (S_ISDIR(sb.st_mode)) {
+ struct CloseDir {
+ void operator()(DIR* d) const noexcept {
+ if (d) {
+ closedir(d);
+ }
+ }
+ };
+ using Dir = std::unique_ptr<DIR, CloseDir>;
+ Dir dp(opendir(arg.c_str()));
+ if (dp == nullptr) {
+ slog::warn << "Directory " << arg << " cannot be opened!" << slog::endl;
+ return;
+ }
+
+ struct dirent* ep;
+ while (nullptr != (ep = readdir(dp.get()))) {
+ std::string fileName = ep->d_name;
+ if (fileName == "." || fileName == "..")
+ continue;
+ files.push_back(arg + "/" + ep->d_name);
+ }
+ } else {
+ files.push_back(arg);
+ }
+}
+
+/**
+ * @brief Finds the -i/--images keys among the input arguments. This is necessary to
+ * process multiple values for a single key
+ * @param files reference to a vector that receives the collected file names
+ */
+void parseInputFilesArguments(std::vector<std::string>& files) {
+ std::vector<std::string> args = gflags::GetArgvs();
+ auto args_it = begin(args);
+ const auto is_image_arg = [](const std::string& s) {
+ return s == "-i" || s == "--images";
+ };
+ const auto is_arg = [](const std::string& s) {
+        return !s.empty() && s.front() == '-';  // guard against empty arguments before calling front()
+ };
+
+ while (args_it != args.end()) {
+ const auto img_start = std::find_if(args_it, end(args), is_image_arg);
+ if (img_start == end(args)) {
+ break;
+ }
+ const auto img_begin = std::next(img_start);
+ const auto img_end = std::find_if(img_begin, end(args), is_arg);
+ for (auto img = img_begin; img != img_end; ++img) {
+ readInputFilesArguments(files, *img);
+ }
+ args_it = img_end;
+ }
+
+ if (files.empty()) {
+ return;
+ }
+ size_t max_files = 20;
+ if (files.size() < max_files) {
+ slog::info << "Files were added: " << files.size() << slog::endl;
+ for (const auto& filePath : files) {
+ slog::info << " " << filePath << slog::endl;
+ }
+ } else {
+ slog::info << "Files were added: " << files.size() << ". Too many to display each of them." << slog::endl;
+ }
+}
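+
+// Usage sketch (illustrative command line). Given `./sample -i img.png images_dir -m model.xml`,
+// the call below collects img.png plus every file inside images_dir:
+//
+//     std::vector<std::string> files;
+//     parseInputFilesArguments(files);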
+
+std::vector<std::string> splitStringList(const std::string& str, char delim) {
+ if (str.empty())
+ return {};
+
+ std::istringstream istr(str);
+
+ std::vector<std::string> result;
+ std::string elem;
+ while (std::getline(istr, elem, delim)) {
+ if (elem.empty()) {
+ continue;
+ }
+ result.emplace_back(std::move(elem));
+ }
+
+ return result;
+}
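+
+// Usage sketch: splitStringList("a,b,,c", ',') yields {"a", "b", "c"}; empty
+// elements are skipped.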
+
+std::map<std::string, std::string> parseArgMap(std::string argMap) {
+ argMap.erase(std::remove_if(argMap.begin(), argMap.end(), ::isspace), argMap.end());
+
+ const auto pairs = splitStringList(argMap, ',');
+
+ std::map<std::string, std::string> parsedMap;
+ for (auto&& pair : pairs) {
+ const auto lastDelimPos = pair.find_last_of(':');
+ auto key = pair.substr(0, lastDelimPos);
+ auto value = pair.substr(lastDelimPos + 1);
+
+ if (lastDelimPos == std::string::npos || key.empty() || value.empty()) {
+ throw std::invalid_argument("Invalid key/value pair " + pair + ". Expected <layer_name>:<value>");
+ }
+
+ parsedMap[std::move(key)] = std::move(value);
+ }
+
+ return parsedMap;
+}
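+
+// Usage sketch (the tensor names are assumptions). Whitespace is stripped before
+// parsing, and only the last ':' in each pair is treated as the delimiter:
+//
+//     auto m = parseArgMap("input:FP16, output:FP32");
+//     // m["input"] == "FP16", m["output"] == "FP32"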
+
+using supported_type_t = std::unordered_map<std::string, ov::element::Type>;
+ov::element::Type getType(std::string value, const supported_type_t& supported_precisions) {
+ std::transform(value.begin(), value.end(), value.begin(), ::toupper);
+
+ const auto precision = supported_precisions.find(value);
+ if (precision == supported_precisions.end()) {
+ throw std::logic_error("\"" + value + "\"" + " is not a valid precision");
+ }
+
+ return precision->second;
+}
+ov::element::Type getType(const std::string& value) {
+    // getType() upper-cases its argument before the lookup, so the keys must be
+    // stored upper-case; this way both the "FP32" and "f32" spellings resolve.
+    static const supported_type_t supported_types = {
+        {"FP32", ov::element::f32},  {"F32", ov::element::f32},      {"FP16", ov::element::f16},
+        {"F16", ov::element::f16},   {"BF16", ov::element::bf16},    {"U64", ov::element::u64},
+        {"I64", ov::element::i64},   {"U32", ov::element::u32},      {"I32", ov::element::i32},
+        {"U16", ov::element::u16},   {"I16", ov::element::i16},      {"U8", ov::element::u8},
+        {"I8", ov::element::i8},     {"BOOL", ov::element::boolean}, {"BOOLEAN", ov::element::boolean},
+    };
+
+ return getType(value, supported_types);
+}
+
+void printInputAndOutputsInfo(const ov::Model& network) {
+ slog::info << "model name: " << network.get_friendly_name() << slog::endl;
+
+ const std::vector<ov::Output<const ov::Node>> inputs = network.inputs();
+ for (const ov::Output<const ov::Node> &input : inputs) {
+ slog::info << " inputs" << slog::endl;
+
+ const std::string name = input.get_names().empty() ? "NONE" : input.get_any_name();
+ slog::info << " input name: " << name << slog::endl;
+
+ const ov::element::Type type = input.get_element_type();
+ slog::info << " input type: " << type << slog::endl;
+
+ const ov::Shape shape = input.get_shape();
+ slog::info << " input shape: " << shape << slog::endl;
+ }
+
+ const std::vector<ov::Output<const ov::Node>> outputs = network.outputs();
+ for (const ov::Output<const ov::Node> &output : outputs) {
+ slog::info << " outputs" << slog::endl;
+
+ const std::string name = output.get_names().empty() ? "NONE" : output.get_any_name();
+ slog::info << " output name: " << name << slog::endl;
+
+ const ov::element::Type type = output.get_element_type();
+ slog::info << " output type: " << type << slog::endl;
+
+ const ov::Shape shape = output.get_shape();
+ slog::info << " output shape: " << shape << slog::endl;
+ }
+}
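+
+// Usage sketch (illustrative; `core` is an assumed ov::Core and "model.xml" a placeholder path):
+//
+//     std::shared_ptr<ov::Model> model = core.read_model("model.xml");
+//     printInputAndOutputsInfo(*model);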
+
+void configurePrePostProcessing(std::shared_ptr<ov::Model>& model,
+ const std::string& ip,
+ const std::string& op,
+ const std::string& iop,
+ const std::string& il,
+ const std::string& ol,
+ const std::string& iol,
+ const std::string& iml,
+ const std::string& oml,
+ const std::string& ioml) {
+ auto preprocessor = ov::preprocess::PrePostProcessor(model);
+ const auto inputs = model->inputs();
+ const auto outputs = model->outputs();
+ if (!ip.empty()) {
+ auto type = getType(ip);
+ for (size_t i = 0; i < inputs.size(); i++) {
+ preprocessor.input(i).tensor().set_element_type(type);
+ }
+ }
+
+ if (!op.empty()) {
+ auto type = getType(op);
+ for (size_t i = 0; i < outputs.size(); i++) {
+ preprocessor.output(i).tensor().set_element_type(type);
+ }
+ }
+
+ if (!iop.empty()) {
+ const auto user_precisions_map = parseArgMap(iop);
+ for (auto&& item : user_precisions_map) {
+ const auto& tensor_name = item.first;
+ const auto type = getType(item.second);
+
+ bool tensorFound = false;
+ for (size_t i = 0; i < inputs.size(); i++) {
+ if (inputs[i].get_names().count(tensor_name)) {
+ preprocessor.input(i).tensor().set_element_type(type);
+ tensorFound = true;
+ break;
+ }
+ }
+ if (!tensorFound) {
+ for (size_t i = 0; i < outputs.size(); i++) {
+ if (outputs[i].get_names().count(tensor_name)) {
+ preprocessor.output(i).tensor().set_element_type(type);
+ tensorFound = true;
+ break;
+ }
+ }
+ }
+            OPENVINO_ASSERT(tensorFound, "Model doesn't have input/output with tensor name: ", tensor_name);
+ }
+ }
+ if (!il.empty()) {
+ for (size_t i = 0; i < inputs.size(); i++) {
+ preprocessor.input(i).tensor().set_layout(ov::Layout(il));
+ }
+ }
+
+ if (!ol.empty()) {
+ for (size_t i = 0; i < outputs.size(); i++) {
+ preprocessor.output(i).tensor().set_layout(ov::Layout(ol));
+ }
+ }
+
+ if (!iol.empty()) {
+ const auto user_precisions_map = parseArgMap(iol);
+ for (auto&& item : user_precisions_map) {
+ const auto& tensor_name = item.first;
+
+ bool tensorFound = false;
+ for (size_t i = 0; i < inputs.size(); i++) {
+ if (inputs[i].get_names().count(tensor_name)) {
+ preprocessor.input(i).tensor().set_layout(ov::Layout(item.second));
+ tensorFound = true;
+ break;
+ }
+ }
+ if (!tensorFound) {
+ for (size_t i = 0; i < outputs.size(); i++) {
+ if (outputs[i].get_names().count(tensor_name)) {
+ preprocessor.output(i).tensor().set_layout(ov::Layout(item.second));
+ tensorFound = true;
+ break;
+ }
+ }
+ }
+            OPENVINO_ASSERT(tensorFound, "Model doesn't have input/output with tensor name: ", tensor_name);
+ }
+ }
+
+ if (!iml.empty()) {
+ for (size_t i = 0; i < inputs.size(); i++) {
+ preprocessor.input(i).model().set_layout(ov::Layout(iml));
+ }
+ }
+
+ if (!oml.empty()) {
+ for (size_t i = 0; i < outputs.size(); i++) {
+ preprocessor.output(i).model().set_layout(ov::Layout(oml));
+ }
+ }
+
+ if (!ioml.empty()) {
+ const auto user_precisions_map = parseArgMap(ioml);
+ for (auto&& item : user_precisions_map) {
+ const auto& tensor_name = item.first;
+
+ bool tensorFound = false;
+ for (size_t i = 0; i < inputs.size(); i++) {
+ if (inputs[i].get_names().count(tensor_name)) {
+ preprocessor.input(i).model().set_layout(ov::Layout(item.second));
+ tensorFound = true;
+ break;
+ }
+ }
+ if (!tensorFound) {
+ for (size_t i = 0; i < outputs.size(); i++) {
+ if (outputs[i].get_names().count(tensor_name)) {
+ preprocessor.output(i).model().set_layout(ov::Layout(item.second));
+ tensorFound = true;
+ break;
+ }
+ }
+ }
+            OPENVINO_ASSERT(tensorFound, "Model doesn't have input/output with tensor name: ", tensor_name);
+ }
+ }
+
+ model = preprocessor.build();
+}
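+
+// Usage sketch (illustrative; `model` is an assumed std::shared_ptr<ov::Model>).
+// This sets u8 input precision and an NHWC input layout and leaves the remaining
+// options empty; the model is rebuilt in place by preprocessor.build():
+//
+//     configurePrePostProcessing(model, "u8", "", "", "NHWC", "", "", "", "", "");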
+
+ov::element::Type getPrecision(std::string value,
+ const std::unordered_map<std::string, ov::element::Type>& supported_precisions) {
+ std::transform(value.begin(), value.end(), value.begin(), ::toupper);
+
+ const auto precision = supported_precisions.find(value);
+ if (precision == supported_precisions.end()) {
+ throw std::logic_error("\"" + value + "\"" + " is not a valid precision");
+ }
+
+ return precision->second;
+}
+
+ov::element::Type getPrecision2(const std::string& value) {
+ static const std::unordered_map<std::string, ov::element::Type> supported_precisions = {
+ {"FP32", ov::element::f32},
+ {"FP16", ov::element::f16},
+ {"BF16", ov::element::bf16},
+ {"U64", ov::element::u64},
+ {"I64", ov::element::i64},
+ {"U32", ov::element::u32},
+ {"I32", ov::element::i32},
+ {"U16", ov::element::u16},
+ {"I16", ov::element::i16},
+ {"U8", ov::element::u8},
+ {"I8", ov::element::i8},
+ {"BOOL", ov::element::boolean},
+ };
+
+ return getPrecision(value, supported_precisions);
+}
diff --git a/python/openvino/runtime/common/utils/src/common.cpp b/python/openvino/runtime/common/utils/src/common.cpp
new file mode 100644
index 0000000..fb238c7
--- /dev/null
+++ b/python/openvino/runtime/common/utils/src/common.cpp
@@ -0,0 +1,24 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "samples/common.hpp"
+
+std::map<std::string, std::string> parseConfig(const std::string& configName, char comment) {
+ std::map<std::string, std::string> config = {};
+
+ std::ifstream file(configName);
+ if (!file.is_open()) {
+ return config;
+ }
+
+ std::string key, value;
+ while (file >> key >> value) {
+ if (key.empty() || key[0] == comment) {
+ continue;
+ }
+ config[key] = value;
+ }
+
+ return config;
+}
diff --git a/python/openvino/runtime/common/utils/src/latency_metrics.cpp b/python/openvino/runtime/common/utils/src/latency_metrics.cpp
new file mode 100644
index 0000000..c6c3d15
--- /dev/null
+++ b/python/openvino/runtime/common/utils/src/latency_metrics.cpp
@@ -0,0 +1,42 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// clang-format off
+#include <algorithm>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "samples/latency_metrics.hpp"
+// clang-format on
+
+void LatencyMetrics::write_to_stream(std::ostream& stream) const {
+ std::ios::fmtflags fmt(stream.flags());
+ stream << data_shape << ";" << std::fixed << std::setprecision(2) << median_or_percentile << ";" << avg << ";"
+ << min << ";" << max;
+ stream.flags(fmt);
+}
+
+void LatencyMetrics::write_to_slog() const {
+ std::string percentileStr = (percentile_boundary == 50)
+ ? " Median: "
+ : " " + std::to_string(percentile_boundary) + " percentile: ";
+
+ slog::info << percentileStr << double_to_string(median_or_percentile) << " ms" << slog::endl;
+ slog::info << " Average: " << double_to_string(avg) << " ms" << slog::endl;
+ slog::info << " Min: " << double_to_string(min) << " ms" << slog::endl;
+ slog::info << " Max: " << double_to_string(max) << " ms" << slog::endl;
+}
+
+void LatencyMetrics::fill_data(std::vector<double> latencies, size_t percentile_boundary) {
+ if (latencies.empty()) {
+ throw std::logic_error("Latency metrics class expects non-empty vector of latencies at consturction.");
+ }
+ std::sort(latencies.begin(), latencies.end());
+ min = latencies[0];
+ avg = std::accumulate(latencies.begin(), latencies.end(), 0.0) / latencies.size();
+ median_or_percentile = latencies[size_t(latencies.size() / 100.0 * percentile_boundary)];
+ max = latencies.back();
+}
diff --git a/python/openvino/runtime/common/utils/src/slog.cpp b/python/openvino/runtime/common/utils/src/slog.cpp
new file mode 100644
index 0000000..df484ec
--- /dev/null
+++ b/python/openvino/runtime/common/utils/src/slog.cpp
@@ -0,0 +1,43 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// clang-format off
+#include <iostream>
+
+#include "samples/slog.hpp"
+// clang-format on
+
+namespace slog {
+
+LogStream info("INFO", std::cout);
+LogStream warn("WARNING", std::cout);
+LogStream err("ERROR", std::cerr);
+
+LogStream::LogStream(const std::string& prefix, std::ostream& log_stream) : _prefix(prefix), _new_line(true) {
+ _log_stream = &log_stream;
+}
+
+// Specializing for LogStreamEndLine to support slog::endl
+LogStream& LogStream::operator<<(const LogStreamEndLine& /*arg*/) {
+ if (_new_line)
+ (*_log_stream) << "[ " << _prefix << " ] ";
+ _new_line = true;
+
+ (*_log_stream) << std::endl;
+ return *this;
+}
+
+// Specializing for LogStreamBoolAlpha to support slog::boolalpha
+LogStream& LogStream::operator<<(const LogStreamBoolAlpha& /*arg*/) {
+ (*_log_stream) << std::boolalpha;
+ return *this;
+}
+
+// Specializing for LogStreamFlush to support slog::flush
+LogStream& LogStream::operator<<(const LogStreamFlush& /*arg*/) {
+ (*_log_stream) << std::flush;
+ return *this;
+}
+
+} // namespace slog