completed thesisHEAD master

author: Eric Dao <eric@erickhangdao.com> 2025-03-10 17:54:31 -0400
committer: Eric Dao <eric@erickhangdao.com> 2025-03-10 17:54:31 -0400
commit: ab224e2e6ba65f5a369ec392f99cd8845ad06c98 (patch)
tree: a1e757e9341863ed52b8ad4c5a1c45933aab9da4 /python/openvino/runtime/object_detection_demo
parent: 40da1752f2c8639186b72f6838aa415e854d0b1d (diff)
download: thesis-master.tar.gz
thesis-master.tar.bz2
thesis-master.zip
5 files changed, 739 insertions, 0 deletions
diff --git a/python/openvino/runtime/object_detection_demo/CMakeLists.txt b/python/openvino/runtime/object_detection_demo/CMakeLists.txt
new file mode 100644
index 0000000..f88d1d7
--- /dev/null
+++ b/python/openvino/runtime/object_detection_demo/CMakeLists.txt
@@ -0,0 +1,60 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Build script for the CoreDLA object detection demo executable.
+
+# Request C++11 through CMake's own standard handling. A raw -std flag pushed
+# into CMAKE_CXX_FLAGS is compiler specific and only duplicates what
+# CMAKE_CXX_STANDARD already provides.
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+set(TARGET_NAME "object_detection_demo")
+
+# Sources are listed explicitly (no globbing) so that a missing file is
+# reported by CMake instead of silently producing an empty list.
+set(MAIN_SRC
+    "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp"
+)
+
+# Add headers here if any are introduced.
+set(MAIN_HEADERS)
+
+source_group("src" FILES ${MAIN_SRC})
+source_group("include" FILES ${MAIN_HEADERS})
+
+# OpenCV is mandatory for this demo: REQUIRED aborts configuration if it is missing.
+find_package(OpenCV COMPONENTS core highgui imgcodecs imgproc videoio REQUIRED)
+
+# Provides the Threads::Threads imported target (pthread on POSIX systems).
+find_package(Threads REQUIRED)
+
+# Create the demo executable from the sources.
+add_executable(${TARGET_NAME} ${MAIN_SRC} ${MAIN_HEADERS})
+
+# $ENV{COREDLA_ROOT} is read at configure time only. Warn when it is empty,
+# because the include paths below would then point at the filesystem root.
+if ("$ENV{COREDLA_ROOT}" STREQUAL "")
+    message(WARNING "COREDLA_ROOT is not set; include directories for ${TARGET_NAME} may be wrong.")
+endif()
+
+target_include_directories(${TARGET_NAME} PRIVATE
+    "$ENV{COREDLA_ROOT}/runtime/common/demo_utils/include/utils"
+    "$ENV{COREDLA_ROOT}/runtime/common/monitors/include"
+)
+
+# PRIVATE: nothing links against an executable, so no usage requirement needs
+# to propagate. CMAKE_DL_LIBS is empty on platforms without a separate libdl
+# (e.g. Windows), which replaces the previous "if (NOT WIN32)" special-casing.
+target_link_libraries(${TARGET_NAME} PRIVATE
+    openvino::runtime
+    openvino_dev_api
+    ie_samples_utils
+    ${OpenCV_LIBRARIES}
+    models
+    monitors
+    pipelines
+    utils
+    coreDLAHeteroPlugin
+    Threads::Threads
+    ${CMAKE_DL_LIBS}
+)
+
+set_target_properties(${TARGET_NAME} PROPERTIES INSTALL_RPATH "\$ORIGIN/../lib")
+
+# For libcoreDlaRuntimePlugin.so - typically specified by $COREDLA_ROOT/runtime/plugins.xml
+set_target_properties(${TARGET_NAME} PROPERTIES BUILD_RPATH "\$ORIGIN/..")
+
+# The same binary is installed once per packaging component.
+install(TARGETS ${TARGET_NAME} RUNTIME DESTINATION "dla/bin" COMPONENT DEMO)
+install(TARGETS ${TARGET_NAME} RUNTIME DESTINATION "dla/not_shipped/bin" COMPONENT NOT_SHIPPED)
diff --git a/python/openvino/runtime/object_detection_demo/CMakeLists.txt.orig b/python/openvino/runtime/object_detection_demo/CMakeLists.txt.orig
new file mode 100755
index 0000000..5fba764
--- /dev/null
+++ b/python/openvino/runtime/object_detection_demo/CMakeLists.txt.orig
@@ -0,0 +1,11 @@
+# Copyright (C) 2018-2019 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Gather every C++ source and header located next to this file.
+file(GLOB SRC_FILES ./*.cpp)
+file(GLOB H_FILES ./*.h)
+
+# Register the demo through the add_demo() helper, which is defined outside
+# this file, and declare the libraries it depends on.
+add_demo(
+    NAME object_detection_demo
+    SOURCES ${SRC_FILES}
+    HEADERS ${H_FILES}
+    DEPENDENCIES monitors models pipelines
+)
diff --git a/python/openvino/runtime/object_detection_demo/README.md b/python/openvino/runtime/object_detection_demo/README.md
new file mode 100644
index 0000000..7e7a90d
--- /dev/null
+++ b/python/openvino/runtime/object_detection_demo/README.md
@@ -0,0 +1,15 @@
+# Object Detection YOLO* V3 C++ Demo, Async API Performance Showcase
+
+### Running with CoreDLA
+In addition to the options described below, include the arguments:
+
+- `-plugins=<path to plugins.xml>`, using the path to [plugins.xml](../plugins.xml)
+- `-d HETERO:FPGA,CPU`
+- `-arch_file <path to arch file>`, using the path to the architecture used when creating the FPGA bitstream
+
+Use the -build_demo option to the runtime/build_runtime.sh script to build the demos.
+
+See the documentation that is included with the example design.
+
+For detailed information on the OpenVINO C++ Object Detection Demo, please see the [README](https://github.com/openvinotoolkit/open_model_zoo/blob/2023.3.0/demos/object_detection_demo/cpp/README.md) in the OpenVINO repository. Make sure to match the git tag with your installed version of OpenVINO for compatibility.
+
diff --git a/python/openvino/runtime/object_detection_demo/main.cpp b/python/openvino/runtime/object_detection_demo/main.cpp
new file mode 100644
index 0000000..f787504
--- /dev/null
+++ b/python/openvino/runtime/object_detection_demo/main.cpp
@@ -0,0 +1,598 @@
+/*
+// Copyright (C) 2018-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <chrono>
+#include <cmath>
+#include <cstdint>
+#include <exception>
+#include <iomanip>
+#include <iostream>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <random>
+#include <stdexcept>
+#include <string>
+#include <typeinfo>
+#include <utility>
+#include <vector>
+
+#include <gflags/gflags.h>
+#include <opencv2/core.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/imgproc.hpp>
+#include <openvino/openvino.hpp>
+#include <sys/stat.h>
+
+#include <models/detection_model.h>
+#include <models/detection_model_centernet.h>
+#include <models/detection_model_faceboxes.h>
+#include <models/detection_model_retinaface.h>
+#include <models/detection_model_retinaface_pt.h>
+#include <models/detection_model_ssd.h>
+#include <models/detection_model_yolo.h>
+#include <models/detection_model_yolov3_onnx.h>
+#include <models/detection_model_yolox.h>
+#include <models/input_data.h>
+#include <models/model_base.h>
+#include <models/results.h>
+#include <monitors/presenter.h>
+#include <pipelines/async_pipeline.h>
+#include <pipelines/metadata.h>
+#include <utils/args_helper.hpp>
+#include <utils/common.hpp>
+#include <utils/config_factory.h>
+#include <utils/default_flags.hpp>
+#include <utils/images_capture.h>
+#include <utils/ocv_common.hpp>
+#include <utils/performance_metrics.hpp>
+#include <utils/slog.hpp>
+
+// Common input/output flags shared between demos. These macros are not defined in this file;
+// presumably they come from utils/default_flags.hpp and provide the -i, -loop, -o and -limit
+// flags together with their help messages (showUsage() below relies on those names).
+DEFINE_INPUT_FLAGS
+DEFINE_OUTPUT_FLAGS
+
+// Help messages for the demo-specific command-line flags defined further below.
+static const char help_message[] = "Print a usage message.";
+static const char at_message[] =
+    "Required. Architecture type: centernet, faceboxes, retinaface, retinaface-pytorch, ssd, yolo, yolov3-onnx or yolox";
+static const char model_message[] = "Required. Path to an .xml file with a trained model.";
+static const char target_device_message[] =
+    "Optional. Specify the target device to infer on (the list of available devices is shown below). "
+    "Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify HETERO plugin. "
+    "The demo will look for a suitable plugin for a specified device.";
+static const char labels_message[] = "Optional. Path to a file with labels mapping.";
+static const char layout_message[] = "Optional. Specify inputs layouts."
+                                     " Ex. NCHW or input0:NCHW,input1:NC in case of more than one input.";
+static const char thresh_output_message[] = "Optional. Probability threshold for detections.";
+static const char raw_output_message[] = "Optional. Inference results as raw values.";
+static const char input_resizable_message[] =
+    "Optional. Enables resizable input with support of ROI crop & auto resize.";
+static const char nireq_message[] = "Optional. Number of infer requests. If this option is omitted, number of infer "
+                                    "requests is determined automatically.";
+static const char num_threads_message[] = "Optional. Number of threads.";
+static const char num_streams_message[] = "Optional. Number of streams to use for inference on the CPU or/and GPU in "
+                                          "throughput mode (for HETERO and MULTI device cases use format "
+                                          "<device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>)";
+static const char no_show_message[] = "Optional. Don't show output.";
+static const char utilization_monitors_message[] = "Optional. List of monitors to show initially.";
+static const char iou_thresh_output_message[] =
+    "Optional. Filtering intersection over union threshold for overlapping boxes.";
+static const char yolo_af_message[] = "Optional. Use advanced postprocessing/filtering algorithm for YOLO.";
+static const char output_resolution_message[] =
+    "Optional. Specify the maximum output window resolution "
+    "in (width x height) format. Example: 1280x720.";
+static const char input_resolution_message[] =
+    "Optional. Specify the maximum input video capturing resolution "
+    "in (width x height) format. Example: 640x640. The input frame size used by default is 1280x720.";
+static const char anchors_message[] = "Optional. A comma separated list of anchors. "
+                                      "By default used default anchors for model. Only for YOLOV4 architecture type.";
+static const char masks_message[] = "Optional. A comma separated list of mask for anchors. "
+                                    "By default used default masks for model. Only for YOLOV4 architecture type.";
+static const char reverse_input_channels_message[] = "Optional. Switch the input channels order from BGR to RGB.";
+static const char mean_values_message[] =
+    "Optional. Normalize input by subtracting the mean values per channel. Example: \"255.0 255.0 255.0\"";
+static const char scale_values_message[] = "Optional. Divide input by scale values per channel. Division is applied "
+                                           "after mean values subtraction. Example: \"255.0 255.0 255.0\"";
+
+// @brief message for the custom plugins xml file option (-plugins)
+static const char plugins_message[] = "Optional. Select a custom plugins_xml file to use.";
+// @brief message for the architecture .arch file option (-arch_file)
+static const char arch_file_message[] = "Optional. Provide a path for the architecture .arch file.";
+
+// gflags definitions: each DEFINE_<type>(name, default, help) creates a global FLAGS_<name>.
+DEFINE_bool(h, false, help_message);
+DEFINE_string(at, "", at_message);
+DEFINE_string(m, "", model_message);
+DEFINE_string(d, "CPU", target_device_message);
+DEFINE_string(labels, "", labels_message);
+DEFINE_string(layout, "", layout_message);
+DEFINE_bool(r, false, raw_output_message);
+DEFINE_double(t, 0.5, thresh_output_message);
+DEFINE_double(iou_t, 0.5, iou_thresh_output_message);
+DEFINE_bool(auto_resize, false, input_resizable_message);
+DEFINE_int32(nireq, 0, nireq_message);
+DEFINE_int32(nthreads, 0, num_threads_message);
+DEFINE_string(nstreams, "", num_streams_message);
+DEFINE_bool(no_show, false, no_show_message);
+DEFINE_string(u, "", utilization_monitors_message);
+DEFINE_bool(yolo_af, true, yolo_af_message);
+DEFINE_string(input_resolution, "", input_resolution_message);
+DEFINE_string(output_resolution, "", output_resolution_message);
+DEFINE_string(anchors, "", anchors_message);
+DEFINE_string(masks, "", masks_message);
+DEFINE_bool(reverse_input_channels, false, reverse_input_channels_message);
+DEFINE_string(mean_values, "", mean_values_message);
+DEFINE_string(scale_values, "", scale_values_message);
+
+/// @brief Path to a plugins_xml file; main() passes it to the ov::Core constructor
+DEFINE_string(plugins, "", plugins_message);
+/// @brief Path to arch file; main() forwards it to ConfigFactory::getUserConfig()
+DEFINE_string(arch_file, "", arch_file_message);
+
+/**
+ * \brief Prints the list of supported command-line options to standard output.
+ *
+ * Every option defined in this file is listed, including the CoreDLA-specific
+ * -plugins and -arch_file flags, so that the help text matches what the demo accepts.
+ */
+static void showUsage() {
+    std::cout << std::endl;
+    std::cout << "object_detection_demo [OPTION]" << std::endl;
+    std::cout << "Options:" << std::endl;
+    std::cout << std::endl;
+    std::cout << "    -h                        " << help_message << std::endl;
+    std::cout << "    -at \"<type>\"              " << at_message << std::endl;
+    std::cout << "    -i                        " << input_message << std::endl;
+    std::cout << "    -m \"<path>\"               " << model_message << std::endl;
+    std::cout << "    -o \"<path>\"               " << output_message << std::endl;
+    std::cout << "    -limit \"<num>\"            " << limit_message << std::endl;
+    std::cout << "    -d \"<device>\"             " << target_device_message << std::endl;
+    std::cout << "    -labels \"<path>\"          " << labels_message << std::endl;
+    std::cout << "    -layout \"<string>\"        " << layout_message << std::endl;
+    std::cout << "    -r                        " << raw_output_message << std::endl;
+    std::cout << "    -t                        " << thresh_output_message << std::endl;
+    std::cout << "    -iou_t                    " << iou_thresh_output_message << std::endl;
+    std::cout << "    -auto_resize              " << input_resizable_message << std::endl;
+    std::cout << "    -nireq \"<integer>\"        " << nireq_message << std::endl;
+    std::cout << "    -nthreads \"<integer>\"     " << num_threads_message << std::endl;
+    std::cout << "    -nstreams                 " << num_streams_message << std::endl;
+    std::cout << "    -loop                     " << loop_message << std::endl;
+    std::cout << "    -no_show                  " << no_show_message << std::endl;
+    std::cout << "    -input_resolution         " << input_resolution_message << std::endl;
+    std::cout << "    -output_resolution        " << output_resolution_message << std::endl;
+    std::cout << "    -u                        " << utilization_monitors_message << std::endl;
+    std::cout << "    -yolo_af                  " << yolo_af_message << std::endl;
+    std::cout << "    -anchors                  " << anchors_message << std::endl;
+    std::cout << "    -masks                    " << masks_message << std::endl;
+    std::cout << "    -reverse_input_channels   " << reverse_input_channels_message << std::endl;
+    std::cout << "    -mean_values              " << mean_values_message << std::endl;
+    std::cout << "    -scale_values             " << scale_values_message << std::endl;
+    // CoreDLA-specific options (previously defined but missing from the help output)
+    std::cout << "    -plugins \"<path>\"         " << plugins_message << std::endl;
+    std::cout << "    -arch_file \"<path>\"       " << arch_file_message << std::endl;
+}
+
+/**
+ * \brief Palette of visually distinct colors, indexed by label id.
+ *
+ * Colors are selected in a normalized HSV space: starting from one seed color, every
+ * further color is the candidate (out of a random batch) whose smallest distance to the
+ * already selected colors is largest. Each selected color is then scaled to OpenCV's
+ * 8-bit HSV ranges and converted with cv::COLOR_HSV2RGB.
+ */
+class ColorPalette {
+private:
+    std::vector<cv::Scalar> palette;
+
+    // Draws a uniformly distributed double from [a, b]; the upper bound is nudged to the
+    // next representable value so that b itself is part of the range.
+    static double getRandom(double a = 0.0, double b = 1.0) {
+        static std::default_random_engine e;
+        const double upper = std::nextafter(b, std::numeric_limits<double>::max());
+        std::uniform_real_distribution<> dis(a, upper);
+        return dis(e);
+    }
+
+    // Squared distance between two normalized HSV colors. Hue is treated as circular
+    // (the shorter way around is used) and weighted by a factor of two.
+    static double distance(const cv::Scalar& c1, const cv::Scalar& c2) {
+        const double hueGap = std::fabs(c1[0] - c2[0]);
+        const double dh = std::fmin(hueGap, 1 - hueGap) * 2;
+        const double ds = std::fabs(c1[1] - c2[1]);
+        const double dv = std::fabs(c1[2] - c2[2]);
+
+        return dh * dh + ds * ds + dv * dv;
+    }
+
+    // Returns the candidate whose nearest neighbour in colorSet is farthest away.
+    static cv::Scalar maxMinDistance(const std::vector<cv::Scalar>& colorSet,
+                                     const std::vector<cv::Scalar>& colorCandidates) {
+        std::vector<double> distances;
+        distances.reserve(colorCandidates.size());
+        for (const auto& candidate : colorCandidates) {
+            const auto nearest = std::min_element(
+                colorSet.begin(),
+                colorSet.end(),
+                [&candidate](const cv::Scalar& lhs, const cv::Scalar& rhs) {
+                    return distance(candidate, lhs) < distance(candidate, rhs);
+                });
+            distances.push_back(distance(candidate, *nearest));
+        }
+        const auto farthest = std::max_element(distances.begin(), distances.end());
+        return colorCandidates[farthest - distances.begin()];
+    }
+
+    // Converts one color from OpenCV 8-bit HSV to RGB via a 1x1 image.
+    static cv::Scalar hsv2rgb(const cv::Scalar& hsvColor) {
+        cv::Mat hsvPixel(1, 1, CV_8UC3, hsvColor);
+        cv::Mat rgbPixel;
+        cv::cvtColor(hsvPixel, rgbPixel, cv::COLOR_HSV2RGB);
+        const auto* px = rgbPixel.data;
+        return cv::Scalar(px[0], px[1], px[2]);
+    }
+
+public:
+    // Builds a palette for n labels; the seed color alone is produced when n is 0 or 1.
+    explicit ColorPalette(size_t n) {
+        const size_t numCandidates = 100;
+        std::vector<cv::Scalar> colorCandidates(numCandidates);
+        std::vector<cv::Scalar> hsvColors(1, {1., 1., 1.});
+
+        hsvColors.reserve(n);
+        palette.reserve(n);
+
+        for (size_t picked = 1; picked < n; ++picked) {
+            // Refill the candidate batch; components are drawn in hue, saturation, value order.
+            for (auto& candidate : colorCandidates) {
+                const double hue = getRandom();
+                const double saturation = getRandom(0.8, 1.0);
+                const double value = getRandom(0.5, 1.0);
+                candidate = cv::Scalar{hue, saturation, value};
+            }
+            hsvColors.push_back(maxMinDistance(hsvColors, colorCandidates));
+        }
+
+        for (auto& hsv : hsvColors) {
+            // Scale the normalized components to OpenCV's HSV ranges
+            hsv[0] *= 179;
+            hsv[1] *= 255;
+            hsv[2] *= 255;
+
+            palette.push_back(hsv2rgb(hsv));
+        }
+    }
+
+    // Indices beyond the palette size wrap around.
+    const cv::Scalar& operator[](size_t index) const {
+        const size_t wrapped = index % palette.size();
+        return palette[wrapped];
+    }
+};
+
+// Reports whether stat() succeeds for the given path, i.e. whether the path can be resolved.
+bool exists_test (const std::string& name) {
+  struct stat info;
+  const int status = stat(name.c_str(), &info);
+  return status == 0;
+}
+
+/**
+ * \brief Checks that a resolution string has the form "<width>x<height>".
+ *
+ * The string passed in is the one that is validated (the previous implementation ignored
+ * its argument and always inspected FLAGS_input_resolution).
+ *
+ * \param resolution text to validate, e.g. "1280x720"
+ * \return true when an 'x' separator is present and std::stoi parses a positive integer
+ *         on both sides of it; false otherwise, including when parsing throws.
+ *         As std::stoi stops at the first non-digit, trailing characters after a valid
+ *         number are tolerated.
+ */
+bool is_valid_resolution(const std::string& resolution) {
+  const size_t pos = resolution.find("x");
+  if (pos == std::string::npos) {
+    return false;
+  }
+
+  try {
+    const int width = std::stoi(resolution.substr(0, pos));
+    const int height = std::stoi(resolution.substr(pos + 1));
+    return width > 0 && height > 0;
+  } catch (...) {
+    // std::stoi throws on empty, non-numeric or out-of-range input
+    return false;
+  }
+}
+
+/**
+ * \brief Parses the command line with gflags and validates the resulting flags.
+ *
+ * \param argc argument count as received by main()
+ * \param argv argument vector as received by main()
+ * \return false when -h was given (usage has been printed and the caller should exit),
+ *         true when all checks passed.
+ * \throws std::logic_error when -i, -m or -at is missing, when -input_resolution or
+ *         -output_resolution is malformed, or when a -plugins path was given that does
+ *         not exist.
+ */
+bool ParseAndCheckCommandLine(int argc, char* argv[]) {
+    // ---------------------------Parsing and validation of input args--------------------------------------
+    gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
+    if (FLAGS_h) {
+        showUsage();
+        return false;
+    }
+
+    if (FLAGS_i.empty()) {
+        throw std::logic_error("Parameter -i is not set");
+    }
+
+    if (FLAGS_m.empty()) {
+        throw std::logic_error("Parameter -m is not set");
+    }
+
+    if (FLAGS_at.empty()) {
+        throw std::logic_error("Parameter -at is not set");
+    }
+
+    if (!FLAGS_input_resolution.empty() && !is_valid_resolution(FLAGS_input_resolution)) {
+        throw std::logic_error("Correct format of -input_resolution parameter is \"width\"x\"height\".");
+    }
+
+    // Validate the output resolution itself (the input resolution used to be checked here by mistake).
+    if (!FLAGS_output_resolution.empty() && !is_valid_resolution(FLAGS_output_resolution)) {
+        throw std::logic_error("Correct format of -output_resolution parameter is \"width\"x\"height\".");
+    }
+
+    // -plugins is optional: only insist that the file exists when a path was actually supplied.
+    if (!FLAGS_plugins.empty()) {
+        std::cout << "Using custom plugins xml file - " << FLAGS_plugins << std::endl;
+
+        if (!exists_test(FLAGS_plugins)) {
+            std::cout << "Error: plugins_xml file: " << FLAGS_plugins << " doesn't exist. Please provide a valid path." << std::endl;
+            throw std::logic_error("plugins_xml file path does not exist.");
+        }
+    }
+    return true;
+}
+
+// Draws the detections of one result onto the frame carried in the result's metadata
+// (the frame is placed there at submission time) and returns the annotated image.
+// Throws std::invalid_argument when the metadata or the image is missing.
+cv::Mat renderDetectionData(DetectionResult& result, const ColorPalette& palette, OutputTransform& outputTransform) {
+    if (!result.metaData) {
+        throw std::invalid_argument("Renderer: metadata is null");
+    }
+
+    auto outputImg = result.metaData->asRef<ImageMetaData>().img;
+    if (outputImg.empty()) {
+        throw std::invalid_argument("Renderer: image provided in metadata is empty");
+    }
+
+    outputTransform.resize(outputImg);
+
+    // With -r the raw values are logged as a table: header first, one row per object below
+    const bool logRaw = FLAGS_r;
+    if (logRaw) {
+        slog::debug << " -------------------- Frame # " << result.frameId << "--------------------" << slog::endl;
+        slog::debug << " Class ID  | Confidence | XMIN | YMIN | XMAX | YMAX " << slog::endl;
+    }
+
+    for (auto& detection : result.objects) {
+        if (logRaw) {
+            // Logged before scaleRect(), i.e. with the coordinates as stored in the result
+            slog::debug << " " << std::left << std::setw(9) << detection.label << " | " << std::setw(10)
+                        << detection.confidence << " | " << std::setw(4) << static_cast<int>(detection.x) << " | "
+                        << std::setw(4) << static_cast<int>(detection.y) << " | " << std::setw(4)
+                        << static_cast<int>(detection.x + detection.width) << " | " << std::setw(4)
+                        << static_cast<int>(detection.y + detection.height) << slog::endl;
+        }
+        outputTransform.scaleRect(detection);
+
+        // Caption: "<label>:<confidence in percent, one decimal>%"
+        std::ostringstream percent;
+        percent << ":" << std::fixed << std::setprecision(1) << detection.confidence * 100 << '%';
+        const std::string caption = detection.label + percent.str();
+
+        const auto& boxColor = palette[detection.labelID];
+        const cv::Point2f captionOrigin(detection.x, detection.y - 5);
+        putHighlightedText(outputImg, caption, captionOrigin, cv::FONT_HERSHEY_COMPLEX_SMALL, 1, boxColor, 2);
+        cv::rectangle(outputImg, detection, boxColor, 2);
+    }
+
+    // Landmarks are drawn only when the result can be viewed as a RetinaFaceDetectionResult;
+    // for any other result type asRef() raises std::bad_cast, which is deliberately ignored.
+    try {
+        auto& faceResult = result.asRef<RetinaFaceDetectionResult>();
+        for (auto& point : faceResult.landmarks) {
+            outputTransform.scaleCoord(point);
+            cv::circle(outputImg, point, 2, cv::Scalar(0, 255, 255), -1);
+        }
+    } catch (const std::bad_cast&) {}
+
+    return outputImg;
+}
+
+// Entry point of the demo: parses the flags, builds the detection model selected with -at,
+// runs frames from the input through an AsyncPipeline and renders/writes/shows the results.
+// Returns 0 on success (or after printing usage), -1 for an unknown -at value and
+// 1 when an exception reaches the outer handlers.
+int main(int argc, char* argv[]) {
+    try {
+        PerformanceMetrics metrics;
+
+        // ------------------------------ Parsing and validation of input args ---------------------------------
+        if (!ParseAndCheckCommandLine(argc, argv)) {
+            return 0;
+        }
+
+        const auto& strAnchors = split(FLAGS_anchors, ',');
+        const auto& strMasks = split(FLAGS_masks, ',');
+
+        // Convert the comma separated -anchors / -masks values to numbers; any conversion
+        // failure is reported as a std::runtime_error.
+        std::vector<float> anchors;
+        std::vector<int64_t> masks;
+        try {
+            for (auto& str : strAnchors) {
+                anchors.push_back(std::stof(str));
+            }
+        } catch (...) { throw std::runtime_error("Invalid anchors list is provided."); }
+
+        try {
+            for (auto& str : strMasks) {
+                masks.push_back(std::stoll(str));
+            }
+        } catch (...) { throw std::runtime_error("Invalid masks list is provided."); }
+
+        //------------------------------- Preparing Input ------------------------------------------------------
+        // Capture resolution: taken from -input_resolution when given, otherwise 1280x720.
+        cv::Size inputResolution;
+        if (!FLAGS_input_resolution.empty()) {
+            size_t pos = FLAGS_input_resolution.find("x");
+            inputResolution = cv::Size{
+                    std::stoi(FLAGS_input_resolution.substr(0, pos)),
+                    std::stoi(FLAGS_input_resolution.substr(pos + 1, FLAGS_input_resolution.length()))};
+            slog::info << "Using custom input resolution of " << FLAGS_input_resolution << slog::endl;
+        } else {
+            inputResolution = cv::Size{1280, 720};
+            slog::info << "Using default input resolution of 1280x720." << slog::endl;
+        }
+
+        // read_type::efficient is selected only when exactly one infer request is used (-nireq 1).
+        auto cap = openImagesCapture(FLAGS_i,
+                                     FLAGS_loop,
+                                     FLAGS_nireq == 1 ? read_type::efficient : read_type::safe,
+                                     0,
+                                     std::numeric_limits<size_t>::max(),
+                                     inputResolution);
+
+        cv::Mat curr_frame;
+
+        //------------------------------ Running Detection routines ----------------------------------------------
+        std::vector<std::string> labels;
+        if (!FLAGS_labels.empty())
+            labels = DetectionModel::loadLabels(FLAGS_labels);
+        // One color per label; 100 colors are generated when no labels file was given.
+        ColorPalette palette(labels.size() > 0 ? labels.size() : 100);
+
+        // Instantiate the model wrapper that matches the -at architecture type.
+        std::unique_ptr<ModelBase> model;
+        if (FLAGS_at == "centernet") {
+            model.reset(new ModelCenterNet(FLAGS_m, static_cast<float>(FLAGS_t), labels, FLAGS_layout));
+        } else if (FLAGS_at == "faceboxes") {
+            model.reset(new ModelFaceBoxes(FLAGS_m,
+                                           static_cast<float>(FLAGS_t),
+                                           FLAGS_auto_resize,
+                                           static_cast<float>(FLAGS_iou_t),
+                                           FLAGS_layout));
+        } else if (FLAGS_at == "retinaface") {
+            model.reset(new ModelRetinaFace(FLAGS_m,
+                                            static_cast<float>(FLAGS_t),
+                                            FLAGS_auto_resize,
+                                            static_cast<float>(FLAGS_iou_t),
+                                            FLAGS_layout));
+        } else if (FLAGS_at == "retinaface-pytorch") {
+            model.reset(new ModelRetinaFacePT(FLAGS_m,
+                                              static_cast<float>(FLAGS_t),
+                                              FLAGS_auto_resize,
+                                              static_cast<float>(FLAGS_iou_t),
+                                              FLAGS_layout));
+        } else if (FLAGS_at == "ssd") {
+            model.reset(new ModelSSD(FLAGS_m, static_cast<float>(FLAGS_t), FLAGS_auto_resize, labels, FLAGS_layout));
+        } else if (FLAGS_at == "yolo") {
+            model.reset(new ModelYolo(FLAGS_m,
+                                      static_cast<float>(FLAGS_t),
+                                      FLAGS_auto_resize,
+                                      FLAGS_yolo_af,
+                                      static_cast<float>(FLAGS_iou_t),
+                                      labels,
+                                      anchors,
+                                      masks,
+                                      FLAGS_layout));
+        } else if (FLAGS_at == "yolov3-onnx") {
+            model.reset(new ModelYoloV3ONNX(FLAGS_m,
+                                            static_cast<float>(FLAGS_t),
+                                            labels,
+                                            FLAGS_layout));
+        } else if (FLAGS_at == "yolox") {
+            model.reset(new ModelYoloX(FLAGS_m,
+                                       static_cast<float>(FLAGS_t),
+                                       static_cast<float>(FLAGS_iou_t),
+                                       labels,
+                                       FLAGS_layout));
+        } else {
+            slog::err << "No model type or invalid model type (-at) provided: " + FLAGS_at << slog::endl;
+            return -1;
+        }
+        model->setInputsPreprocessing(FLAGS_reverse_input_channels, FLAGS_mean_values, FLAGS_scale_values);
+        slog::info << ov::get_openvino_version() << slog::endl;
+
+        // The OpenVINO core is created from the plugins XML file given with -plugins.
+        ov::Core core(FLAGS_plugins);
+
+        // The user configuration also receives the -arch_file path.
+        AsyncPipeline pipeline(std::move(model),
+                               ConfigFactory::getUserConfig(FLAGS_d, FLAGS_nireq, FLAGS_nstreams, FLAGS_nthreads, FLAGS_arch_file),
+                               core);
+        Presenter presenter(FLAGS_u);
+
+        bool keepRunning = true;
+        // Id returned by the most recent submitData() call; -1 until a frame has been submitted.
+        int64_t frameNum = -1;
+        std::unique_ptr<ResultBase> result;
+        // Number of results rendered so far.
+        uint32_t framesProcessed = 0;
+
+        LazyVideoWriter videoWriter{FLAGS_o, cap->fps(), static_cast<unsigned int>(FLAGS_limit)};
+
+        PerformanceMetrics renderMetrics;
+
+        cv::Size outputResolution;
+        OutputTransform outputTransform = OutputTransform();
+        // Position of the 'x' separator in -output_resolution; npos when the flag is empty.
+        size_t found = FLAGS_output_resolution.find("x");
+
+        while (keepRunning) {
+            if (pipeline.isReadyToProcess()) {
+                auto startTime = std::chrono::steady_clock::now();
+
+                //--- Capturing frame
+                curr_frame = cap->read();
+
+                if (curr_frame.empty()) {
+                    // Input stream is over
+                    break;
+                }
+
+                frameNum = pipeline.submitData(ImageInputData(curr_frame),
+                                               std::make_shared<ImageMetaData>(curr_frame, startTime));
+            }
+
+            //--- Output resolution is derived while the last submitted frame id is still 0:
+            //    the frame size itself without -output_resolution, otherwise an OutputTransform
+            //    built from the frame size and the requested resolution.
+            if (frameNum == 0) {
+                if (found == std::string::npos) {
+                    outputResolution = curr_frame.size();
+                } else {
+                    outputResolution = cv::Size{
+                        std::stoi(FLAGS_output_resolution.substr(0, found)),
+                        std::stoi(FLAGS_output_resolution.substr(found + 1, FLAGS_output_resolution.length()))};
+                    outputTransform = OutputTransform(curr_frame.size(), outputResolution);
+                    outputResolution = outputTransform.computeResolution();
+                }
+            }
+
+            //--- Waiting for free input slot or output data available. Function will return immediately if any of them
+            // are available.
+            pipeline.waitForData();
+
+            //--- Checking for results and rendering data if it's ready
+            //--- If you need just plain data without rendering - cast result's underlying pointer to DetectionResult*
+            //    and use your own processing instead of calling renderDetectionData().
+            while (keepRunning && (result = pipeline.getResult())) {
+                auto renderingStart = std::chrono::steady_clock::now();
+                cv::Mat outFrame = renderDetectionData(result->asRef<DetectionResult>(), palette, outputTransform);
+
+                //--- Showing results and device information
+                presenter.drawGraphs(outFrame);
+                renderMetrics.update(renderingStart);
+                metrics.update(result->metaData->asRef<ImageMetaData>().timeStamp,
+                               outFrame,
+                               {10, 22},
+                               cv::FONT_HERSHEY_COMPLEX,
+                               0.65);
+
+                videoWriter.write(outFrame);
+                framesProcessed++;
+
+                if (!FLAGS_no_show) {
+                    cv::imshow("Detection Results", outFrame);
+                    //--- Processing keyboard events
+                    int key = cv::waitKey(1);
+                    if (27 == key || 'q' == key || 'Q' == key) {  // Esc
+                        keepRunning = false;
+                    } else {
+                        presenter.handleKey(key);
+                    }
+                }
+            }
+        }  // while(keepRunning)
+
+        // ------------ Waiting for completion of data processing and rendering the rest of results ---------
+        pipeline.waitForTotalCompletion();
+
+        //--- Drain the results that were still in flight when the main loop ended
+        for (; framesProcessed <= frameNum; framesProcessed++) {
+            result = pipeline.getResult();
+            if (result != nullptr) {
+                auto renderingStart = std::chrono::steady_clock::now();
+                cv::Mat outFrame = renderDetectionData(result->asRef<DetectionResult>(), palette, outputTransform);
+                //--- Showing results and device information
+                presenter.drawGraphs(outFrame);
+                renderMetrics.update(renderingStart);
+                metrics.update(result->metaData->asRef<ImageMetaData>().timeStamp,
+                               outFrame,
+                               {10, 22},
+                               cv::FONT_HERSHEY_COMPLEX,
+                               0.65);
+                videoWriter.write(outFrame);
+                if (!FLAGS_no_show) {
+                    cv::imshow("Detection Results", outFrame);
+                    //--- Updating output window
+                    cv::waitKey(1);
+                }
+            }
+        }
+
+        //--- Final report: overall metrics, per-stage latencies and presenter means
+        slog::info << "Metrics report:" << slog::endl;
+        metrics.logTotal();
+        logLatencyPerStage(cap->getMetrics().getTotal().latency,
+                           pipeline.getPreprocessMetrics().getTotal().latency,
+                           pipeline.getInferenceMetircs().getTotal().latency,
+                           pipeline.getPostprocessMetrics().getTotal().latency,
+                           renderMetrics.getTotal().latency);
+        slog::info << presenter.reportMeans() << slog::endl;
+    } catch (const std::exception& error) {
+        slog::err << error.what() << slog::endl;
+        return 1;
+    } catch (...) {
+        slog::err << "Unknown/internal exception happened." << slog::endl;
+        return 1;
+    }
+
+    return 0;
+}
diff --git a/python/openvino/runtime/object_detection_demo/models.lst b/python/openvino/runtime/object_detection_demo/models.lst
new file mode 100644
index 0000000..4b2e8a8
--- /dev/null
+++ b/python/openvino/runtime/object_detection_demo/models.lst
@@ -0,0 +1,55 @@
+# This file can be used with the --list option of the model downloader.
+# For -at centernet
+ctdet_coco_dlav0_512
+# For -at faceboxes
+faceboxes-pytorch
+# For -at retinaface-pytorch
+retinaface-resnet50-pytorch
+# For -at ssd
+efficientdet-d0-tf
+efficientdet-d1-tf
+face-detection-????
+face-detection-adas-????
+face-detection-retail-????
+faster-rcnn-resnet101-coco-sparse-60-0001
+faster_rcnn_inception_resnet_v2_atrous_coco
+faster_rcnn_resnet50_coco
+pedestrian-and-vehicle-detector-adas-????
+pedestrian-detection-adas-????
+pelee-coco
+person-detection-????
+person-detection-retail-0013
+person-vehicle-bike-detection-????
+product-detection-0001
+rfcn-resnet101-coco-tf
+retinanet-tf
+ssd300
+ssd512
+ssd-resnet34-1200-onnx
+ssd_mobilenet_v1_coco
+ssd_mobilenet_v1_fpn_coco
+ssdlite_mobilenet_v2
+vehicle-detection-????
+vehicle-detection-adas-????
+vehicle-license-plate-detection-barrier-????
+# For -at yolo
+mobilenet-yolo-v4-syg
+person-vehicle-bike-detection-crossroad-yolov3-1020
+yolo-v1-tiny-tf
+yolo-v2-ava-0001
+yolo-v2-ava-sparse-??-0001
+yolo-v2-tiny-ava-0001
+yolo-v2-tiny-ava-sparse-??-0001
+yolo-v2-tiny-vehicle-detection-0001
+yolo-v2-tf
+yolo-v2-tiny-tf
+yolo-v3-tf
+yolo-v3-tiny-tf
+yolo-v4-tf
+yolo-v4-tiny-tf
+yolof
+# For -at yolov3-onnx
+yolo-v3-onnx
+yolo-v3-tiny-onnx
+# For -at yolox
+yolox-tiny
author	Eric Dao <eric@erickhangdao.com>	2025-03-10 17:54:31 -0400
committer	Eric Dao <eric@erickhangdao.com>	2025-03-10 17:54:31 -0400
commit	ab224e2e6ba65f5a369ec392f99cd8845ad06c98 (patch)
tree	a1e757e9341863ed52b8ad4c5a1c45933aab9da4 /python/openvino/runtime/object_detection_demo
parent	40da1752f2c8639186b72f6838aa415e854d0b1d (diff)
download	thesis-master.tar.gz thesis-master.tar.bz2 thesis-master.zip