Diffstat (limited to 'python/openvino/runtime/segmentation_demo/main.cpp')
-rw-r--r--  python/openvino/runtime/segmentation_demo/main.cpp | 445
1 file changed, 445 insertions(+), 0 deletions(-)
diff --git a/python/openvino/runtime/segmentation_demo/main.cpp b/python/openvino/runtime/segmentation_demo/main.cpp
new file mode 100644
index 0000000..1631e29
--- /dev/null
+++ b/python/openvino/runtime/segmentation_demo/main.cpp
@@ -0,0 +1,445 @@
+/*
+// Copyright (C) 2018-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <chrono>
+#include <exception>
+#include <initializer_list>
+#include <iomanip>
+#include <iostream>
+#include <memory>
+#include <random>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <gflags/gflags.h>
+#include <opencv2/core.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/imgproc.hpp>
+#include <openvino/openvino.hpp>
+
+#include <models/input_data.h>
+#include <models/model_base.h>
+#include <models/results.h>
+#include <models/segmentation_model.h>
+#include <monitors/presenter.h>
+#include <pipelines/async_pipeline.h>
+#include <pipelines/metadata.h>
+#include <utils/common.hpp>
+#include <utils/config_factory.h>
+#include <utils/default_flags.hpp>
+#include <utils/images_capture.h>
+#include <utils/ocv_common.hpp>
+#include <utils/performance_metrics.hpp>
+#include <utils/slog.hpp>
+
+#include <sys/stat.h>
+
+DEFINE_INPUT_FLAGS
+DEFINE_OUTPUT_FLAGS
+
+static const char help_message[] = "Print a usage message.";
+static const char model_message[] = "Required. Path to an .xml file with a trained model.";
+static const char target_device_message[] =
+ "Optional. Specify the target device to infer on (the list of available devices is shown below). "
+ "Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify HETERO plugin. "
+ "The demo will look for a suitable plugin for a specified device.";
+static const char labels_message[] = "Optional. Path to a file with labels mapping.";
+static const char layout_message[] = "Optional. Specify input layouts."
+                                     " Ex. NCHW, or input0:NCHW,input1:NC in case of more than one input.";
+static const char raw_output_message[] = "Optional. Output inference results as a mask histogram.";
+static const char nireq_message[] = "Optional. Number of infer requests. If this option is omitted, the number of "
+                                    "infer requests is determined automatically.";
+static const char input_resizable_message[] =
+ "Optional. Enables resizable input with support of ROI crop & auto resize.";
+static const char num_threads_message[] = "Optional. Number of threads.";
+static const char num_streams_message[] = "Optional. Number of streams to use for inference on the CPU and/or GPU in "
+                                          "throughput mode (for HETERO and MULTI device cases use format "
+                                          "<device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>).";
+static const char no_show_message[] = "Optional. Don't show output.";
+static const char utilization_monitors_message[] = "Optional. List of monitors to show initially.";
+static const char output_resolution_message[] =
+ "Optional. Specify the maximum output window resolution "
+ "in (width x height) format. Example: 1280x720. Input frame size used by default.";
+static const char only_masks_message[] = "Optional. Display only masks. Can be toggled with the TAB key.";
+
+static const char plugins_message[] = "Optional. Select a custom plugins_xml file to use.";
+static const char arch_file_message[] = "Optional. Provide a path for the architecture .arch file.";
+
+DEFINE_bool(h, false, help_message);
+DEFINE_string(m, "", model_message);
+DEFINE_string(d, "CPU", target_device_message);
+DEFINE_string(labels, "", labels_message);
+DEFINE_string(layout, "", layout_message);
+DEFINE_bool(r, false, raw_output_message);
+DEFINE_int32(nireq, 0, nireq_message);
+DEFINE_bool(auto_resize, false, input_resizable_message);
+DEFINE_int32(nthreads, 0, num_threads_message);
+DEFINE_string(nstreams, "", num_streams_message);
+DEFINE_bool(no_show, false, no_show_message);
+DEFINE_string(u, "", utilization_monitors_message);
+DEFINE_string(output_resolution, "", output_resolution_message);
+DEFINE_bool(only_masks, false, only_masks_message);
+
+DEFINE_string(plugins, "", plugins_message);
+DEFINE_string(arch_file, "", arch_file_message);
+
+/**
+ * \brief This function shows a help message
+ */
+static void showUsage() {
+ std::cout << std::endl;
+ std::cout << "segmentation_demo [OPTION]" << std::endl;
+ std::cout << "Options:" << std::endl;
+ std::cout << std::endl;
+ std::cout << " -h " << help_message << std::endl;
+ std::cout << " -i " << input_message << std::endl;
+ std::cout << " -m \"<path>\" " << model_message << std::endl;
+ std::cout << " -o \"<path>\" " << output_message << std::endl;
+ std::cout << " -limit \"<num>\" " << limit_message << std::endl;
+ std::cout << " -d \"<device>\" " << target_device_message << std::endl;
+ std::cout << " -labels \"<path>\" " << labels_message << std::endl;
+ std::cout << " -layout \"<string>\" " << layout_message << std::endl;
+ std::cout << " -r " << raw_output_message << std::endl;
+ std::cout << " -nireq \"<integer>\" " << nireq_message << std::endl;
+ std::cout << " -auto_resize " << input_resizable_message << std::endl;
+ std::cout << " -nthreads \"<integer>\" " << num_threads_message << std::endl;
+ std::cout << " -nstreams " << num_streams_message << std::endl;
+ std::cout << " -loop " << loop_message << std::endl;
+ std::cout << " -no_show " << no_show_message << std::endl;
+ std::cout << " -output_resolution " << output_resolution_message << std::endl;
+ std::cout << " -u " << utilization_monitors_message << std::endl;
+ std::cout << " -only_masks " << only_masks_message << std::endl;
+}
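+
+// Example invocation (illustrative; the model, input, and label files named
+// here are placeholders, not assets shipped with this demo):
+//   ./segmentation_demo -m deeplabv3.xml -i input.mp4 -d CPU -labels voc_labels.txt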
+
+bool exists_test(const std::string& name) {
+ struct stat buffer;
+ return (stat(name.c_str(), &buffer) == 0);
+}
+
+bool ParseAndCheckCommandLine(int argc, char* argv[]) {
+ // ---------------------------Parsing and validation of input args--------------------------------------
+ gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
+ if (FLAGS_h) {
+ showUsage();
+ showAvailableDevices();
+ return false;
+ }
+
+ if (FLAGS_i.empty()) {
+ throw std::logic_error("Parameter -i is not set");
+ }
+
+ if (FLAGS_m.empty()) {
+ throw std::logic_error("Parameter -m is not set");
+ }
+
+    if (!FLAGS_plugins.empty()) {
+        std::cout << "Using custom plugins_xml file - " << FLAGS_plugins << std::endl;
+        // Validate the path only when a custom plugins file was actually provided;
+        // with the default empty value, ov::Core uses standard plugin discovery.
+        if (!exists_test(FLAGS_plugins)) {
+            std::cout << "Error: plugins_xml file: " << FLAGS_plugins << " doesn't exist. Please provide a valid path."
+                      << std::endl;
+            throw std::logic_error("plugins_xml file path does not exist.");
+        }
+    }
+
+ if (!FLAGS_output_resolution.empty() && FLAGS_output_resolution.find("x") == std::string::npos) {
+ throw std::logic_error("Correct format of -output_resolution parameter is \"width\"x\"height\".");
+ }
+ return true;
+}
+
+// TODO: replace with cv::applyColorMap() after OpenCV3 is dropped
+class ParallelLoopBodyLambda : public cv::ParallelLoopBody {
+ std::function<void(const cv::Range &)> f;
+public:
+ explicit ParallelLoopBodyLambda(std::function<void(const cv::Range &)> f): f{f} {}
+ void operator()(const cv::Range& range) const override {f(range);}
+};
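+
+// Note: newer OpenCV releases overload cv::parallel_for_ to accept a
+// std::function directly; this adapter is only needed while supporting
+// OpenCV 3, whose cv::parallel_for_ requires a cv::ParallelLoopBody subclass.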
+
+void applyColorMapOpenCV(const cv::Mat& src, cv::Mat& dstMat, const cv::Mat& _lut) {
+ const int lut_type = _lut.type();
+
+ cv::Mat srcGray;
+ if (src.channels() == 1)
+ srcGray = src;
+ else
+        cv::cvtColor(src, srcGray, cv::COLOR_BGR2GRAY);  // BGR because of historical cv::LUT() usage
+
+ dstMat.create(src.size(), lut_type);
+
+    // We do not use cv::LUT(), which requires src.channels() == dst.channels()
+ const int rows = srcGray.rows;
+ const int cols = srcGray.cols;
+    const int minimalPixelsPerPacket = 1 << 12;
+    const int rowsPerPacket = std::max(1, minimalPixelsPerPacket / cols);
+    const int rowsPacketsCount = (rows + rowsPerPacket - 1) / rowsPerPacket;
+ const cv::Range all(0, rows);
+
+ if (lut_type == CV_8UC1) {
+ typedef unsigned char lut_pixel_t;
+ const lut_pixel_t* srcLUT = _lut.ptr<lut_pixel_t>(0);
+ ParallelLoopBodyLambda body([&, cols](const cv::Range& range) -> void {
+            for (int row = range.start; row < range.end; ++row) {
+ const unsigned char* srcRow = srcGray.ptr<unsigned char>(row);
+ lut_pixel_t* dstRow = dstMat.ptr<lut_pixel_t>(row);
+                for (int col = 0; col < cols; ++col)
+ *dstRow++ = srcLUT[*srcRow++];
+ }
+ });
+ cv::parallel_for_(all, body, rowsPacketsCount);
+ }
+ else if (lut_type == CV_8UC3) {
+ typedef cv::Vec3b lut_pixel_t;
+ const lut_pixel_t* srcLUT = _lut.ptr<lut_pixel_t>(0);
+ ParallelLoopBodyLambda body([&, cols](const cv::Range& range) -> void {
+            for (int row = range.start; row < range.end; ++row) {
+ const unsigned char* srcRow = srcGray.ptr<unsigned char>(row);
+ lut_pixel_t* dstRow = dstMat.ptr<lut_pixel_t>(row);
+                for (int col = 0; col < cols; ++col)
+ *dstRow++ = srcLUT[*srcRow++];
+ }
+ });
+ cv::parallel_for_(all, body, rowsPacketsCount);
+ }
+}
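+
+// Usage sketch (hypothetical inputs; assumes an 8-bit class-index mask and a
+// 256-entry CV_8UC3 LUT, which is what applyColorMap() below constructs):
+//   cv::Mat classMask(480, 640, CV_8UC1, cv::Scalar(0));  // every pixel = class 0
+//   cv::Mat lut(256, 1, CV_8UC3, cv::Scalar(0, 0, 0));    // one BGR color per class
+//   cv::Mat colored;
+//   applyColorMapOpenCV(classMask, colored, lut);         // colored comes out CV_8UC3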
+
+cv::Mat applyColorMap(cv::Mat input) {
+ // Initializing colors array if needed
+ static const Color PASCAL_VOC_COLORS[] = {
+ {0, 0, 0}, {128, 0, 0}, {0, 128, 0}, {128, 128, 0}, {0, 0, 128}, {128, 0, 128}, {0, 128, 128},
+ {128, 128, 128}, {64, 0, 0}, {192, 0, 0}, {64, 128, 0}, {192, 128, 0}, {64, 0, 128}, {192, 0, 128},
+ {64, 128, 128}, {192, 128, 128}, {0, 64, 0}, {128, 64, 0}, {0, 192, 0}, {128, 192, 0}, {0, 64, 128}};
+ static cv::Mat colors;
+ static std::mt19937 rng;
+ static std::uniform_int_distribution<int> distr(0, 255);
+
+ if (colors.empty()) {
+ colors = cv::Mat(256, 1, CV_8UC3);
+ std::size_t i = 0;
+ for (; i < arraySize(PASCAL_VOC_COLORS); ++i) {
+ colors.at<cv::Vec3b>(i, 0) = {PASCAL_VOC_COLORS[i].blue(),
+ PASCAL_VOC_COLORS[i].green(),
+ PASCAL_VOC_COLORS[i].red()};
+ }
+        for (; i < (std::size_t)colors.rows; ++i) {  // colors is 256x1, so iterate over its rows
+ colors.at<cv::Vec3b>(i, 0) = cv::Vec3b(distr(rng), distr(rng), distr(rng));
+ }
+ }
+
+ // Converting class to color
+ cv::Mat out;
+ applyColorMapOpenCV(input, out, colors);
+ return out;
+}
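+
+// The first 21 LUT entries above follow the PASCAL VOC palette (stored as BGR
+// for OpenCV); the remaining entries come from a default-seeded std::mt19937,
+// so the "random" colors are deterministic across runs.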
+
+cv::Mat renderSegmentationData(const ImageResult& result, OutputTransform& outputTransform, bool masks_only) {
+ if (!result.metaData) {
+ throw std::invalid_argument("Renderer: metadata is null");
+ }
+
+ // Input image is stored inside metadata, as we put it there during submission stage
+ auto inputImg = result.metaData->asRef<ImageMetaData>().img;
+
+ if (inputImg.empty()) {
+ throw std::invalid_argument("Renderer: image provided in metadata is empty");
+ }
+
+ // Visualizing result data over source image
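+    // The half/half sum below is effectively a 50% alpha blend; cv::addWeighted
+    // with 0.5/0.5 weights would give a visually identical overlay.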
+ cv::Mat output =
+ masks_only ? applyColorMap(result.resultImage) : inputImg / 2 + applyColorMap(result.resultImage) / 2;
+ outputTransform.resize(output);
+ return output;
+}
+
+void printRawResults(const ImageResult& result, std::vector<std::string> labels) {
+ slog::debug << " --------------- Frame # " << result.frameId << " ---------------" << slog::endl;
+ slog::debug << " Class ID | Pixels | Percentage " << slog::endl;
+
+ double min_val, max_val;
+ cv::minMaxLoc(result.resultImage, &min_val, &max_val);
+    int max_classes = static_cast<int>(max_val) + 1;  // +1 so a background-only (all-zero) mask still yields one bin
+ const float range[] = {0, static_cast<float>(max_classes)};
+ const float* ranges[] = {range};
+ cv::Mat histogram;
+ cv::calcHist(&result.resultImage, 1, 0, cv::Mat(), histogram, 1, &max_classes, ranges);
+
+ const double all = result.resultImage.cols * result.resultImage.rows;
+ for (int i = 0; i < max_classes; ++i) {
+ const int value = static_cast<int>(histogram.at<float>(i));
+ if (value > 0) {
+ std::string label = (size_t)i < labels.size() ? labels[i] : "#" + std::to_string(i);
+ slog::debug << " " << std::setw(16) << std::left << label << " | " << std::setw(6) << value << " | "
+ << std::setw(5) << std::setprecision(2) << std::fixed << std::right << value / all * 100 << "%"
+ << slog::endl;
+ }
+ }
+}
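+
+// Sample -r output (values are illustrative, not from a real run):
+//    --------------- Frame # 0 ---------------
+//    Class ID | Pixels | Percentage
+//    background       | 301480 | 98.14%
+//    person           | 5720   | 1.86%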
+
+int main(int argc, char* argv[]) {
+ try {
+ PerformanceMetrics metrics, renderMetrics;
+
+ // ------------------------------ Parsing and validation of input args ---------------------------------
+ if (!ParseAndCheckCommandLine(argc, argv)) {
+ return 0;
+ }
+
+ //------------------------------- Preparing Input ------------------------------------------------------
+ auto cap = openImagesCapture(FLAGS_i, FLAGS_loop, FLAGS_nireq == 1 ? read_type::efficient : read_type::safe);
+ cv::Mat curr_frame;
+
+ //------------------------------ Running Segmentation routines ----------------------------------------------
+ slog::info << ov::get_openvino_version() << slog::endl;
+
+ ov::Core core(FLAGS_plugins);
+ AsyncPipeline pipeline(
+ std::unique_ptr<SegmentationModel>(new SegmentationModel(FLAGS_m, FLAGS_auto_resize, FLAGS_layout)),
+ ConfigFactory::getUserConfig(FLAGS_d, FLAGS_nireq, FLAGS_nstreams, FLAGS_nthreads, FLAGS_arch_file),
+ core);
+ Presenter presenter(FLAGS_u);
+
+ std::vector<std::string> labels;
+ if (!FLAGS_labels.empty()) {
+ labels = SegmentationModel::loadLabels(FLAGS_labels);
+ }
+
+ bool keepRunning = true;
+ int64_t frameNum = -1;
+ std::unique_ptr<ResultBase> result;
+ uint32_t framesProcessed = 0;
+ LazyVideoWriter videoWriter{FLAGS_o, cap->fps(), static_cast<unsigned int>(FLAGS_limit)};
+
+ cv::Size outputResolution;
+ OutputTransform outputTransform = OutputTransform();
+ size_t found = FLAGS_output_resolution.find("x");
+
+ bool only_masks = FLAGS_only_masks;
+
+ while (keepRunning) {
+ if (pipeline.isReadyToProcess()) {
+ auto startTime = std::chrono::steady_clock::now();
+
+ //--- Capturing frame
+ curr_frame = cap->read();
+
+ if (curr_frame.empty()) {
+ // Input stream is over
+ break;
+ }
+
+ frameNum = pipeline.submitData(ImageInputData(curr_frame),
+ std::make_shared<ImageMetaData>(curr_frame, startTime));
+ }
+
+ if (frameNum == 0) {
+ if (found == std::string::npos) {
+ outputResolution = curr_frame.size();
+ } else {
+ outputResolution = cv::Size{
+ std::stoi(FLAGS_output_resolution.substr(0, found)),
+ std::stoi(FLAGS_output_resolution.substr(found + 1, FLAGS_output_resolution.length()))};
+ outputTransform = OutputTransform(curr_frame.size(), outputResolution);
+ outputResolution = outputTransform.computeResolution();
+ }
+ }
+
+ //--- Waiting for free input slot or output data available. Function will return immediately if any of them
+ // are available.
+ pipeline.waitForData();
+
+ //--- Checking for results and rendering data if it's ready
+ //--- If you need just plain data without rendering - cast result's underlying pointer to ImageResult*
+ // and use your own processing instead of calling renderSegmentationData().
+ while (keepRunning && (result = pipeline.getResult())) {
+ auto renderingStart = std::chrono::steady_clock::now();
+ cv::Mat outFrame = renderSegmentationData(result->asRef<ImageResult>(), outputTransform, only_masks);
+ //--- Showing results and device information
+ if (FLAGS_r) {
+ printRawResults(result->asRef<ImageResult>(), labels);
+ }
+ presenter.drawGraphs(outFrame);
+ renderMetrics.update(renderingStart);
+ metrics.update(result->metaData->asRef<ImageMetaData>().timeStamp,
+ outFrame,
+ {10, 22},
+ cv::FONT_HERSHEY_COMPLEX,
+ 0.65);
+ videoWriter.write(outFrame);
+ framesProcessed++;
+ if (!FLAGS_no_show) {
+ cv::imshow("Segmentation Results", outFrame);
+
+ //--- Processing keyboard events
+ auto key = cv::waitKey(1);
+                if (27 == key || 'q' == key || 'Q' == key) {  // Esc or q/Q quits
+                    keepRunning = false;
+                } else if (9 == key) {  // TAB toggles masks-only view
+                    only_masks = !only_masks;
+ } else {
+ presenter.handleKey(key);
+ }
+ }
+ }
+ } // while(keepRunning)
+
+ // ------------ Waiting for completion of data processing and rendering the rest of results ---------
+ pipeline.waitForTotalCompletion();
+
+ for (; framesProcessed <= frameNum; framesProcessed++) {
+ result = pipeline.getResult();
+ if (result != nullptr) {
+ cv::Mat outFrame = renderSegmentationData(result->asRef<ImageResult>(), outputTransform, only_masks);
+ //--- Showing results and device information
+ if (FLAGS_r) {
+ printRawResults(result->asRef<ImageResult>(), labels);
+ }
+ presenter.drawGraphs(outFrame);
+ metrics.update(result->metaData->asRef<ImageMetaData>().timeStamp,
+ outFrame,
+ {10, 22},
+ cv::FONT_HERSHEY_COMPLEX,
+ 0.65);
+ videoWriter.write(outFrame);
+ if (!FLAGS_no_show) {
+ cv::imshow("Segmentation Results", outFrame);
+ //--- Updating output window
+ cv::waitKey(1);
+ }
+ }
+ }
+
+ slog::info << "Metrics report:" << slog::endl;
+ metrics.logTotal();
+ logLatencyPerStage(cap->getMetrics().getTotal().latency,
+ pipeline.getPreprocessMetrics().getTotal().latency,
+                           pipeline.getInferenceMetircs().getTotal().latency,  // [sic] upstream API spelling
+ pipeline.getPostprocessMetrics().getTotal().latency,
+ renderMetrics.getTotal().latency);
+ slog::info << presenter.reportMeans() << slog::endl;
+ } catch (const std::exception& error) {
+ slog::err << error.what() << slog::endl;
+ return 1;
+ } catch (...) {
+ slog::err << "Unknown/internal exception happened." << slog::endl;
+ return 1;
+ }
+
+ return 0;
+}