Diffstat (limited to 'python/openvino/runtime/common/models/src/super_resolution_model.cpp')
-rw-r--r--  python/openvino/runtime/common/models/src/super_resolution_model.cpp  207
1 file changed, 207 insertions, 0 deletions
diff --git a/python/openvino/runtime/common/models/src/super_resolution_model.cpp b/python/openvino/runtime/common/models/src/super_resolution_model.cpp
new file mode 100644
index 0000000..164991a
--- /dev/null
+++ b/python/openvino/runtime/common/models/src/super_resolution_model.cpp
@@ -0,0 +1,207 @@
+/*
+// Copyright (C) 2021-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "models/super_resolution_model.h"
+
+#include <stddef.h>
+
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <opencv2/imgproc.hpp>
+#include <openvino/openvino.hpp>
+
+#include <utils/image_utils.h>
+#include <utils/ocv_common.hpp>
+#include <utils/slog.hpp>
+
+#include "models/input_data.h"
+#include "models/internal_model_data.h"
+#include "models/results.h"
+
+SuperResolutionModel::SuperResolutionModel(const std::string& modelFileName,
+ const cv::Size& inputImgSize,
+ const std::string& layout)
+ : ImageModel(modelFileName, false, layout) {
+ netInputHeight = inputImgSize.height;
+ netInputWidth = inputImgSize.width;
+}
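+
+// Rough usage sketch (file name and size below are hypothetical, not taken from the demos):
+//   SuperResolutionModel srModel("super_resolution.xml", cv::Size(640, 360), "");
+// The surrounding pipeline code typically loads the model, lets prepareInputsOutputs()
+// configure it, and drives preprocess()/postprocess() around each inference request.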
+
+void SuperResolutionModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
+ // --------------------------- Configure input & output ---------------------------------------------
+ // --------------------------- Prepare input --------------------------------------------------
+ const ov::OutputVector& inputs = model->inputs();
+ if (inputs.size() != 1 && inputs.size() != 2) {
+ throw std::logic_error("Super resolution model wrapper supports topologies with 1 or 2 inputs only");
+ }
+ std::string lrInputTensorName = inputs.begin()->get_any_name();
+ inputsNames.push_back(lrInputTensorName);
+ ov::Shape lrShape = inputs.begin()->get_shape();
+ if (lrShape.size() != 4) {
+ throw std::logic_error("Number of dimensions for an input must be 4");
+ }
+ // in case of 2 inputs they have the same layouts
+ ov::Layout inputLayout = getInputLayout(model->inputs().front());
+
+ auto channelsId = ov::layout::channels_idx(inputLayout);
+ auto heightId = ov::layout::height_idx(inputLayout);
+ auto widthId = ov::layout::width_idx(inputLayout);
+
+ if (lrShape[channelsId] != 1 && lrShape[channelsId] != 3) {
+ throw std::logic_error("Input layer is expected to have 1 or 3 channels");
+ }
+
+ // A model like single-image-super-resolution-???? may take bicubic interpolation of the input image as the
+ // second input
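+ // If the model reports the bicubic input first, the shapes and tensor names are swapped
+ // below so that index 0 always refers to the low-resolution input.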
+ std::string bicInputTensorName;
+ if (inputs.size() == 2) {
+ bicInputTensorName = (++inputs.begin())->get_any_name();
+ inputsNames.push_back(bicInputTensorName);
+ ov::Shape bicShape = (++inputs.begin())->get_shape();
+ if (bicShape.size() != 4) {
+ throw std::logic_error("Number of dimensions for both inputs must be 4");
+ }
+ if (lrShape[widthId] >= bicShape[widthId] && lrShape[heightId] >= bicShape[heightId]) {
+ std::swap(bicShape, lrShape);
+ inputsNames[0].swap(inputsNames[1]);
+ } else if (!(lrShape[widthId] <= bicShape[widthId] && lrShape[heightId] <= bicShape[heightId])) {
+ throw std::logic_error("Each spatial dimension of one input must be greater than or equal "
+ "to the corresponding spatial dimension of the other input");
+ }
+ }
+
+ ov::preprocess::PrePostProcessor ppp(model);
+ for (const auto& input : inputs) {
+ ppp.input(input.get_any_name()).tensor().set_element_type(ov::element::u8).set_layout("NHWC");
+
+ ppp.input(input.get_any_name()).model().set_layout(inputLayout);
+ }
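+ // Once ppp.build() is applied below, the application-side tensors are u8 NHWC (matching
+ // the cv::Mat data produced by wrapMat2Tensor in preprocess()), and the conversion to the
+ // model's own layout and precision is inserted by the PrePostProcessor.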
+
+ // --------------------------- Prepare output -----------------------------------------------------
+ const ov::OutputVector& outputs = model->outputs();
+ if (outputs.size() != 1) {
+ throw std::logic_error("Super resolution model wrapper supports topologies with only 1 output");
+ }
+
+ outputsNames.push_back(outputs.begin()->get_any_name());
+ ppp.output().tensor().set_element_type(ov::element::f32);
+ model = ppp.build();
+
+ const ov::Shape& outShape = model->output().get_shape();
+
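+ // The upscaling factor is estimated as output width / low-resolution input width
+ // (for example, a 1920-wide output over a 480-wide input gives a factor of 4),
+ // and the model is then reshaped to the requested input size.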
+ const ov::Layout outputLayout("NCHW");
+ const auto outWidth = outShape[ov::layout::width_idx(outputLayout)];
+ const auto inWidth = lrShape[ov::layout::width_idx(outputLayout)];
+ changeInputSize(model, static_cast<int>(outWidth / inWidth));
+}
+
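+// Reshapes the model to the requested input size: batch is forced to 1, the low-resolution
+// input becomes netInputWidth x netInputHeight, and an optional bicubic input is set to
+// coeff times those dimensions.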
+void SuperResolutionModel::changeInputSize(std::shared_ptr<ov::Model>& model, int coeff) {
+ std::map<std::string, ov::PartialShape> shapes;
+ const ov::Layout& layout = ov::layout::get_layout(model->inputs().front());
+ const auto batchId = ov::layout::batch_idx(layout);
+ const auto heightId = ov::layout::height_idx(layout);
+ const auto widthId = ov::layout::width_idx(layout);
+
+ const ov::OutputVector& inputs = model->inputs();
+ std::string lrInputTensorName = inputs.begin()->get_any_name();
+ ov::Shape lrShape = inputs.begin()->get_shape();
+
+ if (inputs.size() == 2) {
+ std::string bicInputTensorName = (++inputs.begin())->get_any_name();
+ ov::Shape bicShape = (++inputs.begin())->get_shape();
+ if (lrShape[heightId] >= bicShape[heightId] && lrShape[widthId] >= bicShape[widthId]) {
+ std::swap(bicShape, lrShape);
+ std::swap(bicInputTensorName, lrInputTensorName);
+ }
+ bicShape[batchId] = 1;
+ bicShape[heightId] = coeff * netInputHeight;
+ bicShape[widthId] = coeff * netInputWidth;
+ shapes[bicInputTensorName] = ov::PartialShape(bicShape);
+ }
+
+ lrShape[batchId] = 1;
+ lrShape[heightId] = netInputHeight;
+ lrShape[widthId] = netInputWidth;
+ shapes[lrInputTensorName] = ov::PartialShape(lrShape);
+
+ model->reshape(shapes);
+}
+
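+// Fills the request's input tensor(s) from the provided image: converts to grayscale when the
+// model expects one channel, resizes to the network input size, and, for two-input models,
+// also writes a bicubically upscaled copy into the second tensor.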
+std::shared_ptr<InternalModelData> SuperResolutionModel::preprocess(const InputData& inputData,
+ ov::InferRequest& request) {
+ auto imgData = inputData.asRef<ImageInputData>();
+ auto& img = imgData.inputImage;
+
+ const ov::Tensor lrInputTensor = request.get_tensor(inputsNames[0]);
+ const ov::Layout layout("NHWC");
+
+ if (img.channels() != static_cast<int>(lrInputTensor.get_shape()[ov::layout::channels_idx(layout)])) {
+ cv::cvtColor(img, img, cv::COLOR_BGR2GRAY);
+ }
+
+ if (static_cast<size_t>(img.cols) != netInputWidth || static_cast<size_t>(img.rows) != netInputHeight) {
+ slog::warn << "\tInput image size doesn't match the model input size; the image will be resized" << slog::endl;
+ }
+ const size_t height = lrInputTensor.get_shape()[ov::layout::height_idx(layout)];
+ const size_t width = lrInputTensor.get_shape()[ov::layout::width_idx(layout)];
+ img = resizeImageExt(img, width, height);
+ request.set_tensor(inputsNames[0], wrapMat2Tensor(img));
+
+ if (inputsNames.size() == 2) {
+ const ov::Tensor bicInputTensor = request.get_tensor(inputsNames[1]);
+ const int h = static_cast<int>(bicInputTensor.get_shape()[ov::layout::height_idx(layout)]);
+ const int w = static_cast<int>(bicInputTensor.get_shape()[ov::layout::width_idx(layout)]);
+ cv::Mat resized;
+ cv::resize(img, resized, cv::Size(w, h), 0, 0, cv::INTER_CUBIC);
+ request.set_tensor(inputsNames[1], wrapMat2Tensor(resized));
+ }
+
+ return std::make_shared<InternalImageModelData>(img.cols, img.rows);
+}
+
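+// Assembles the super-resolved image from the float output planes: three channels are scaled to
+// 8-bit and merged, while a single-channel (text super-resolution) output is binarized first.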
+std::unique_ptr<ResultBase> SuperResolutionModel::postprocess(InferenceResult& infResult) {
+ ImageResult* result = new ImageResult;
+ *static_cast<ResultBase*>(result) = static_cast<ResultBase&>(infResult);
+ const auto outputData = infResult.getFirstOutputTensor().data<float>();
+
+ std::vector<cv::Mat> imgPlanes;
+ const ov::Shape& outShape = infResult.getFirstOutputTensor().get_shape();
+ const size_t outChannels = static_cast<int>(outShape[1]);
+ const size_t outHeight = static_cast<int>(outShape[2]);
+ const size_t outWidth = static_cast<int>(outShape[3]);
+ const size_t numOfPixels = outWidth * outHeight;
+ if (outChannels == 3) {
+ imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0])),
+ cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])),
+ cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2]))};
+ } else {
+ imgPlanes = std::vector<cv::Mat>{cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0]))};
+ // Post-processing for text-image-super-resolution models
+ cv::threshold(imgPlanes[0], imgPlanes[0], 0.5f, 1.0f, cv::THRESH_BINARY);
+ }
+
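+ // The planes are assumed to lie roughly in [0, 1], so scaling by 255 maps them to 8-bit.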
+ for (auto& img : imgPlanes) {
+ img.convertTo(img, CV_8UC1, 255);
+ }
+ cv::Mat resultImg;
+ cv::merge(imgPlanes, resultImg);
+ result->resultImage = resultImg;
+
+ return std::unique_ptr<ResultBase>(result);
+}