/*
// Copyright (C) 2020-2022 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#include "models/detection_model_ssd.h"

#include <stddef.h>

#include <algorithm>
#include <map>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

#include <openvino/openvino.hpp>

#include <utils/common.hpp>
#include <utils/ocv_common.hpp>

#include "models/internal_model_data.h"
#include "models/results.h"

struct InputData;

ModelSSD::ModelSSD(const std::string& modelFileName,
                   float confidenceThreshold,
                   bool useAutoResize,
                   const std::vector<std::string>& labels,
                   const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels, layout) {}

std::shared_ptr<InternalModelData> ModelSSD::preprocess(const InputData& inputData, ov::InferRequest& request) {
    if (inputsNames.size() > 1) {
        const auto& imageInfoTensor = request.get_tensor(inputsNames[1]);
        const auto info = imageInfoTensor.data<float>();
        info[0] = static_cast<float>(netInputHeight);
        info[1] = static_cast<float>(netInputWidth);
        info[2] = 1;
        request.set_tensor(inputsNames[1], imageInfoTensor);
    }

    return DetectionModel::preprocess(inputData, request);
}

std::unique_ptr<ResultBase> ModelSSD::postprocess(InferenceResult& infResult) {
    return outputsNames.size() > 1 ? postprocessMultipleOutputs(infResult) : postprocessSingleOutput(infResult);
}

std::unique_ptr<ResultBase> ModelSSD::postprocessSingleOutput(InferenceResult& infResult) {
    const ov::Tensor& detectionsTensor = infResult.getFirstOutputTensor();
    size_t detectionsNum = detectionsTensor.get_shape()[detectionsNumId];
    const float* detections = detectionsTensor.data<float>();

    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    auto retVal = std::unique_ptr<ResultBase>(result);

    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();

    for (size_t i = 0; i < detectionsNum; i++) {
        float image_id = detections[i * objectSize + 0];
        if (image_id < 0) {
            break;
        }

        float confidence = detections[i * objectSize + 2];

        /** Filtering out objects with confidence < confidence_threshold probability **/
        if (confidence > confidenceThreshold) {
            DetectedObject desc;

            desc.confidence = confidence;
            desc.labelID = static_cast<int>(detections[i * objectSize + 1]);
            desc.label = getLabelName(desc.labelID);

            desc.x = clamp(detections[i * objectSize + 3] * internalData.inputImgWidth,
                           0.f,
                           static_cast<float>(internalData.inputImgWidth));
            desc.y = clamp(detections[i * objectSize + 4] * internalData.inputImgHeight,
                           0.f,
                           static_cast<float>(internalData.inputImgHeight));
            desc.width = clamp(detections[i * objectSize + 5] * internalData.inputImgWidth,
                               0.f,
                               static_cast<float>(internalData.inputImgWidth)) -
                         desc.x;
            desc.height = clamp(detections[i * objectSize + 6] * internalData.inputImgHeight,
                                0.f,
                                static_cast<float>(internalData.inputImgHeight)) -
                          desc.y;

            result->objects.push_back(desc);
        }
    }

    return retVal;
}
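// Multiple-output postprocessing below: 'boxes' holds the per-detection coordinates (with the
// confidence as a 5th value when there is no separate 'scores' output), 'labels' holds one class
// id per detection, and an optional third 'scores' output holds per-detection confidences
// (see prepareMultipleOutputs() for how these outputs are matched by name).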
std::unique_ptr<ResultBase> ModelSSD::postprocessMultipleOutputs(InferenceResult& infResult) {
    const float* boxes = infResult.outputsData[outputsNames[0]].data<float>();
    size_t detectionsNum = infResult.outputsData[outputsNames[0]].get_shape()[detectionsNumId];
    const float* labels = infResult.outputsData[outputsNames[1]].data<float>();
    const float* scores = outputsNames.size() > 2 ? infResult.outputsData[outputsNames[2]].data<float>() : nullptr;

    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    auto retVal = std::unique_ptr<ResultBase>(result);

    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();

    // In models where scores are stored in a separate output, coordinates are normalized to [0,1].
    // In other multiple-output models, coordinates are normalized to [0,netInputWidth] and [0,netInputHeight].
    float widthScale = static_cast<float>(internalData.inputImgWidth) / (scores ? 1 : netInputWidth);
    float heightScale = static_cast<float>(internalData.inputImgHeight) / (scores ? 1 : netInputHeight);

    for (size_t i = 0; i < detectionsNum; i++) {
        float confidence = scores ? scores[i] : boxes[i * objectSize + 4];

        /** Filtering out objects with confidence < confidence_threshold probability **/
        if (confidence > confidenceThreshold) {
            DetectedObject desc;

            desc.confidence = confidence;
            desc.labelID = static_cast<int>(labels[i]);
            desc.label = getLabelName(desc.labelID);

            desc.x = clamp(boxes[i * objectSize] * widthScale, 0.f, static_cast<float>(internalData.inputImgWidth));
            desc.y = clamp(boxes[i * objectSize + 1] * heightScale,
                           0.f,
                           static_cast<float>(internalData.inputImgHeight));
            desc.width = clamp(boxes[i * objectSize + 2] * widthScale,
                               0.f,
                               static_cast<float>(internalData.inputImgWidth)) -
                         desc.x;
            desc.height = clamp(boxes[i * objectSize + 3] * heightScale,
                                0.f,
                                static_cast<float>(internalData.inputImgHeight)) -
                          desc.y;

            result->objects.push_back(desc);
        }
    }

    return retVal;
}

void ModelSSD::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    ov::preprocess::PrePostProcessor ppp(model);
    for (const auto& input : model->inputs()) {
        auto inputTensorName = input.get_any_name();
        const ov::Shape& shape = input.get_shape();
        ov::Layout inputLayout = getInputLayout(input);

        if (shape.size() == 4) {  // 1st input contains images
            if (inputsNames.empty()) {
                inputsNames.push_back(inputTensorName);
            } else {
                inputsNames[0] = inputTensorName;
            }

            inputTransform.setPrecision(ppp, inputTensorName);
            ppp.input(inputTensorName).tensor().set_layout({"NHWC"});

            if (useAutoResize) {
                ppp.input(inputTensorName).tensor().set_spatial_dynamic_shape();

                ppp.input(inputTensorName)
                    .preprocess()
                    .convert_element_type(ov::element::f32)
                    .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
            }

            ppp.input(inputTensorName).model().set_layout(inputLayout);

            netInputWidth = shape[ov::layout::width_idx(inputLayout)];
            netInputHeight = shape[ov::layout::height_idx(inputLayout)];
        } else if (shape.size() == 2) {  // 2nd input contains image info
            inputsNames.resize(2);
            inputsNames[1] = inputTensorName;
            ppp.input(inputTensorName).tensor().set_element_type(ov::element::f32);
        } else {
            throw std::logic_error("Unsupported " + std::to_string(input.get_shape().size()) + "D "
                                   "input layer '" + input.get_any_name() + "'. "
                                   "Only 2D and 4D input layers are supported");
        }
    }
    model = ppp.build();

    // --------------------------- Prepare output -----------------------------------------------------
    if (model->outputs().size() == 1) {
        prepareSingleOutput(model);
    } else {
        prepareMultipleOutputs(model);
    }
}
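// Single-output SSD models typically produce one [1, 1, N, 7] detection tensor where each of the
// N rows is [image_id, label, confidence, x_min, y_min, x_max, y_max]; this is the format consumed
// by postprocessSingleOutput() above and validated below.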
" "Only 2D and 4D input layers are supported"); } } model = ppp.build(); // --------------------------- Prepare output ----------------------------------------------------- if (model->outputs().size() == 1) { prepareSingleOutput(model); } else { prepareMultipleOutputs(model); } } void ModelSSD::prepareSingleOutput(std::shared_ptr& model) { const auto& output = model->output(); outputsNames.push_back(output.get_any_name()); const ov::Shape& shape = output.get_shape(); const ov::Layout& layout("NCHW"); if (shape.size() != 4) { throw std::logic_error("SSD single output must have 4 dimensions, but had " + std::to_string(shape.size())); } detectionsNumId = ov::layout::height_idx(layout); objectSize = shape[ov::layout::width_idx(layout)]; if (objectSize != 7) { throw std::logic_error("SSD single output must have 7 as a last dimension, but had " + std::to_string(objectSize)); } ov::preprocess::PrePostProcessor ppp(model); ppp.output().tensor().set_element_type(ov::element::f32).set_layout(layout); model = ppp.build(); } void ModelSSD::prepareMultipleOutputs(std::shared_ptr& model) { const ov::OutputVector& outputs = model->outputs(); for (auto& output : outputs) { const auto& tensorNames = output.get_names(); for (const auto& name : tensorNames) { if (name.find("boxes") != std::string::npos) { outputsNames.push_back(name); break; } else if (name.find("labels") != std::string::npos) { outputsNames.push_back(name); break; } else if (name.find("scores") != std::string::npos) { outputsNames.push_back(name); break; } } } if (outputsNames.size() != 2 && outputsNames.size() != 3) { throw std::logic_error("SSD model wrapper must have 2 or 3 outputs, but had " + std::to_string(outputsNames.size())); } std::sort(outputsNames.begin(), outputsNames.end()); ov::preprocess::PrePostProcessor ppp(model); const auto& boxesShape = model->output(outputsNames[0]).get_partial_shape().get_max_shape(); ov::Layout boxesLayout; if (boxesShape.size() == 2) { boxesLayout = "NC"; detectionsNumId = ov::layout::batch_idx(boxesLayout); objectSize = boxesShape[ov::layout::channels_idx(boxesLayout)]; if (objectSize != 5) { throw std::logic_error("Incorrect 'boxes' output shape, [n][5] shape is required"); } } else if (boxesShape.size() == 3) { boxesLayout = "CHW"; detectionsNumId = ov::layout::height_idx(boxesLayout); objectSize = boxesShape[ov::layout::width_idx(boxesLayout)]; if (objectSize != 4) { throw std::logic_error("Incorrect 'boxes' output shape, [b][n][4] shape is required"); } } else { throw std::logic_error("Incorrect number of 'boxes' output dimensions, expected 2 or 3, but had " + std::to_string(boxesShape.size())); } ppp.output(outputsNames[0]).tensor().set_layout(boxesLayout); for (const auto& outName : outputsNames) { ppp.output(outName).tensor().set_element_type(ov::element::f32); } model = ppp.build(); }