diff options
Diffstat (limited to 'python/openvino/runtime/dla_benchmark/inputs_filling.cpp')
| -rw-r--r-- | python/openvino/runtime/dla_benchmark/inputs_filling.cpp | 885 |
1 files changed, 885 insertions, 0 deletions
diff --git a/python/openvino/runtime/dla_benchmark/inputs_filling.cpp b/python/openvino/runtime/dla_benchmark/inputs_filling.cpp new file mode 100644 index 0000000..0d20a14 --- /dev/null +++ b/python/openvino/runtime/dla_benchmark/inputs_filling.cpp @@ -0,0 +1,885 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +// Description: This file implements all supported formats of filling input tensors with input data. +// Functions in this file has been based off/modified from OpenVINO's input filling algorithms, +// which would be a good place to start for future OpenVINO uplifts. +// Ref: [openvinotoolkit/openvino › samples/cpp/benchmark_app/input_fillings.cpp] + +#include "inputs_filling.hpp" + +#include <algorithm> +#include <cstdlib> +#include <memory> +#include <functional> +#include <limits> +#include <tuple> +#include <string> +#include <utility> +#include <vector> + +#include <opencv2/videoio.hpp> +#include <samples/ocv_common.hpp> +#include <samples/slog.hpp> +#include "format_reader_ptr.h" +#include "shared_tensor_allocator.hpp" +#include "utils.hpp" + +/** + * @brief Struct to store info of an image read by the FormatReader::Reader class +*/ +struct ReaderInfo { + std::shared_ptr<uint8_t> data; // Image data + const size_t file_index; // Index of the image in the file_paths vector + const size_t channels; // Number of channels used by the reader to store the image + + ReaderInfo(std::shared_ptr<uint8_t>& data, size_t file_index, size_t channels) + : data(data), file_index(file_index), channels(channels) {} +}; + +// Since the reader always expands the image being read into an rgb image. +// The only way to tell that an image is in fact an rgb and not a grayscale +// image, it to find if the values in channel 0 differ from channel 1 or 2. +// Return true if this is a grayscale image or an rgb image than can safely +// be considered a grayscale image since all channel values are the same. +static bool IsGrayScaleImage(const ReaderInfo& reader_info, uint32_t image_size) { + const auto num_channels = reader_info.channels; + const auto& image_data = reader_info.data; + // Iterate through the image surface + for (size_t pid = 0; pid < image_size; pid++) { + // Iterate through the image channels + for (size_t ch = 1; ch < num_channels; ++ch) { + if (image_data.get()[pid * num_channels + ch] != image_data.get()[pid * num_channels]) return false; + } + } + return true; +} + +template <typename T> +using uniformDistribution = typename std::conditional< + std::is_floating_point<T>::value, + std::uniform_real_distribution<T>, + typename std::conditional<std::is_integral<T>::value, std::uniform_int_distribution<T>, void>::type>::type; + +/** + * @brief Fills a tensor with image data from input files + * + * Helper function to GetStaticTensors(), not used outside this file. + * Determines which image to use based of of input_id, batch_size, input_size, and request_id. + * Reads that data as uint8 and creates an input tensor of type T corresponding to input element type. + * + * @param files vector of file paths to the input images + * @param input_id image input id, ie image 1, image 2... + * @param batch_size batch size of the tensor + * @param input_size number of images to be used + * @param request_id infer request id + * @param input_info InputInfo struct corresponding to the input node of the tensor + * @param input_name name of the input + * @param bgr boolean indicating if input channels need to be reversed + * @param verbose prints extra logging information if true + * @return ov::Tensor containing the input data extracted from the image +*/ +template <typename T> +ov::Tensor CreateTensorFromImage(const std::vector<std::string>& files, + const size_t input_id, + const size_t batch_size, + const size_t input_size, + const size_t request_id, + const dla_benchmark::InputInfo& input_info, + const std::string& input_name, + const FormatReader::Reader::ResizeType resize_type, + const bool bgr = false, + const bool verbose = false) { + size_t tensor_size = + std::accumulate(input_info.data_shape.begin(), input_info.data_shape.end(), 1, std::multiplies<size_t>()); + auto allocator = std::make_shared<SharedTensorAllocator>(tensor_size * sizeof(T)); + auto data = reinterpret_cast<T*>(allocator->get_buffer()); + /** Collect images data ptrs **/ + std::vector<ReaderInfo> vreader; + vreader.reserve(batch_size); + + size_t img_batch_size = 1; + if (!input_info.layout.empty() && ov::layout::has_batch(input_info.layout)) { + img_batch_size = batch_size; + } else { + slog::warn << input_name << ": layout does not contain batch dimension. Assuming batch 1 for this input" + << slog::endl; + } + + for (size_t i = 0, input_idx = request_id * batch_size * input_size + input_id; i < img_batch_size; i++, input_idx += input_size) { + input_idx %= files.size(); + FormatReader::ReaderPtr reader(files[input_idx].c_str()); + if (input_idx <= MAX_COUT_WITHOUT_VERBOSE || verbose) { + slog::info << "Prepare image " << files[input_idx] << slog::endl; + if (!verbose && input_idx == MAX_COUT_WITHOUT_VERBOSE) { + slog::info << "Truncating list of input files. Run with --verbose for complete list." << slog::endl; + } + } + if (reader.get() == nullptr) { + slog::warn << "Image " << files[input_idx] << " cannot be read!" << slog::endl << slog::endl; + continue; + } + + /** Getting image data **/ + std::shared_ptr<uint8_t> image_data(reader->getData(input_info.GetWidth(), input_info.GetHeight(), resize_type)); + if (image_data) { + // Store the number of channels used in storing the image in the reader + // If the image is grayscale, the reader would will still store it as a three + // channel image and therefore to read the image correctly we need to read the + // first channel value and then skip the next two. + const auto reader_channels = reader->size() / (reader->width() * reader->height()); + vreader.emplace_back(image_data, input_idx, reader_channels); + } + } + + /** Fill input tensor with image. First b channel, then g and r channels **/ + const size_t num_channels = input_info.GetChannels(); + const size_t width = input_info.GetWidth(); + const size_t height = input_info.GetHeight(); + const size_t batch = input_info.GetBatch(); + + const size_t image_size = width * height; // Calculate the image size + + // Lambda expression for calculating the pixel index in inputBlobData + const auto get_index = [=](size_t image_id, size_t pid, size_t ch) { + // Reverse the channel index if bgr is set to true + return image_id * image_size * num_channels + (bgr ? ch : (num_channels - ch - 1)) * image_size + pid; + }; + + // Lambda expression for calculating the channel (if bgr) + const auto get_channel = [=](size_t ch) { + return bgr ? ch : (num_channels - ch - 1); + }; + + /** Iterate over all input images **/ + for (size_t image_id = 0; image_id < vreader.size(); ++image_id) { + const auto& reader_info = vreader.at(image_id); + // Error out of the graph has a single channel input and the image is not grayscale + if (num_channels == 1 && !IsGrayScaleImage(reader_info, image_size)) { + THROW_IE_EXCEPTION + << "Graph input is grayscale (has a single channel) and the following image is in RGB format:\n\t" + << files.at(reader_info.file_index); + } + const auto reader_channels = reader_info.channels; + /** Iterate over all pixel in image (b,g,r) **/ + for (size_t pid = 0; pid < image_size; pid++) { + /** Iterate over all channels **/ + for (size_t ch = 0; ch < num_channels; ++ch) { + // check if scale values are 0 + if (input_info.scale_values[get_channel(ch)] == 0) { + throw ov::Exception("Cannot apply scale value of 0"); + } + // Reader is created with the assumption that the number of channels is always the maximum + data[get_index(image_id, pid, ch)] = static_cast<T>( + (reader_info.data.get()[pid * reader_channels + ch] - input_info.mean_values[get_channel(ch)]) / + input_info.scale_values[get_channel(ch)]); + } + } + } + + auto tensor = ov::Tensor(input_info.type, {batch, num_channels, height, width}, ov::Allocator(allocator)); + return tensor; +} + +/** + * @brief Fills a tensor with video data from input files + * + * Helper function to GetStaticTensors(), not used outside this file. + * Determines which image to use based of of input_id, batch_size, input_size, and request_id. + * Reads that and creates an input tensor of type T corresponding to input element type. + * + * @param file_paths vector of file paths to the input images + * @param input_id binary input id, ie video 1, video 2... + * @param batch_size batch size of the tensor + * @param input_size number of images to be used + * @param request_id infer request id + * @param input_info InputInfo struct corresponding to the input node of the tensor + * @param input_name name of the input + * @param bgr boolean indicating if input channels need to be reversed + * @param verbose prints extra logging information if true + * @return ov::Tensor containing the input data extracted from the video +*/ +template <typename T> +ov::Tensor CreateTensorFromVideo(const std::vector<std::string>& file_paths, + const size_t input_id, + const size_t batch_size, + const size_t input_size, + const size_t request_id, + const dla_benchmark::InputInfo& input_info, + const std::string& input_name, + const bool bgr = false, + const bool verbose = false) { + size_t tensor_size = + std::accumulate(input_info.data_shape.begin(), input_info.data_shape.end(), 1, std::multiplies<size_t>()); + auto allocator = std::make_shared<SharedTensorAllocator>(tensor_size * sizeof(T)); + auto data = reinterpret_cast<T*>(allocator->get_buffer()); + + const size_t input_idx = (request_id * input_size + input_id) % file_paths.size(); + + const size_t channels = input_info.GetChannels(); + const size_t height = input_info.GetHeight(); + const size_t width = input_info.GetWidth(); + const size_t frame_count = input_info.GetDepth(); + const size_t batch = input_info.GetBatch(); + + std::vector<cv::Mat> frames_to_write; + frames_to_write.reserve(batch_size * frame_count); + if (verbose) slog::info << "Prepare Video " << file_paths[input_idx] << slog::endl; + + // Open Video + cv::VideoCapture cap(file_paths[input_idx]); + if (!cap.isOpened()) { + throw std::runtime_error("Video file " + file_paths[input_idx] + " cannot be read!"); + } + + // Get amount of frames in video and calculate a step to partition the video into clips + size_t video_frames = 0; + size_t step; + size_t cur_video_pos = 0; + cv::Mat calc_frame; + + // Using while loop instead of cv::get() since cv::get() isn't guaranteed to return + // the correct amount of frames + while ((cap.read(calc_frame))) { + if (calc_frame.empty()) { + break; + } + video_frames++; + } + + // Reopen the file at the starting position + cap.release(); + cap.open(file_paths[input_idx].c_str()); + if (!cap.isOpened()) { + throw std::runtime_error("Video file " + file_paths[input_idx] + " cannot be read!"); + } + + if (verbose) { + slog::info << "Video file " << file_paths[input_idx] << " contains " << video_frames << " readable frames." + << slog::endl; + } + + // Calculate step to partition video into "batch_size" amount of clips + if (batch_size == 1) { + step = frame_count; + } else if (video_frames < frame_count) { + step = 1; + } else { + step = std::max((size_t)1, (video_frames - frame_count) / (batch_size - 1)); + } + + // Get frames + for (size_t clip_start = 0; clip_start < batch_size * step; clip_start += step) { + // Attempt to set position using OpenCV + Video Codec + bool success = cap.set(cv::CAP_PROP_POS_FRAMES, clip_start); + + // Unsupported by codec, set manually + if (!success) { + if (cur_video_pos < clip_start) { + while (cur_video_pos != clip_start) { + cap.read(calc_frame); + cur_video_pos++; + } + } else if (cur_video_pos > clip_start) { + // Reopen the file at the starting position + cap.release(); + cap.open(file_paths[input_idx].c_str()); + if (!cap.isOpened()) { + throw std::runtime_error("Video file " + file_paths[input_idx] + " cannot be read!"); + } + cur_video_pos = 0; + while (cur_video_pos != clip_start) { + cap.read(calc_frame); + cur_video_pos++; + } + } + } + + for (size_t curr_frame = 0; curr_frame < frame_count; curr_frame++) { + cv::Mat frame; + cap.read(frame); + + // Frame is empty -> Clip is shorter than frame_count, loop from start of clip + if (frame.empty()) { + if (verbose) + slog::info << "A video clip was shorter than the desired frame count, looping video." << slog::endl; + bool success = cap.set(cv::CAP_PROP_POS_FRAMES, clip_start); + + // If unsupported by codec, set manually + if (!success) { + // Reopen the file at the starting position + cap.release(); + cap.open(file_paths[input_idx].c_str()); + if (!cap.isOpened()) { + throw std::runtime_error("Video file " + file_paths[input_idx] + " cannot be read!"); + } + cur_video_pos = 0; + while (cur_video_pos != clip_start) { + cap.read(calc_frame); + cur_video_pos++; + } + } else { + cur_video_pos = clip_start; + } + + cap.read(frame); + + // If it's still empty, then there's an error with reading + if (frame.empty()) { + slog::err << "Video file " << file_paths[input_idx] << " frames cannot be read!" << slog::endl << slog::endl; + continue; + } + } + + cur_video_pos++; + // If bgr=false, convert to RGB + if (!bgr) { + cv::cvtColor(frame, frame, cv::COLOR_BGR2RGB); + } + + // Check frame sizing, resize if it doesn't match expected blob size + cv::Mat resized_frame(frame); + if (static_cast<int>(width) != frame.size().width || static_cast<int>(height) != frame.size().height) { + // Resizes to 256 and centre crops based on actual needed dimensions, may add a flag for this in the future + // to be cleaner + if (static_cast<int>(width) < 256 && static_cast<int>(height) < 256) { + double scale; + if (frame.size().width <= frame.size().height) + scale = double(256) / frame.size().width; + else + scale = double(256) / frame.size().height; + cv::resize(frame, resized_frame, cv::Size(0, 0), scale, scale); + const int offsetW = (resized_frame.size().width - static_cast<int>(width)) / 2; + const int offsetH = (resized_frame.size().height - static_cast<int>(height)) / 2; + const cv::Rect roi(offsetW, offsetH, static_cast<int>(width), static_cast<int>(height)); + resized_frame = resized_frame(roi).clone(); + } else { + cv::resize(frame, resized_frame, cv::Size(width, height)); + } + } + // Save frame to write + frames_to_write.emplace_back(resized_frame); + } + } + + // Write frames to blob + for (size_t b = 0; b < batch_size; b++) { + size_t batch_offset = b * channels * frame_count * height * width; + for (size_t c = 0; c < channels; c++) { + size_t channel_offset = c * frame_count * height * width; + for (size_t frameId = b * frame_count; frameId < (b + 1) * frame_count; frameId++) { + const cv::Mat& frame_to_write = frames_to_write.at(frameId); + size_t frame_offset_id = frameId % frame_count; + size_t frame_offset = frame_offset_id * height * width; + for (size_t h = 0; h < height; h++) { + for (size_t w = 0; w < width; w++) { + data[batch_offset + channel_offset + frame_offset + h * width + w] = frame_to_write.at<cv::Vec3b>(h, w)[c]; + } + } + } + } + } + cap.release(); + return ov::Tensor(input_info.type, {batch, channels, frame_count, height, width}, ov::Allocator(allocator)); +} + +/** + * @brief Fills a tensor with image info data + * + * Helper function to GetStaticTensors(), not used outside this file. + * + * @param image_size Size of image width x height + * @param batch_size batch size of the tensor + * @param input_info InputInfo struct corresponding to the input node of the tensor + * @param input_name name of the input + * @return ov::Tensor containing the input data +*/ +template <typename T> +ov::Tensor CreateTensorImInfo(const std::pair<size_t, size_t>& image_size, + size_t batch_size, + const dla_benchmark::InputInfo& input_info, + const std::string& input_name) { + size_t tensor_size = + std::accumulate(input_info.data_shape.begin(), input_info.data_shape.end(), 1, std::multiplies<size_t>()); + auto allocator = std::make_shared<SharedTensorAllocator>(tensor_size * sizeof(T)); + auto data = reinterpret_cast<T*>(allocator->get_buffer()); + + size_t info_batch_size = 1; + if (!input_info.layout.empty() && ov::layout::has_batch(input_info.layout)) { + info_batch_size = batch_size; + } else { + slog::warn << input_name << ": layout is not set or does not contain batch dimension. Assuming batch 1. " + << slog::endl; + } + + for (size_t b = 0; b < info_batch_size; b++) { + size_t im_info_size = tensor_size / info_batch_size; + for (size_t i = 0; i < im_info_size; i++) { + size_t index = b * im_info_size + i; + if (0 == i) + data[index] = static_cast<T>(image_size.first); + else if (1 == i) + data[index] = static_cast<T>(image_size.second); + else + data[index] = 1; + } + } + + auto tensor = ov::Tensor(input_info.type, input_info.data_shape, ov::Allocator(allocator)); + return tensor; +} + +/** + * @brief Fills a tensor with binary data from input files + * + * Helper function to GetStaticTensors(), not used outside this file. + * Determines which image to use based of of input_id, batch_size, input_size, and request_id. + * Reads that and creates an input tensor of type T corresponding to input element type. + * + * @param files vector of file paths to the input images + * @param input_id binary input id, ie binary 1, binary 2... + * @param batch_size batch size of the tensor + * @param input_size number of images to be used + * @param request_id infer request id + * @param input_info InputInfo struct corresponding to the input node of the tensor + * @param input_name name of the input + * @param verbose prints extra logging information if true + * @return ov::Tensor containing the input data extracted from the binary +*/ +template <typename T> +ov::Tensor CreateTensorFromBinary(const std::vector<std::string>& files, + const size_t input_id, + const size_t batch_size, + const size_t input_size, + const size_t request_id, + const dla_benchmark::InputInfo& input_info, + const std::string& input_name, + const bool verbose = false) { + size_t tensor_size = + std::accumulate(input_info.data_shape.begin(), input_info.data_shape.end(), 1, std::multiplies<size_t>()); + auto allocator = std::make_shared<SharedTensorAllocator>(tensor_size * sizeof(T)); + char* data = allocator->get_buffer(); + size_t binary_batch_size = 1; + if (!input_info.layout.empty() && ov::layout::has_batch(input_info.layout)) { + binary_batch_size = batch_size; + } else { + slog::warn << input_name + << ": layout is not set or does not contain batch dimension. Assuming that binary " + "data read from file contains data for all batches." + << slog::endl; + } + + for (size_t b = 0, input_idx = request_id * batch_size * input_size + input_id; b < binary_batch_size; b++, input_idx += input_size) { + input_idx %= files.size(); + if (input_idx <= MAX_COUT_WITHOUT_VERBOSE || verbose) { + slog::info << "Prepare binary file " << files[input_idx] << slog::endl; + if (!verbose && input_idx == MAX_COUT_WITHOUT_VERBOSE) { + slog::info << "Truncating list of input files. Run with --verbose for complete list." << slog::endl; + } + } + std::ifstream binary_file(files[input_idx], std::ios_base::binary | std::ios_base::ate); + OPENVINO_ASSERT(binary_file, "Cannot open ", files[input_idx]); + + auto file_size = static_cast<std::size_t>(binary_file.tellg()); + binary_file.seekg(0, std::ios_base::beg); + OPENVINO_ASSERT(binary_file.good(), "Can not read ", files[input_idx]); + auto input_size = tensor_size * sizeof(T) / binary_batch_size; + OPENVINO_ASSERT(file_size == input_size, + "File ", + files[input_idx], + " contains ", + file_size, + " bytes, but the model expects ", + input_size); + + if (input_info.layout != "CN") { + binary_file.read(&data[b * input_size], input_size); + } else { + for (size_t i = 0; i < input_info.GetChannels(); i++) { + binary_file.read(&data[(i * binary_batch_size + b) * sizeof(T)], sizeof(T)); + } + } + } + + auto tensor = ov::Tensor(input_info.type, input_info.data_shape, ov::Allocator(allocator)); + return tensor; +} + +/** + * @brief Randomly fills input tensor, used when no input files is provided + * + * Helper function to GetStaticTensors(), not used outside this file. + * + * @param input_info InputInfo struct corresponding to the input node of the tensor + * @param rand_min Min. random value + * @param rand_max Max. random value + * @return ov::Tensor containing the the randomly generated input data +*/ +template <typename T, typename T2> +ov::Tensor CreateTensorRandom(const dla_benchmark::InputInfo& input_info, + T rand_min = std::numeric_limits<uint8_t>::min(), + T rand_max = std::numeric_limits<uint8_t>::max()) { + size_t tensor_size = + std::accumulate(input_info.data_shape.begin(), input_info.data_shape.end(), 1, std::multiplies<size_t>()); + auto allocator = std::make_shared<SharedTensorAllocator>(tensor_size * sizeof(T)); + auto data = reinterpret_cast<T*>(allocator->get_buffer()); + + std::mt19937 gen(0); + uniformDistribution<T2> distribution(rand_min, rand_max); + for (size_t i = 0; i < tensor_size; i++) { + data[i] = static_cast<T>(i%255); + } + + ov::Shape tensor_shape = input_info.data_shape; + // FPGA model only supports channel first. + // The transpose for case NHWC and HWC below is ok since the tensor has randomly generated input data. + if (input_info.layout == "NHWC") { + // Use NCHW instead of NHWC since FPGA model only supports channel first. + tensor_shape = {input_info.GetBatch(), input_info.GetChannels(), + input_info.GetHeight(), input_info.GetWidth()}; + } else if (input_info.layout == "HWC") { + // Use CHW instead of HWC since FPGA model only supports channel first. + tensor_shape = {input_info.GetChannels(), input_info.GetHeight(), input_info.GetWidth()}; + } + + auto tensor = ov::Tensor(input_info.type, tensor_shape, ov::Allocator(allocator)); + return tensor; +} + +/** + * @brief Wrapper for CreateImageTensorFromImage, uses approriate stl data type for precision + * + * See CreateImageTensorFromImage for params. Helper for GetStaticTensors, not used outside this file. +*/ +ov::Tensor GetImageTensor(const std::vector<std::string>& files, + const size_t input_id, + const size_t batch_size, + const size_t input_size, + const size_t request_id, + const std::pair<std::string, dla_benchmark::InputInfo>& input_info, + const FormatReader::Reader::ResizeType resize_type, + const bool bgr = false, + const bool verbose = false) { + // Edwinzha: All image data will be read as U8 but saved as a float in tensor data structure. + // Saving as U8 results in accuracy loss in diff check, especially in mobilenet graphs. + const ov::element::Type_t type = input_info.second.type; + if (type == ov::element::f16) { + return CreateTensorFromImage<ov::float16>( + files, input_id, batch_size, input_size, request_id, input_info.second, input_info.first, resize_type, bgr, verbose); + } else { + return CreateTensorFromImage<float>( + files, input_id, batch_size, input_size, request_id, input_info.second, input_info.first, resize_type, bgr, verbose); + } +} + +/** + * @brief Wrapper for CreateTensorFromVideo, uses appropriate stl data type for precision + * + * See CreateTensorFromVideo for params. Helper for GetStaticTensors, not used outside this file. +*/ +ov::Tensor GetVideoTensor(const std::vector<std::string>& files, + const size_t input_id, + const size_t batch_size, + const size_t input_size, + const size_t request_id, + const std::pair<std::string, dla_benchmark::InputInfo>& input_info, + const bool bgr = false, + const bool verbose = false) { + auto type = input_info.second.type; + if (type == ov::element::f32) { + return CreateTensorFromVideo<float>( + files, input_id, batch_size, input_size, request_id, input_info.second, input_info.first, bgr, verbose); + } else if (type == ov::element::u8) { + return CreateTensorFromVideo<uint8_t>( + files, input_id, batch_size, input_size, request_id, input_info.second, input_info.first, bgr, verbose); + } else if (type == ov::element::i32) { + return CreateTensorFromVideo<int32_t>( + files, input_id, batch_size, input_size, request_id, input_info.second, input_info.first, bgr, verbose); + } else if (type == ov::element::f16) { + return CreateTensorFromVideo<ov::float16>( + files, input_id, batch_size, input_size, request_id, input_info.second, input_info.first, bgr, verbose); + } else { + throw ov::Exception("Video input tensor type is not supported: " + input_info.first); + } +} + +/** + * @brief Wrapper for CreateTensorRandom, uses appropriate stl data type for precision + * + * See CreateTensorRandom for params. Helper for GetStaticTensors, not used outside this file. +*/ +ov::Tensor GetRandomTensor(const std::pair<std::string, dla_benchmark::InputInfo>& input_info) { + auto type = input_info.second.type; + if (type == ov::element::f32) { + return CreateTensorRandom<float, float>(input_info.second); + } else if (type == ov::element::f16) { + return CreateTensorRandom<short, short>(input_info.second); + } else if (type == ov::element::i32) { + return CreateTensorRandom<int32_t, int32_t>(input_info.second); + } else if (type == ov::element::u8) { + // uniform_int_distribution<uint8_t> is not allowed in the C++17 + // standard and vs2017/19 + return CreateTensorRandom<uint8_t, uint32_t>(input_info.second); + } else if (type == ov::element::i8) { + // uniform_int_distribution<int8_t> is not allowed in the C++17 standard + // and vs2017/19 + return CreateTensorRandom<int8_t, int32_t>( + input_info.second, std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()); + } else if (type == ov::element::u16) { + return CreateTensorRandom<uint16_t, uint16_t>(input_info.second); + } else if (type == ov::element::i16) { + return CreateTensorRandom<int16_t, int16_t>(input_info.second); + } else { + throw ov::Exception("Random input tensor type is not supported: " + input_info.first); + } +} + +/** + * @brief Wrapper for CreateTensorImInfo, uses appropriate stl data type for precision + * + * See CreateTensorImInfo for params. Helper for GetStaticTensors, not used outside this file. +*/ +ov::Tensor GetImInfoTensor(const std::pair<size_t, size_t>& image_size, + size_t batch_size, + const std::pair<std::string, dla_benchmark::InputInfo>& input_info) { + auto type = input_info.second.type; + if (type == ov::element::f32) { + return CreateTensorImInfo<float>(image_size, batch_size, input_info.second, input_info.first); + } else if (type == ov::element::f64) { + return CreateTensorImInfo<double>(image_size, batch_size, input_info.second, input_info.first); + } else if (type == ov::element::f16) { + return CreateTensorImInfo<ov::float16>(image_size, batch_size, input_info.second, input_info.first); + } else if (type == ov::element::i32) { + return CreateTensorImInfo<int32_t>(image_size, batch_size, input_info.second, input_info.first); + } else if (type == ov::element::i64) { + return CreateTensorImInfo<int64_t>(image_size, batch_size, input_info.second, input_info.first); + } else { + throw ov::Exception("Image info input tensor type is not supported:" + input_info.first); + } +} + +/** + * @brief Wrapper for GetBinaryTensor, uses appropriate stl data type for precision + * + * See GetBinaryTensor for params. Helper for GetStaticTensors, not used outside this file. +*/ +ov::Tensor GetBinaryTensor(const std::vector<std::string>& files, + const size_t input_id, + const size_t batch_size, + const size_t input_size, + const size_t request_id, + const std::pair<std::string, dla_benchmark::InputInfo>& input_info, + const bool verbose = false) { + const auto& type = input_info.second.type; + if (type == ov::element::f32) { + return CreateTensorFromBinary<float>( + files, input_id, batch_size, input_size, request_id, input_info.second, input_info.first, verbose); + } else if (type == ov::element::f16) { + return CreateTensorFromBinary<ov::float16>( + files, input_id, batch_size, input_size, request_id, input_info.second, input_info.first, verbose); + } else if (type == ov::element::i32) { + return CreateTensorFromBinary<int32_t>( + files, input_id, batch_size, input_size, request_id, input_info.second, input_info.first, verbose); + } else if ((type == ov::element::u8)) { + return CreateTensorFromBinary<uint8_t>( + files, input_id, batch_size, input_size, request_id, input_info.second, input_info.first, verbose); + } else { + throw ov::Exception("Binary input tensor type is not supported: " + input_info.first); + } +} + +/** + * @brief Main function used by DLA benchmark, creates input tensors based off of input files and precision + * + * Only creates static tensors (no dims of -1). Calls all other functions in this file. + * + * @param input_files vector of input file paths + * @param batch_size batch size of input + * @param inputs_info map of input name to InputInfo struct which contains useful input information + * such as precision, tensor layout + * @param requests_num number of infer requests + * @param bgr boolean indicating if channels are reversed, corresponds to user bgr flag + * @param is_binary_data boolean indicating if the image data should be binary, corresponding to user binary flag + * @param verbose Verbosity boolean. If true, additional logs are printed + * @return A map of input name with tensor vectors. TensorVector being an alias of ov::Tensors where + * each index corresponds to the batch +*/ +std::map<std::string, ov::TensorVector> GetStaticTensors(const std::vector<std::string>& input_files, + const size_t& batch_size, + dla_benchmark::InputsInfo& inputs_info, + size_t requests_num, + std::string resize_type, + bool bgr = false, + bool is_binary_data = false, + bool verbose = false) { + std::map<std::string, ov::TensorVector> blobs; + std::vector<std::pair<size_t, size_t>> net_input_im_sizes; + std::vector<std::tuple<size_t, size_t, size_t>> net_input_vid_sizes; + FormatReader::Reader::ResizeType resize_type_enum; + + if (resize_type == "resize") { + resize_type_enum = FormatReader::Reader::ResizeType::RESIZE; + } else if (resize_type == "pad_resize") { + resize_type_enum = FormatReader::Reader::ResizeType::PAD_RESIZE; + } else { + slog::err << resize_type << " is not a valid -resize_type option" << slog::endl; + exit(1); + } + + for (auto& item : inputs_info) { + const std::string& name = item.first; + const auto& input_info = item.second; + if (input_info.IsImage() && !is_binary_data) { + net_input_im_sizes.emplace_back(input_info.GetWidth(), input_info.GetHeight()); + } else if (input_info.IsVideo()) { + net_input_vid_sizes.emplace_back(input_info.GetDepth(), input_info.GetWidth(), input_info.GetHeight()); + } + slog::info << "Network input '" << name << "' precision " << input_info.type << ", dimensions " + << input_info.layout.to_string() << ": "; + slog::info << "["; + for (size_t i = 0; i < input_info.data_shape.size(); ++i) { + slog::info << input_info.data_shape[i]; + if (i < input_info.data_shape.size() - 1) { + slog::info << " "; + } + } + slog::info << "]" << slog::endl; + } + + size_t img_input_count = net_input_im_sizes.size(); + size_t vid_input_count = net_input_vid_sizes.size(); + size_t bin_input_count = inputs_info.size() - img_input_count - vid_input_count; + + std::vector<std::string> binary_files; + std::vector<std::string> image_files; + std::vector<std::string> video_files; + + if (input_files.empty()) { + slog::warn << "No input files were given: all inputs will be filled with random values!" << slog::endl; + } else { + binary_files = FilterFilesByExtensions(input_files, supported_binary_extensions); + std::sort(std::begin(binary_files), std::end(binary_files)); + + auto bins_to_be_used = bin_input_count * batch_size * requests_num; + if (bins_to_be_used > 0 && binary_files.empty()) { + std::stringstream ss; + for (auto& ext : supported_binary_extensions) { + if (!ss.str().empty()) { + ss << ", "; + } + ss << ext; + } + slog::warn << "No supported binary inputs found! Please check your file " + "extensions: " + << ss.str() << slog::endl; + } else if (bins_to_be_used > binary_files.size()) { + slog::warn << "Some binary input files will be duplicated: " << bins_to_be_used << " files are required but only " + << binary_files.size() << " are provided" << slog::endl; + } else if (bins_to_be_used < binary_files.size()) { + slog::warn << "Some binary input files will be ignored: only " << bins_to_be_used << " are required from " + << binary_files.size() << slog::endl; + } + + image_files = FilterFilesByExtensions(input_files, supported_image_extensions); + std::sort(std::begin(image_files), std::end(image_files)); + + auto imgs_to_be_used = img_input_count * batch_size * requests_num; + if (imgs_to_be_used > 0 && image_files.empty()) { + std::stringstream ss; + for (auto& ext : supported_image_extensions) { + if (!ss.str().empty()) { + ss << ", "; + } + ss << ext; + } + slog::warn << "No supported image inputs found! Please check your file " + "extensions: " + << ss.str() << slog::endl; + } else if (imgs_to_be_used > image_files.size()) { + slog::warn << "Some image input files will be duplicated: " << imgs_to_be_used << " files are required but only " + << image_files.size() << " are provided" << slog::endl; + } else if (imgs_to_be_used < image_files.size()) { + slog::warn << "Some image input files will be ignored: only " << imgs_to_be_used << " are required from " + << image_files.size() << slog::endl; + } + + video_files = FilterFilesByExtensions(input_files, supported_video_extensions); + std::sort(std::begin(video_files), std::end(video_files)); + auto vids_to_be_used = vid_input_count * requests_num; + if (vids_to_be_used > 0 && video_files.empty()) { + std::stringstream ss; + for (auto& ext : supported_video_extensions) { + if (!ss.str().empty()) { + ss << ", "; + } + ss << ext; + } + slog::warn << "No supported video inputs found! Please check your file extensions: " << ss.str() << slog::endl; + } else if (vids_to_be_used > video_files.size()) { + slog::warn << "Some video input files will be duplicated: " << vids_to_be_used << " files are required but only " + << video_files.size() << " are provided" << slog::endl; + } else if (vids_to_be_used < video_files.size()) { + slog::warn << "Some video input files will be ignored: only " << vids_to_be_used << " are required from " + << video_files.size() << slog::endl; + } + } + + for (size_t i = 0; i < requests_num; ++i) { + size_t img_input_id = 0; + size_t bin_input_id = 0; + size_t vid_input_id = 0; + + for (auto& item : inputs_info) { + const std::string& input_name = item.first; + const auto& input_info = item.second; + if (item.second.IsImage() && !is_binary_data) { + if (!image_files.empty()) { + // Fill with images + blobs[input_name].push_back(GetImageTensor( + image_files, img_input_id++, batch_size, img_input_count, i, {input_name, input_info}, resize_type_enum, bgr, verbose)); + continue; + } + } else if (input_info.IsVideo()) { + if (!video_files.empty()) { + // Fill with videos + blobs[input_name].push_back(GetVideoTensor( + video_files, vid_input_id++, batch_size, vid_input_count, i, {input_name, input_info}, bgr, verbose)); + continue; + } + } else { + if (!binary_files.empty()) { + // Fill with binary files + blobs[input_name].push_back( + GetBinaryTensor(binary_files, bin_input_id++, batch_size, bin_input_count, i, {input_name, input_info}, verbose)); + continue; + } + if (input_info.IsImageInfo() && (net_input_im_sizes.size() == 1)) { + // Most likely it is image info: fill with image information + auto image_size = net_input_im_sizes.at(0); + blobs[input_name].push_back(GetImInfoTensor(image_size, batch_size, {input_name, input_info})); + continue; + } + } + // Fill random + slog::info << "No suitable input data found, filling input tensors with random data.\n"; + blobs[input_name].push_back(GetRandomTensor({input_name, input_info})); + } + } + + return blobs; +} + +/** + * @brief Copies data from a source OpenVINO Tensor to a destination Tensor. + * + * @param dst The destination Tensor where data will be copied. + * @param src The source Tensor from which data will be copied. + */ +void CopyTensorData(ov::Tensor& dst, const ov::Tensor& src) { + if (src.get_shape() != dst.get_shape() || src.get_byte_size() != dst.get_byte_size()) { + throw std::runtime_error( + "Source and destination tensors shapes and byte sizes are expected to be equal for data copying."); + } + + memcpy(dst.data(), src.data(), src.get_byte_size()); +} |
