summaryrefslogtreecommitdiff
path: root/python/openvino/runtime/dla_aot_splitter
diff options
context:
space:
mode:
Diffstat (limited to 'python/openvino/runtime/dla_aot_splitter')
-rw-r--r--python/openvino/runtime/dla_aot_splitter/CMakeLists.txt71
-rw-r--r--python/openvino/runtime/dla_aot_splitter/CPPLINT.cfg8
-rw-r--r--python/openvino/runtime/dla_aot_splitter/README.md52
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_example/CMakeLists.txt209
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_example/src/main.cpp180
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/CMakeLists.txt113
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h38
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h49
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h79
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h81
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h80
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter.xml18
-rwxr-xr-xpython/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter_win.xml22
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg4
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp117
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp68
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp67
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp89
-rw-r--r--python/openvino/runtime/dla_aot_splitter/inc/dla_aot_splitter.hpp130
-rw-r--r--python/openvino/runtime/dla_aot_splitter/sdl.cmake96
-rw-r--r--python/openvino/runtime/dla_aot_splitter/src/main.cpp475
21 files changed, 2046 insertions, 0 deletions
diff --git a/python/openvino/runtime/dla_aot_splitter/CMakeLists.txt b/python/openvino/runtime/dla_aot_splitter/CMakeLists.txt
new file mode 100644
index 0000000..0e1e4f8
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/CMakeLists.txt
@@ -0,0 +1,71 @@
+cmake_minimum_required(VERSION 3.10)
+
+# Use <package>_ROOT variables to help find_package locate packages
+if (POLICY CMP0074)
+ cmake_policy(SET CMP0074 NEW)
+endif()
+
+find_package(OpenCV COMPONENTS core highgui imgcodecs imgproc videoio REQUIRED)
+find_package(gflags COMPONENTS shared REQUIRED)
+
+add_subdirectory(dla_aot_splitter_plugin)
+add_subdirectory(dla_aot_splitter_example)
+
+if (DE10_AGILEX)
+ add_library(de10_agilex ALIAS de10_agilex_mmd)
+elseif (SYSTEM_CONSOLE_PLATFORM)
+ # DO NOTHING
+elseif (PAC_A10)
+ add_library(dcp_a10_pac ALIAS intel_opae_mmd)
+elseif(AGX7_IDK)
+ add_library(agx7_i_dk ALIAS intel_opae_mmd)
+elseif(AGX7_N6001)
+ add_library(agx7_n6001 ALIAS intel_opae_mmd)
+endif()
+
+add_executable(dla_aot_splitter ${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp)
+
+target_compile_features(dla_aot_splitter PUBLIC cxx_std_11)
+
+target_sources(dla_aot_splitter PRIVATE
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/inc/dla_aot_splitter.hpp
+ $ENV{COREDLA_ROOT}/runtime/dla_benchmark/inputs_filling.cpp #TODO REMOVE and replace with link library
+ $ENV{COREDLA_ROOT}/runtime/dla_benchmark/utils.cpp #TODO REMOVE and replace with link library
+ $ENV{COREDLA_ROOT}/runtime/common/utils/src/slog.cpp
+ $ENV{COREDLA_ROOT}/runtime/common/utils/src/args_helper.cpp
+ $ENV{COREDLA_ROOT}/runtime/common/utils/src/common.cpp
+ $ENV{COREDLA_ROOT}/runtime/common/utils/src/latency_metrics.cpp
+)
+
+target_include_directories(dla_aot_splitter PRIVATE
+ ${CMAKE_CURRENT_SOURCE_DIR}/inc
+ $ENV{COREDLA_ROOT}/util/inc
+ $ENV{COREDLA_ROOT}/dla_plugin/inc
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia
+ $ENV{COREDLA_ROOT}/runtime/dla_benchmark #TODO REMOVE and replace with link library
+)
+
+if (WIN32)
+ target_include_directories(dla_aot_splitter PRIVATE
+ $ENV{COREDLA_ROOT}/compiler/inc # dla_performance_estimator.h
+ )
+endif()
+
+
+target_link_libraries(dla_aot_splitter PRIVATE
+ openvino::runtime
+ openvino_dev_api
+ format_reader
+ ie_samples_utils
+ ${OpenCV_LIBRARIES} # Needed for the directly compiled inputs_filling
+ dla_aot_splitter_plugin
+ gflags
+)
+
+if (NOT WIN32)
+ target_link_libraries(dla_aot_splitter PRIVATE
+ ${LIB_DL}
+ pthread
+ )
+endif()
diff --git a/python/openvino/runtime/dla_aot_splitter/CPPLINT.cfg b/python/openvino/runtime/dla_aot_splitter/CPPLINT.cfg
new file mode 100644
index 0000000..4bdae97
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/CPPLINT.cfg
@@ -0,0 +1,8 @@
+set noparent
+filter=-build/header_guard,-runtime/explicit,-build/include_subdir,-runtime/references,-build/c++11,-runtime/int,-runtime/string,-runtime/printf,-build/namespaces,-readability/todo,-readability/casting
+
+# Exclude Example code
+exclude_files=dla_aot_splitter_example
+
+linelength=160
+headers=h,hpp
diff --git a/python/openvino/runtime/dla_aot_splitter/README.md b/python/openvino/runtime/dla_aot_splitter/README.md
new file mode 100644
index 0000000..ffefe0d
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/README.md
@@ -0,0 +1,52 @@
+# Intel AI Suite Core DLA 'AoT Splitter'
+
+This tool is intended to split a compiled HETERO:FPGA OpenVINO model into Input memory, Config memory, and Filter memory data blobs that would normally exist in the DDR memory of a runtime CoreDLA IP. These blobs can be used to directly run an inference on the IP without using OpenVINO InferenceEngine.
+
+# How to Build the Splitter, Plugin, and Example
+
+First, follow all instructions to install CoreDLA compiler development environment
+
+Change directory to the dla runtime folder
+
+```
+sh build_runtime.sh -target_de10_agilex
+```
+
+# How to Run the Splitter Executable
+
+The executable outputs the memory blobs to the current working directory. Change directory to the location where you want the outputs to be generated
+
+```
+cd directory_where_you_want_output
+
+runtime/build_Release/dla_aot_splitter/dla_aot_splitter -cm compiled_hetero_fpga_model.bin -i path/to/image.bmp -bgr -plugins runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter.xml
+```
+
+Ensure that the libdla_aot_splitter.so, libcoreDLAHeteroPlugin.so and other shared libraries are available to the utility.
+
+The tool outputs the following artifacts:
+ - arch_build.mem / arch_build.bin
+ - config.mem / config.bin
+ - filter.mem / filter.bin
+ - input.mem / input.bin
+ - inter_size.mem
+ - output_size.mem
+
+# Building the Example Inference Program
+
+The example inference program with static input,config,filter data is compiled with the following environment variables
+and option to build_runtime.sh
+
+## DE10 Agilex
+```
+export AOT_SPLITTER_EXAMPLE_MODEL=<path/to/model.xml>
+export AOT_SPLITTER_EXAMPLE_INPUT=<path/to/image.bmp>
+sh build_runtime.sh -aot_splitter_example -target_de10_agilex
+```
+
+This program directly embeds the input, config and filter data into the resulting executable file for direct use.
+
+## PCIE
+
+The emulation inference program uses the PCIE MMD driver from the example design to connect to and provision the IP.
+Your system may require a different driver to provision the IP.
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_example/CMakeLists.txt b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_example/CMakeLists.txt
new file mode 100644
index 0000000..a6f2ce8
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_example/CMakeLists.txt
@@ -0,0 +1,209 @@
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+add_executable(dla_aot_splitter_example EXCLUDE_FROM_ALL src/main.cpp)
+
+target_compile_features(dla_aot_splitter_example PUBLIC cxx_std_11)
+
+target_compile_definitions(dla_aot_splitter_example PRIVATE DLA_MMD)
+
+file(GLOB SOURCES
+ # coredla_device
+ $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/device_memory_allocator.h
+ $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/dla_dma_constants.h
+ $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/mmd_wrapper.h
+ $ENV{COREDLA_ROOT}/runtime/coredla_device/src/device_memory_allocator.cpp
+ #
+ src/main.cpp
+)
+if (SYSTEM_CONSOLE_PLATFORM)
+ list(APPEND SOURCES ${CMAKE_SOURCE_DIR}/coredla_device/mmd/system_console/mmd_wrapper.cpp)
+else ()
+ list(APPEND SOURCES $ENV{COREDLA_ROOT}/runtime/coredla_device/src/mmd_wrapper.cpp)
+endif ()
+
+target_sources (dla_aot_splitter_example PRIVATE ${SOURCES})
+
+if (DISABLE_JIT)
+# for dla_dma_constants.svh
+ if (EXISTS $ENV{COREDLA_ROOT}/inc)
+ target_include_directories(dla_aot_splitter_example PRIVATE $ENV{COREDLA_ROOT}/inc)
+ else()
+ target_include_directories(dla_aot_splitter_example PRIVATE $ENV{COREDLA_ROOT}/build/coredla/dla/inc)
+ endif()
+endif()
+
+target_link_libraries(dla_aot_splitter_example PRIVATE
+ pthread
+)
+
+if (DISABLE_JIT)
+ target_include_directories(dla_aot_splitter_example PRIVATE
+ $ENV{COREDLA_ROOT}/util/inc
+ $ENV{COREDLA_XUTIL_DIR}/compiled_result/inc
+ )
+ target_sources(dla_aot_splitter_example PRIVATE $ENV{COREDLA_XUTIL_DIR}/compiled_result/src/compiled_result_reader_writer.cpp)
+else()
+ target_link_libraries(dla_aot_splitter_example
+ PRIVATE
+ dla_compiled_result
+ )
+endif()
+
+if (DE10_AGILEX)
+ target_link_libraries(dla_aot_splitter_example PRIVATE de10_agilex)
+elseif(PAC_A10)
+ target_link_libraries(dla_aot_splitter_example PRIVATE dcp_a10_pac)
+elseif(AGX7_IDK)
+ target_link_libraries(dla_aot_splitter_example PRIVATE agx7_i_dk)
+ find_library(libjson-c_LIBRARIES
+ NAMES json-c
+ PATHS ${LIBOPAE-C_ROOT}/lib
+ ${LIBOPAE-C_ROOT}/lib64
+ /usr/local/lib
+ /usr/lib
+ /lib
+ /usr/lib/x86_64-linux-gnu
+ ${CMAKE_EXTRA_LIBS})
+ target_link_libraries(dla_aot_splitter_example PRIVATE ${libjson-c_LIBRARIES})
+elseif(AGX7_N6001)
+ target_link_libraries(dla_aot_splitter_example PRIVATE agx7_n6001)
+ find_library(libjson-c_LIBRARIES
+ NAMES json-c
+ PATHS ${LIBOPAE-C_ROOT}/lib
+ ${LIBOPAE-C_ROOT}/lib64
+ /usr/local/lib
+ /usr/lib
+ /lib
+ /usr/lib/x86_64-linux-gnu
+ ${CMAKE_EXTRA_LIBS})
+ target_link_libraries(dla_aot_splitter_example PRIVATE ${libjson-c_LIBRARIES})
+elseif(SYSTEM_CONSOLE_PLATFORM)
+ # Agilex 5 JTAG ED: do nothing
+elseif(REFERENCE)
+ # Reference: do nothing
+else()
+ message(FATAL_ERROR "Building DLA AOT Splitter Example with unsupported platform")
+endif()
+
+target_include_directories(dla_aot_splitter_example PRIVATE
+ $ENV{COREDLA_ROOT}/runtime/coredla_device/inc
+ if(PAC_A10)
+ $ENV{COREDLA_ROOT}/runtime/coredla_device/mmd/dcp_a10_pac/host
+ endif()
+)
+
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include)
+
+target_sources (dla_aot_splitter_example PRIVATE
+ ${CMAKE_CURRENT_BINARY_DIR}/include/arch_build.mem
+ ${CMAKE_CURRENT_BINARY_DIR}/include/config.mem
+ ${CMAKE_CURRENT_BINARY_DIR}/include/filter.mem
+ ${CMAKE_CURRENT_BINARY_DIR}/include/input.mem
+ ${CMAKE_CURRENT_BINARY_DIR}/include/inter_size.mem
+ ${CMAKE_CURRENT_BINARY_DIR}/include/output_size.mem
+)
+target_include_directories(dla_aot_splitter_example PRIVATE
+ ${CMAKE_CURRENT_BINARY_DIR}/include
+)
+
+if (DEFINED ENV{AOT_SPLITTER_EXAMPLE_MODEL})
+ set (AOT_SPLITTER_EXAMPLE_MODEL $ENV{AOT_SPLITTER_EXAMPLE_MODEL})
+else()
+ if (EXISTS $ENV{COREDLA_WORK}/demo/models/public/resnet-50-tf/FP32/resnet-50-tf.xml)
+ set (AOT_SPLITTER_EXAMPLE_MODEL $ENV{COREDLA_WORK}/demo/models/public/resnet-50-tf/FP32/resnet-50-tf.xml)
+ else()
+ # The path below is for Intel internal use only
+ if (EXISTS /p/psg/swip/dla/caffe/caffe_reference/ngraph_ir/coredla/ModelZoo/2021_4_1/resnet_50_tf/FP32/resnet-50-tf.xml)
+ set (AOT_SPLITTER_EXAMPLE_MODEL /p/psg/swip/dla/caffe/caffe_reference/ngraph_ir/coredla/ModelZoo/2021_4_1/resnet_50_tf/FP32/resnet-50-tf.xml)
+ endif()
+ endif()
+endif()
+
+if (DEFINED ENV{AOT_SPLITTER_EXAMPLE_INPUT})
+ set (AOT_SPLITTER_EXAMPLE_INPUT $ENV{AOT_SPLITTER_EXAMPLE_INPUT})
+else()
+ if (EXISTS $ENV{COREDLA_ROOT}/demo/sample_images/val_00000000.bmp)
+ set (AOT_SPLITTER_EXAMPLE_INPUT $ENV{COREDLA_ROOT}/demo/sample_images/val_00000000.bmp)
+ else()
+ # The path below is for Intel internal use only
+ if (EXISTS /p/psg/swip/dla/images/imagenet/ILSVRC2012_224x224/BMP/BMP/ILSVRC2012_val_00000000.bmp)
+ set (AOT_SPLITTER_EXAMPLE_INPUT /p/psg/swip/dla/images/imagenet/ILSVRC2012_224x224/BMP/BMP/ILSVRC2012_val_00000000.bmp)
+ endif()
+ endif()
+endif()
+
+if (EXISTS ${CoreDLA_DIR}/../bin)
+ set(COREDLA_BIN ${CoreDLA_DIR}/../bin)
+ set(COREDLA_LIB ${CoreDLA_DIR}/../lib)
+ set(COREDLA_EXARCH ${CoreDLA_DIR}/../example_architectures)
+ if(DE10_AGILEX OR AGX7_IDK OR AGX7_N6001)
+ set (AOT_SPLITTER_EXAMPLE_ARCH AGX7_Performance.arch)
+ elseif(SYSTEM_CONSOLE_PLATFORM)
+ set (AOT_SPLITTER_EXAMPLE_ARCH AGX5_Small_Softmax.arch)
+ else()
+ set (AOT_SPLITTER_EXAMPLE_ARCH A10_Performance.arch)
+ endif()
+else()
+ set(COREDLA_BIN $ENV{COREDLA_ROOT}/build/coredla/dla/bin)
+ set(COREDLA_LIB $ENV{COREDLA_ROOT}/build/coredla/dla/lib)
+ set(COREDLA_EXARCH $ENV{COREDLA_ROOT}/example_architectures)
+
+ # The paths below are for Intel internal use only
+ if(DE10_AGILEX)
+ set (AOT_SPLITTER_EXAMPLE_ARCH arch/descriptions/AGX7/64x32_i5x1_fp13agx_sb31744_xbark32_clamp_preluk32_poolk4_softmax_1inst.arch)
+ elseif(AGX7_IDK OR AGX7_N6001)
+ set (AOT_SPLITTER_EXAMPLE_ARCH arch/descriptions/AGX7/32x64_i5x1_fp13agx_sb32768_poolk4_actk32_prelu_rclamp_sig_softmaxk1.arch)
+ elseif(SYSTEM_CONSOLE_PLATFORM)
+ set (AOT_SPLITTER_EXAMPLE_ARCH 16x16_i12x1_fp12agx_sb8192_poolk4_actk16_clamp_softmaxk1.arch)
+ else()
+ set (AOT_SPLITTER_EXAMPLE_ARCH arch/descriptions/A10/64x32_i4x1_fp11_sb31744_xbark32_clamp_preluk32_poolk4_softmax.arch)
+ endif()
+endif()
+
+if (NOT DEFINED AOT_SPLITTER_INPUT_ARGUMENTS)
+ set (AOT_SPLITTER_INPUT_ARGUMENTS )
+ if (DEFINED AOT_SPLITTER_EXAMPLE_INPUT)
+ set (AOT_SPLITTER_INPUT_ARGUMENTS -i ${AOT_SPLITTER_EXAMPLE_INPUT} -bgr)
+ endif()
+endif()
+
+# Need to copy the system console script for Agilex 5E JTAG ED
+# Also link against Boost
+if (SYSTEM_CONSOLE_PLATFORM)
+ find_package(Boost REQUIRED COMPONENTS filesystem)
+ target_link_libraries(dla_aot_splitter_example PRIVATE Boost::filesystem)
+ add_custom_command(
+ TARGET dla_aot_splitter_example POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy
+ ${CMAKE_SOURCE_DIR}/coredla_device/mmd/system_console/system_console_script.tcl
+ ${CMAKE_CURRENT_BINARY_DIR}/system_console_script.tcl
+ )
+ target_compile_definitions(dla_aot_splitter_example PRIVATE DLA_SYSCON_SOURCE_ROOT=${CMAKE_CURRENT_BINARY_DIR})
+endif()
+
+add_custom_command(
+ OUTPUT
+ ${CMAKE_CURRENT_BINARY_DIR}/include/arch_build.mem
+ ${CMAKE_CURRENT_BINARY_DIR}/include/config.mem
+ ${CMAKE_CURRENT_BINARY_DIR}/include/filter.mem
+ ${CMAKE_CURRENT_BINARY_DIR}/include/input.mem
+ ${CMAKE_CURRENT_BINARY_DIR}/include/inter_size.mem
+ ${CMAKE_CURRENT_BINARY_DIR}/include/output_size.mem
+ COMMAND
+ LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:${COREDLA_LIB} ${COREDLA_BIN}/dlac --network-file ${AOT_SPLITTER_EXAMPLE_MODEL} --march ${COREDLA_EXARCH}/${AOT_SPLITTER_EXAMPLE_ARCH} --foutput-format open_vino_hetero --o ${CMAKE_CURRENT_BINARY_DIR}/resnet.bin
+ COMMAND
+ LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:${COREDLA_LIB} $<TARGET_FILE:dla_aot_splitter> ${AOT_SPLITTER_INPUT_ARGUMENTS} -cm ${CMAKE_CURRENT_BINARY_DIR}/resnet.bin -plugins $<TARGET_FILE_DIR:dla_aot_splitter_plugin>/plugins_aot_splitter.xml
+ DEPENDS
+ ${COREDLA_BIN}/dlac
+ dla_benchmark
+ dla_aot_splitter
+ dla_aot_splitter_plugin
+ ${AOT_SPLITTER_EXAMPLE_MODEL}
+ ${COREDLA_EXARCH}/${AOT_SPLITTER_EXAMPLE_ARCH}
+ ${AOT_SPLITTER_EXAMPLE_INPUT}
+ $<TARGET_FILE_DIR:dla_aot_splitter_plugin>/plugins_aot_splitter.xml
+ WORKING_DIRECTORY
+ ${CMAKE_CURRENT_BINARY_DIR}/include
+)
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_example/src/main.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_example/src/main.cpp
new file mode 100644
index 0000000..b90ccd5
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_example/src/main.cpp
@@ -0,0 +1,180 @@
+// Copyright 2022 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+//
+// This small tool demonstrates the minimum number of steps necessary to run an
+// inference on the FPGA while using the output files from the AoT splitter.
+//
+
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <stdint.h>
+#include <array>
+#include <cstring> //memcpy
+
+uint32_t arch_build_mem_32[] =
+{
+ #include "arch_build.mem"
+};
+uint8_t* const arch_build_mem = (uint8_t*)&arch_build_mem_32[0];
+const uint32_t arch_build_mem_size = sizeof(arch_build_mem_32);
+
+uint32_t input_mem_32[] =
+{
+ #include "input.mem"
+};
+uint8_t* const input_mem = sizeof(input_mem_32) ? (uint8_t*)&input_mem_32[0] : nullptr;
+const uint32_t input_mem_size = sizeof(input_mem_32);
+
+uint32_t config_mem_32[] =
+{
+ #include "config.mem"
+};
+uint8_t* const config_mem = (uint8_t*)&config_mem_32[0];
+const uint32_t config_mem_size = sizeof(config_mem_32);
+
+uint32_t filter_mem_32[] =
+{
+ #include "filter.mem"
+};
+uint8_t* const filter_mem = (uint8_t*)&filter_mem_32[0];
+const uint32_t filter_mem_size = sizeof(filter_mem_32);
+
+constexpr uint32_t output_mem_size =
+ #include "output_size.mem"
+;
+
+constexpr uint32_t inter_mem_size =
+ #include "inter_size.mem"
+;
+
+#include "mmd_wrapper.h"
+#include "device_memory_allocator.h"
+#include "dla_dma_constants.h" //DLA_DMA_CSR_OFFSET_***
+
+int main(int argc, char *argv[]) {
+ std::array<uint8_t, output_mem_size> actual_output_mem;
+ for (uint64_t i=0u; i < actual_output_mem.size();i++)
+ {
+ actual_output_mem[i] = (0xDEADBEEF) >> ((3-(i%4)) * 8);
+ }
+
+ std::cout << "AOT Splitter Example" << std::endl;
+
+ constexpr int instance = 0;
+
+ constexpr int _maxNumPipelines = 5;
+ constexpr int numPipelines = _maxNumPipelines;
+
+ // TODO: retrieve this from the arch file
+ constexpr uint64_t featureWordSize = 32;
+ constexpr uint64_t filterWordSize = 64;
+
+
+ constexpr int ARCH_HASH_SIZE = 16;
+ constexpr int BUILD_VERSION_SIZE = 32;
+
+ MmdWrapper mmdWrapper{};
+ DeviceMemoryAllocator ddrAllocator{};
+
+ for (size_t i = 0; i < ARCH_HASH_SIZE; i+=4) {
+ uint32_t arch_build_word_from_device = mmdWrapper.ReadFromCsr(instance, i);
+ if (arch_build_mem_32[i/4] != arch_build_word_from_device)
+ {
+ std::cout << "Arch hash mismatch at word " << i << " : expected " <<
+ std::setfill('0') << std::setw(8) << std::uppercase << std::hex << (uint32_t)arch_build_mem_32[i/4] <<
+ " != " <<
+ std::setfill('0') << std::setw(8) << std::uppercase << std::hex << (uint32_t)arch_build_word_from_device << std::endl;
+ return 1;
+ }
+ }
+ char expected_build_version[BUILD_VERSION_SIZE + 1];
+ expected_build_version[BUILD_VERSION_SIZE] = '\0';
+ std::memcpy(expected_build_version, (uint8_t*)&arch_build_mem_32[ARCH_HASH_SIZE/sizeof(uint32_t)], BUILD_VERSION_SIZE);
+
+ char actual_build_version[BUILD_VERSION_SIZE + 1];
+ actual_build_version[BUILD_VERSION_SIZE] = '\0';
+
+ for (uint32_t i=0;i < BUILD_VERSION_SIZE; i+=4)
+ {
+ uint32_t chunk = mmdWrapper.ReadFromCsr(instance, ARCH_HASH_SIZE + i);
+ for (uint8_t j=0;j < 4; j++)
+ {
+ actual_build_version[i+j] = chunk & 0xFF;
+ chunk >>= 8;
+ }
+ }
+ if (0 != std::strncmp(expected_build_version, actual_build_version, BUILD_VERSION_SIZE))
+ {
+ std::cout << "Build version mismatch. Expected " << expected_build_version << " actual " << actual_build_version << std::endl;
+ return 1;
+ }
+
+ ddrAllocator.Initialize(mmdWrapper.GetDDRSizePerInstance(), &mmdWrapper);
+
+ ddrAllocator.AllocateSharedBuffer(inter_mem_size, instance);
+ //mmdWrapper.WriteToCsr(instance, DLA_DMA_CSR_OFFSET_INTERMEDIATE_BASE_ADDR, 0);
+
+
+ uint64_t inputOutputBufferSize = numPipelines * (input_mem_size + output_mem_size); // how much space to allocate
+ uint64_t inputOutputBufferAlignment = featureWordSize; // starting address must be aligned to this
+ uint64_t inputOutputBufferAddr; // where did the allocator place this buffer
+ ddrAllocator.AllocatePrivateBuffer(inputOutputBufferSize, inputOutputBufferAlignment, inputOutputBufferAddr);
+
+ uint64_t configFilterBufferSize = config_mem_size + filter_mem_size;
+ uint64_t configFilterBufferAlignment = filterWordSize;
+ uint64_t configFilterBufferAddr;
+ ddrAllocator.AllocatePrivateBuffer(configFilterBufferSize, configFilterBufferAlignment, configFilterBufferAddr);
+
+ mmdWrapper.WriteToCsr(instance, DLA_DMA_CSR_OFFSET_INTERRUPT_MASK, 0);
+ mmdWrapper.WriteToCsr(instance, DLA_DMA_CSR_OFFSET_INTERRUPT_CONTROL, 3);
+ uint32_t completionCount = mmdWrapper.ReadFromCsr(instance, DLA_DMA_CSR_OFFSET_COMPLETION_COUNT);
+ std::cout << "Initial completion count " << completionCount << std::endl;
+
+ mmdWrapper.WriteToDDR(instance, inputOutputBufferAddr, input_mem_size, input_mem);
+
+ mmdWrapper.WriteToDDR(instance, configFilterBufferAddr, config_mem_size, config_mem);
+ mmdWrapper.WriteToDDR(instance, configFilterBufferAddr + config_mem_size, filter_mem_size, filter_mem);
+
+ mmdWrapper.WriteToCsr(instance, DLA_DMA_CSR_OFFSET_CONFIG_BASE_ADDR, configFilterBufferAddr);
+ constexpr int CONFIG_READER_DATA_BYTES = 8; // May want to move to a header in production code
+ mmdWrapper.WriteToCsr(instance, DLA_DMA_CSR_OFFSET_CONFIG_RANGE_MINUS_TWO, ((config_mem_size) / CONFIG_READER_DATA_BYTES) - 2);
+
+
+ // base address for feature reader -- this will trigger one run of DLA
+ mmdWrapper.WriteToCsr(instance, DLA_DMA_CSR_OFFSET_INPUT_OUTPUT_BASE_ADDR, inputOutputBufferAddr);
+
+ int i=0;
+ while(mmdWrapper.ReadFromCsr(instance, DLA_DMA_CSR_OFFSET_COMPLETION_COUNT) == completionCount)
+ {
+ i++;
+ if (i % 100000 == 0) {
+ std::cout << "Timeout" << std::endl;
+ return 1;
+ }
+ }
+
+ std::cout << "Completed inference in " << i << " polling intervals" << std::endl;
+
+ //Reading from pipeline zero
+ mmdWrapper.ReadFromDDR(instance, inputOutputBufferAddr + input_mem_size, actual_output_mem.size(), actual_output_mem.data());
+
+ std::ofstream of ("actual_output.mem", std::ios_base::out | std::ios_base::binary);
+ if (of) {
+ of.write((const char*)actual_output_mem.data(), actual_output_mem.size());
+ }
+ of.close();
+
+ return 0;
+}
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/CMakeLists.txt b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/CMakeLists.txt
new file mode 100644
index 0000000..6f5e916
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/CMakeLists.txt
@@ -0,0 +1,113 @@
+cmake_minimum_required(VERSION 3.10)
+
+add_library(dla_aot_splitter_plugin SHARED)
+
+target_compile_features(dla_aot_splitter_plugin PUBLIC cxx_std_11)
+
+target_compile_definitions(dla_aot_splitter_plugin PUBLIC DISABLE_JIT)
+
+set_target_properties(dla_aot_splitter_plugin PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+if (WIN32)
+ # Fix warning C4273: inconsistent dll linkage
+ target_compile_definitions(dla_aot_splitter_plugin PRIVATE XBYAK_NO_OP_NAMES
+ IMPLEMENT_INFERENCE_ENGINE_PLUGIN
+ $<TARGET_PROPERTY:openvino::runtime,INTERFACE_COMPILE_DEFINITIONS>)
+endif()
+
+target_include_directories(dla_aot_splitter_plugin PRIVATE
+ $ENV{COREDLA_ROOT}/dla_plugin
+ $ENV{COREDLA_ROOT}/dla_plugin/inc
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia
+ $ENV{COREDLA_ROOT}/util/inc # dla_error.h
+ $ENV{COREDLA_ROOT}/inc # dla_dma_constants.svh
+ $ENV{COREDLA_ROOT}/runtime/coredla_device/inc # For abstract classes (BatchJob, Device etc.)
+ #
+ ${CMAKE_CURRENT_SOURCE_DIR}/inc
+)
+
+target_sources(dla_aot_splitter_plugin PRIVATE
+##
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_async_infer_request.h
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_config.hpp
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_compiled_model.h
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_runtime_log.h
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia_infer_request.h
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia_plugin.h
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia_utils.h
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_plugin_config.hpp
+##
+ $ENV{COREDLA_ROOT}/dla_plugin/src/dla_async_infer_request.cpp
+ $ENV{COREDLA_ROOT}/dla_plugin/src/dla_config.cpp
+ $ENV{COREDLA_ROOT}/dla_plugin/src/dla_compiled_model.cpp
+ $ENV{COREDLA_ROOT}/dla_plugin/src/dlia_infer_request.cpp
+ $ENV{COREDLA_ROOT}/dla_plugin/src/dlia_plugin.cpp
+ $ENV{COREDLA_ROOT}/dla_plugin/src/dla_plugin_jit_functions.cpp
+ $ENV{COREDLA_ROOT}/dla_plugin/src/dlia_utils.cpp
+ $ENV{COREDLA_ROOT}/util/src/dla_numeric_utils.cpp
+##
+ $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/graph_job.h
+ $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/batch_job.h
+ $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/device.h
+##
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/raw_graph_job.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/raw_device.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/raw_batch_job.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/dla_aot_utils.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/inc/raw_graph_job.h
+ ${CMAKE_CURRENT_SOURCE_DIR}/inc/raw_device.h
+ ${CMAKE_CURRENT_SOURCE_DIR}/inc/raw_batch_job.h
+ ${CMAKE_CURRENT_SOURCE_DIR}/inc/dla_aot_utils.h
+ ${CMAKE_CURRENT_SOURCE_DIR}/inc/dla_aot_structs.h
+)
+
+if (WIN32)
+ target_link_libraries(dla_aot_splitter_plugin
+ PRIVATE
+##
+ dla_op_transformation
+ dliaPluginIOTransformations
+ openvino::runtime
+ openvino_dev_api
+ ${TBB_IMPORTED_TARGETS}
+)
+else()
+ target_link_libraries(dla_aot_splitter_plugin
+ PRIVATE
+##
+ pthread
+ dla_op_transformation
+ dliaPluginIOTransformations
+ openvino::runtime
+ openvino_dev_api
+ ${TBB_IMPORTED_TARGETS}
+)
+endif()
+
+if (DISABLE_JIT)
+ target_include_directories(dla_aot_splitter_plugin PRIVATE
+ $ENV{COREDLA_ROOT}/util/inc
+ $ENV{COREDLA_XUTIL_DIR}/compiled_result/inc
+ )
+ target_sources(dla_aot_splitter_plugin PRIVATE $ENV{COREDLA_XUTIL_DIR}/compiled_result/src/compiled_result_reader_writer.cpp)
+
+ if (EXISTS $ENV{COREDLA_ROOT}/inc)
+ target_include_directories(dla_aot_splitter_plugin PUBLIC $ENV{COREDLA_ROOT}/inc)
+ else()
+ target_include_directories(dla_aot_splitter_plugin PUBLIC $ENV{COREDLA_ROOT}/build/coredla/dla/inc)
+ endif()
+else()
+ target_link_libraries(dla_aot_splitter_plugin
+ PRIVATE
+ dla_compiled_result
+ archparam
+ )
+endif()
+
+set_target_properties(dliaPluginIOTransformations PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+if (WIN32)
+ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/plugins_aot_splitter_win.xml ${CMAKE_CURRENT_BINARY_DIR}/plugins_aot_splitter.xml COPYONLY)
+else()
+ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/plugins_aot_splitter.xml ${CMAKE_CURRENT_BINARY_DIR}/ COPYONLY)
+endif()
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h
new file mode 100644
index 0000000..697b5d2
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h
@@ -0,0 +1,38 @@
+// Copyright 2020 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+#ifndef _DLA_AOT_STRUCTS_H_
+#define _DLA_AOT_STRUCTS_H_
+
+#include "compiled_result.h"
+
+// Custom type
+typedef unsigned char uint8_t;
+
+// All size and offset fields are in bytes.
+typedef struct {
+ const dla::CompiledResult* compiled_result;
+ uint32_t config_buffer_size;
+ uint32_t filter_bias_scale_buffer_size;
+ uint8_t *input_feature_buffer;
+ uint32_t input_feature_buffer_size;
+ uint32_t output_feature_buffer_size;
+ uint32_t intermediate_feature_buffer_size;
+} DLAInput;
+
+typedef struct {
+ // Its size is output_feature_buffer_size in DLAInput.
+ uint8_t *output_feature_buffer;
+} DLAOutput;
+
+#endif // _DLA_AOT_STRUCTS_H_
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h
new file mode 100644
index 0000000..7fa23e8
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h
@@ -0,0 +1,49 @@
+// Copyright 2020-2023 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+#ifndef _DLA_AOT_UTILS_H_
+#define _DLA_AOT_UTILS_H_
+
+#include <fcntl.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+#include <sys/stat.h>
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "dla_aot_structs.h"
+
+using google::protobuf::io::FileInputStream;
+
+// fp16 feature element (in bytes)
+// TODO: extract it from arch / compiled result
+const uint32_t feature_elem_size = 2;
+
+//////////////////////////////////////////////////////////////////////////////
+// Dump DLA input and output to the following files:
+// - config_filter.mem: config + filter buffer
+// - input_feature.mem: input feature buffer
+// - output_feature.mem: output feature buffer (emulation results)
+//
+// Each .mem file is a text file, with one byte (in hex) per line.
+//////////////////////////////////////////////////////////////////////////////
+
+void writeInputOutputToFiles(const std::vector<int>& arch_hash,
+ const std::string& build_version,
+ const std::string& arch_name,
+ const DLAInput& input,
+ const DLAOutput& output);
+
+#endif // _DLA_AOT_UTILS_H_
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h
new file mode 100644
index 0000000..dd8e5fa
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h
@@ -0,0 +1,79 @@
+// Copyright 2020-2023 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+#ifndef RAW_BATCH_JOB_H
+#define RAW_BATCH_JOB_H
+
+#include <assert.h>
+#include <cstdio>
+#if defined(_WIN32) || defined(_WIN64)
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#else
+#include <dlfcn.h>
+#endif
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <thread>
+#include <memory>
+
+#include "batch_job.h"
+#include "dla_aot_structs.h"
+#include "raw_device.h"
+
+// RawBatchJob represents one batch execution
+// Contains functions to start DLA
+class RawBatchJob : public BatchJob {
+ private:
+ const CompiledResult* compiledResult;
+ DLAInput* dlaBuffers_;
+ DLAOutput output_;
+ int instance_;
+ uint32_t debugLevel_;
+ std::string AES_key_;
+ std::string IV_key_;
+ bool encryption_enabled_;
+ RawBatchJob(const CompiledResult* compiledResult,
+ DLAInput* dlaBuffers,
+ int instance,
+ uint32_t debugLevel,
+ std::string AES_key,
+ std::string IV_key,
+ bool encryption_enabled);
+
+ public:
+ RawBatchJob(const RawBatchJob&) = delete;
+ RawBatchJob(RawBatchJob&) = delete;
+ RawBatchJob& operator=(const RawBatchJob&) = delete;
+ static unique_ptr<BatchJob> MakeUnique(const CompiledResult* compiledResult,
+ DLAInput* dlaBuffers,
+ int instance,
+ uint32_t debugLevel,
+ std::string AES_key,
+ std::string IV_key,
+ bool encryption_enabled);
+  // @param inputArray - ptr to CPU array containing input data to be copied to DDR
+ // blocking function
+ void LoadInputFeatureToDDR(void* inputArray);
+ // Starts DLA by writing to CSR in DLA DMA; the DDR addresses of graph config and input data
+ void StartDla() override;
+ // @param outputArray - ptr to CPU array where the output data in DDR is copied into
+ // outputArray must be allocated by the caller (size >= output_size_ddr)
+ // blocking function
+ void ReadOutputFeatureFromDDR(void* outputArray) const;
+ void ScheduleInputFeature() const {}
+};
+
+#endif
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h
new file mode 100644
index 0000000..168707e
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h
@@ -0,0 +1,81 @@
+// Copyright 2020-2023 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+#ifndef RAW_DEVICE_H
+#define RAW_DEVICE_H
+
+#include <assert.h>
+#include <chrono>
+#include <cstdio>
+#include <cstring>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <thread>
+#include <vector>
+#include <map>
+#include "arch_params.h"
+#include "compiled_result.h"
+#include "device.h"
+using namespace std;
+using namespace dla;
+class GraphJob;
+
+class RawDevice : public Device {
+ public:
+ GraphJob* CreateGraphJob(const CompiledResult* compiledResult,
+ size_t numPipelines,
+ int instance,
+ std::string AES_key,
+ std::string IV_key,
+ bool encryption_enabled,
+ const std::string export_dir,
+ const std::string parameter_rom_export_dir);
+ // Return number of DLA jobs completed till now
+ // Used for debugging
+ int GetNumInferencesCompleted(int instance) const override;
+ // Must be called when there are no active jobs on DLA
+ // Returns the total time taken by DLA jobs on hardware (in milliseconds)
+ double GetActiveHWTimeMs(int instance) const override;
+ // Must be called when there are no active jobs on DLA
+ // Returns the average of time taken per job (in milliseconds)
+ // Avg Time per job < Active Time
+ double GetAvgHWTimePerJobMs(size_t num_jobs, int instance) const override;
+ RawDevice(const arch_params* archParams);
+ void WaitForDla(int instance,
+ size_t threadId = 0,
+ std::function<bool()> isCancelled = nullptr) override; // threadId is for debugging purpose only
+ std::string SchedulerGetStatus() const override { return ""; }
+ bool InitializeScheduler(uint32_t sourceBufferSize,
+ uint32_t dropSourceBuffers,
+ uint32_t numInferenceRequests,
+ const std::string source_fifo_file = "") override {
+ return true;
+ }
+ int GetNumInstances() const override { return numInstances_; }
+ int GetSizeCsrDescriptorQueue() const override { return -1; } // meaningless here
+ double GetCoreDlaClockFreq() const override { return -1.0; } // meaningless here
+ std::map<std::string, uint64_t> ReadDebugNetwork(int instance) const override {
+ return std::map<std::string, uint64_t>();
+ };
+ uint64_t GetNumInputFeatureMemoryReads(int instance) const override { return 0; };
+ uint64_t GetNumFilterMemoryReads(int instance) const override {return 0; };
+ uint64_t GetNumOutputFeatureMemoryWrites(int instance) const override {return 0; };
+
+ private:
+ RawDevice() = delete;
+ vector<unique_ptr<GraphJob>> allGraphJobs_;
+ int numInstances_;
+ const arch_params* archParams_;
+};
+
+#endif  // RAW_DEVICE_H
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h
new file mode 100644
index 0000000..38ad075
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h
@@ -0,0 +1,80 @@
+// Copyright 2020-2023 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+#ifndef RAW_GRAPH_JOB_H
+#define RAW_GRAPH_JOB_H
+
+#include <assert.h>
+#include <cstdio>
+#include <memory>
+#include <vector>
+#if defined(_WIN32) || defined(_WIN64)
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#else
+#include <dlfcn.h>
+#endif
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <thread>
+#include "compiled_result.h"
+
+#include "dla_aot_structs.h"
+#include "graph_job.h"
+#include "raw_batch_job.h"
+#include "raw_device.h"
+using namespace dla;
+/*! RawGraphJob is a DLA compiled graph loaded onto an emulation device
+ * Initialized with Emulator Device object
+ * RawGraphJob stores arrays filter, bias, config, inputs and outputs
+ * It provides handle to "batch job" objects that are used to load input and start DLA for one batch
+ */
+class RawGraphJob : public GraphJob {
+ public:
+ static unique_ptr<GraphJob> MakeUnique(const arch_params* archParams,
+ const CompiledResult* compiled_result,
+ size_t numPipelines,
+ int instance,
+ uint32_t debugLevel,
+ std::string AES_key,
+ std::string IV_key,
+ bool encryption_enabled);
+ // Returns an unused batch job object
+ // If all batch jobs are used, returns null
+ // Increments batchJobsRequested_
+ // Thread safe
+ BatchJob* GetBatchJob();
+ RawGraphJob(const GraphJob&) = delete;
+ RawGraphJob(RawGraphJob&) = delete;
+ RawGraphJob& operator=(const RawGraphJob&) = delete;
+
+ private:
+ DLAInput dlaBuffers_;
+ vector<unique_ptr<BatchJob>> batchJobs_;
+ int instance_;
+ uint32_t debugLevel_;
+ unsigned int batchJobsRequested_;
+ std::mutex graphJobMutex;
+ RawGraphJob(const arch_params* archParams,
+ const CompiledResult* compiledResult,
+ size_t numPipelines,
+ int instance,
+ uint32_t debugLevel,
+ std::string AES_key,
+ std::string IV_key,
+ bool encryption_enabled);
+};
+
+#endif
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter.xml b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter.xml
new file mode 100644
index 0000000..2f2d24e
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter.xml
@@ -0,0 +1,18 @@
+<ie>
+ <plugins>
+ <plugin name="GNA" location="libopenvino_intel_gna_plugin.so">
+ </plugin>
+ <plugin name="HETERO" location="libcoreDLAHeteroPlugin.so">
+ </plugin>
+ <plugin name="CPU" location="libopenvino_intel_cpu_plugin.so">
+ </plugin>
+ <plugin name="MULTI" location="libopenvino_auto_plugin.so">
+ </plugin>
+ <plugin name="GPU" location="libopenvino_intel_gpu_plugin.so">
+ </plugin>
+ <plugin name="MYRIAD" location="libopenvino_intel_myriad_plugin.so">
+ </plugin>
+ <plugin name="FPGA" location="libdla_aot_splitter_plugin.so">
+ </plugin>
+ </plugins>
+</ie>
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter_win.xml b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter_win.xml
new file mode 100755
index 0000000..aeeedde
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter_win.xml
@@ -0,0 +1,22 @@
+<ie>
+ <plugins>
+ <plugin name="AUTO" location="openvino_auto_plugin.dll">
+ </plugin>
+ <plugin name="BATCH" location="openvino_auto_batch_plugin.dll">
+ </plugin>
+ <plugin name="CPU" location="openvino_intel_cpu_plugin.dll">
+ </plugin>
+ <plugin name="GNA" location="openvino_intel_gna_plugin.dll">
+ </plugin>
+ <plugin name="GPU" location="openvino_intel_gpu_plugin.dll">
+ </plugin>
+ <plugin name="HETERO" location="coreDLAHeteroPlugin.dll">
+ </plugin>
+ <plugin name="MULTI" location="openvino_auto_plugin.dll">
+ </plugin>
+ <plugin name="MYRIAD" location="openvino_intel_myriad_plugin.dll">
+ </plugin>
+ <plugin name="FPGA" location="dla_aot_splitter_plugin.dll">
+ </plugin>
+ </plugins>
+</ie>
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg
new file mode 100644
index 0000000..3288819
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg
@@ -0,0 +1,4 @@
+filter=-build/header_guard,-runtime/explicit,-build/include_subdir,-runtime/references,-build/c++11,-runtime/int
+exclude_files=^(?!pe_array_sim.cpp).*\.cpp
+linelength=160
+headers=h,hpp
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp
new file mode 100644
index 0000000..4317201
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp
@@ -0,0 +1,117 @@
+// Copyright 2020 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/*
+ This file contains some helper utilities to output coredla data blobs to files
+ in the current working directory
+*/
+
+#include "dla_aot_utils.h"
+
+// The resulting file is expected to be consumed by RTL testbench or hardware.
+static void writeBufferToBinFile(const uint8_t *buffer, uint32_t buffer_size,
+ const char *file_path) {
+ FILE *fp = fopen(file_path, "wb");
+ assert(nullptr != fp);
+
+ if (buffer_size && !fwrite(buffer, buffer_size, 1, fp))
+ {
+ std::cout << "ERROR writing to output file " << file_path << std::endl;
+ }
+
+ fclose(fp);
+}
+
+// The resulting file is expected to be consumed by RTL testbench or hardware.
+static void writeBufferToFile(const uint8_t *buffer, uint32_t buffer_size,
+ const char *file_path) {
+ FILE *fp = fopen(file_path, "w");
+ assert(nullptr != fp);
+
+  // Emit comma-separated 32-bit hex words, starting a new line every 128 bytes
+ for (uint32_t b = 0; b < buffer_size; b+=4) {
+ if (b && ((b % 128) == 0))
+ {
+ fprintf(fp, "\n");
+ }
+ fprintf(fp, "0x%08x", *((uint32_t*)&buffer[b]));
+ if(b + 4 < buffer_size)
+ {
+ fprintf(fp, ",");
+ }
+ }
+
+ fclose(fp);
+}
+
+// Create all files that the splitter is responsible for
+void writeInputOutputToFiles (
+ const std::vector<int>& arch_hash,
+ const std::string& build_version,
+ const std::string& arch_name,
+ const DLAInput &input,
+ const DLAOutput &output
+) {
+ uint8_t arch_build[ARCH_HASH_SIZE + BUILD_VERSION_SIZE + ARCH_NAME_SIZE];
+
+ memset(&arch_build[0], 0, ARCH_HASH_SIZE + BUILD_VERSION_SIZE);
+ memcpy(&arch_build[0], arch_hash.data(), ARCH_HASH_SIZE);
+ memcpy(&arch_build[ARCH_HASH_SIZE], build_version.c_str(), std::min(build_version.length(),static_cast<size_t>(BUILD_VERSION_SIZE)));
+ memcpy(&arch_build[ARCH_HASH_SIZE + BUILD_VERSION_SIZE], arch_name.c_str(), std::min(arch_name.length(),static_cast<size_t>(ARCH_NAME_SIZE)));
+ writeBufferToFile(arch_build,
+ sizeof(arch_build),
+ "arch_build.mem");
+ writeBufferToFile(arch_build,
+ sizeof(arch_build),
+ "arch_build.bin");
+ const auto &config_fbs_buffer =
+ input.compiled_result->get_config_filter_bias_scale_array();
+
+ // Only dump filters and config memory file when they are saved in DDR
+ if (!input.compiled_result->get_ddrfree_header().enable_parameter_rom) {
+ writeBufferToFile(&(config_fbs_buffer[0][0]),
+ input.config_buffer_size,
+ "config.mem");
+ writeBufferToBinFile(&(config_fbs_buffer[0][0]),
+ input.config_buffer_size,
+ "config.bin");
+ writeBufferToFile(&(config_fbs_buffer[0][0]) + input.config_buffer_size,
+ input.filter_bias_scale_buffer_size,
+ "filter.mem");
+ writeBufferToBinFile(&(config_fbs_buffer[0][0]) + input.config_buffer_size,
+ input.filter_bias_scale_buffer_size,
+ "filter.bin");
+ } else {
+ std::cout << "Graph filters and DLA configs are not dumped because parameter ROM is enabled in the AOT file." << std::endl;
+ }
+ uint8_t* input_buffer = nullptr;
+ size_t input_size = 0;
+ if (input.input_feature_buffer) {
+ input_buffer = input.input_feature_buffer;
+ input_size = input.input_feature_buffer_size;
+ }
+ writeBufferToFile(input_buffer,
+ input_size,
+ "input.mem");
+ writeBufferToBinFile(input_buffer,
+ input_size,
+ "input.bin");
+ uint32_t inter_size = input.intermediate_feature_buffer_size;
+ writeBufferToFile((const uint8_t*)&inter_size,
+ sizeof(inter_size),
+ "inter_size.mem");
+ uint32_t output_size = input.output_feature_buffer_size;
+ writeBufferToFile((const uint8_t*)&output_size,
+ sizeof(output_size),
+ "output_size.mem");
+}
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp
new file mode 100644
index 0000000..23247d5
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp
@@ -0,0 +1,68 @@
+// Copyright 2022 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/*
+ The raw_batch_job, raw_graph_job, and raw_device implement the interfaces
+  used by dliaPlugin to mimic an inference flow without actually performing
+  inference. It is used to capture the transformed input produced by the
+  dliaPlugin upper layers.
+*/
+
+#include "raw_batch_job.h"
+#include "dla_aot_utils.h"
+
+unique_ptr<BatchJob> RawBatchJob::MakeUnique(const CompiledResult * compiledResult,
+ DLAInput* dlaBuffers,
+ int instance,
+ uint32_t debugLevel,
+ std::string AES_key,
+ std::string IV_key,
+ bool encryption_enabled) {
+ return unique_ptr<BatchJob>(new RawBatchJob(compiledResult, dlaBuffers, instance, debugLevel, AES_key, IV_key, encryption_enabled));
+}
+
+RawBatchJob::RawBatchJob(const CompiledResult * compiledResult,
+ DLAInput* dlaBuffers,
+ int instance,
+ uint32_t debugLevel,
+ std::string AES_key,
+ std::string IV_key,
+ bool encryption_enabled) : compiledResult(compiledResult) {
+ dlaBuffers_ = dlaBuffers;
+ instance_ = instance;
+ debugLevel_= debugLevel;
+ AES_key_ = AES_key;
+ IV_key_ = IV_key;
+ encryption_enabled_ = encryption_enabled;
+ output_.output_feature_buffer = new uint8_t[dlaBuffers_->output_feature_buffer_size];
+ memset(output_.output_feature_buffer, 0, dlaBuffers_->output_feature_buffer_size);
+ assert(nullptr != output_.output_feature_buffer);
+}
+
+// Emulation device has no DDR. This function is just storing a pointer to the array
+// Note: inputArray should not be deleted until the end of the Emulation runs
+// i.e. StartDla completes
+void RawBatchJob::LoadInputFeatureToDDR(void* inputArray) {
+ dlaBuffers_->input_feature_buffer = (uint8_t*) inputArray;
+ StartDla();
+}
+
+void RawBatchJob::StartDla() {
+ // Write input / output buffers to files
+ writeInputOutputToFiles(compiledResult->get_arch_hash(), compiledResult->get_build_version_string(), compiledResult->get_arch_name(), *dlaBuffers_, output_);
+}
+
+// Emulation device has no DDR. Output is copied into the outputArray.
+void RawBatchJob::ReadOutputFeatureFromDDR(void* outputArray) const {
+ memcpy(outputArray, output_.output_feature_buffer, dlaBuffers_->output_feature_buffer_size);
+}
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp
new file mode 100644
index 0000000..0b8e838
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp
@@ -0,0 +1,67 @@
+// Copyright 2022 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/*
+ The raw_batch_job, raw_graph_job, and raw_device implement the interfaces
+  used by dliaPlugin to mimic an inference flow without actually performing
+  inference. It is used to capture the transformed input produced by the
+  dliaPlugin upper layers.
+*/
+
+#include "raw_device.h"
+#include "raw_graph_job.h"
+unique_ptr<Device> Device::MakeUnique(const arch_params* archParams,
+ uint32_t waitForDlaTimeoutSeconds) {
+ return unique_ptr<Device>(new RawDevice(archParams));
+}
+
+RawDevice::RawDevice(const arch_params* archParams) {
+ numInstances_ = 1;
+ archParams_ = archParams;
+}
+
+GraphJob* RawDevice::CreateGraphJob(const CompiledResult * compiledResult,
+ size_t numPipelines,
+ int instance,
+ std::string AES_key,
+ std::string IV_key,
+ bool encryption_enabled,
+ const std::string export_dir,
+ const std::string parameter_rom_export_dir)
+{
+ (void) export_dir; // unused in HW runtime. CoreDLA utilizes base pointers, which the SW reference utilizes this variable. We void it here.
+ (void) parameter_rom_export_dir;
+ assert(instance < numInstances_);
+ allGraphJobs_.push_back(move(RawGraphJob::MakeUnique(archParams_, compiledResult, numPipelines, instance, 0,
+ AES_key, IV_key, encryption_enabled)));
+ return (allGraphJobs_.back()).get();
+}
+
+void RawDevice::WaitForDla(int instance, size_t threadId/* = 0 */, std::function<bool()> isCancelled) {
+ //RawDevice does not do any real work. No need to wait
+}
+
+int RawDevice::GetNumInferencesCompleted(int instance) const {
+ std::cout << "This function, GetNumInferencesCompleted, is not implemented for raw device" << std::endl;
+ return 0;
+}
+
+double RawDevice::GetActiveHWTimeMs(int instance) const {
+ std::cout << "This function, GetActiveHWTimeMs, is not implemented for raw device" << std::endl;
+ return 0;
+}
+
+double RawDevice::GetAvgHWTimePerJobMs(size_t num_jobs, int instance) const {
+ std::cout << "This function, GetAvgHWTimePerJobMs, is not implemented for raw device" << std::endl;
+ return 0;
+}
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp
new file mode 100644
index 0000000..c698110
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp
@@ -0,0 +1,89 @@
+// Copyright 2022 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/*
+ The raw_batch_job, raw_graph_job, and raw_device implement the interfaces
+  used by dliaPlugin to mimic an inference flow without actually performing
+  inference. It is used to capture the transformed input produced by the
+  dliaPlugin upper layers.
+*/
+
+#include "raw_graph_job.h"
+#include "dla_aot_utils.h"
+#include <fstream>
+#include "dla_defines.h"
+
+unique_ptr<GraphJob> RawGraphJob::MakeUnique(const arch_params* archParams,
+ const CompiledResult * compiledResult,
+ size_t numPipelines,
+ int instance,
+ uint32_t debugLevel = 0,
+ std::string AES_key = "",
+ std::string IV_key = "",
+ bool encryption_enabled = false)
+{
+ return unique_ptr<GraphJob>(new RawGraphJob(archParams, compiledResult, numPipelines, instance, debugLevel, AES_key, IV_key, encryption_enabled));
+}
+
+RawGraphJob::RawGraphJob(const arch_params* archParams,
+ const CompiledResult * compiledResult,
+ size_t numPipelines,
+ int instance,
+ uint32_t debugLevel,
+ std::string AES_key,
+ std::string IV_key,
+ bool encryption_enabled)
+{
+ assert(numPipelines);
+ instance_ = instance;
+ debugLevel_ = debugLevel;
+ batchJobsRequested_ = 0;
+ // input feature buffer size
+ // TODO: support multi-input graph
+ dlaBuffers_.input_feature_buffer_size =
+ compiledResult->get_conv_input_size_in_bytes();
+ // input feature buffer to be allocated outside this routine
+
+ // output buffer size
+ dlaBuffers_.output_feature_buffer_size =
+ compiledResult->get_conv_output_size_in_bytes();
+
+ // intermediate buffer size
+ dlaBuffers_.intermediate_feature_buffer_size =
+ compiledResult->get_conv_intermediate_size_in_bytes();
+
+ // config and filter buffer size
+ size_t num_config_words = compiledResult->get_num_config_words();
+ dlaBuffers_.config_buffer_size = num_config_words * CONFIG_WORD_SIZE;
+ dlaBuffers_.filter_bias_scale_buffer_size =
+ compiledResult->get_total_filter_bias_scale_buffer_size();
+ // store a pointer to CompiledResult to use config and filter buffer directly without copying
+ dlaBuffers_.compiled_result = compiledResult;
+ for(size_t i = 0; i < numPipelines; i++) {
+ batchJobs_.push_back(move(RawBatchJob::MakeUnique(compiledResult, &dlaBuffers_, instance_, debugLevel_, AES_key, IV_key, encryption_enabled)));
+ }
+
+ dlaBuffers_.input_feature_buffer = NULL;
+}
+
+BatchJob* RawGraphJob::GetBatchJob() {
+ graphJobMutex.lock();
+ if(batchJobsRequested_ >= batchJobs_.size()) {
+ graphJobMutex.unlock();
+ return nullptr;
+ }
+ auto * batchJob = batchJobs_[batchJobsRequested_].get();
+ batchJobsRequested_++;
+ graphJobMutex.unlock();
+ return batchJob;
+}
diff --git a/python/openvino/runtime/dla_aot_splitter/inc/dla_aot_splitter.hpp b/python/openvino/runtime/dla_aot_splitter/inc/dla_aot_splitter.hpp
new file mode 100644
index 0000000..44448e8
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/inc/dla_aot_splitter.hpp
@@ -0,0 +1,130 @@
+// Copyright 2022-2023 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+#pragma once
+
+#include <gflags/gflags.h>
+#include <iostream>
+#include <string>
+#include <vector>
+
+/// @brief message for help argument
+static const char help_message[] = "Print a usage message";
+
+/// @brief message for images argument
+static const char input_message[] =
+ "Optional. Path to a folder with images and/or binaries or to specific image or binary file.";
+
+/// @brief message for compiled model argument
+static const char compiled_model_message[] = "Optional. Path to a .bin file with a trained compiled model";
+
+// @brief message for the custom plugins.xml file option
+static const char plugins_message[] = "Optional. Select a custom plugins to use.";
+
+// @brief message folding_option flag
+static const char folding_option_message[] = "Optional. Set the folding options for dla compiler: options 0-3.";
+
+// @brief message fold_preprocessing flag
+static const char fold_preprocessing_message[] = "Optional. Enable fold preprocessing option for dla compiler.";
+
+// @brief message bgr flag
+static const char bgr_message[] = "Optional. Indicate images are in bgr format.";
+
+// @brief message encryption_key flag
+static const char encryption_key_message[] =
+ "Optional. Encryption key (using hexidecimal characters, 16 bytes- 32 hexidecimal char).";
+
+// @brief message encryption_iv flag
+static const char encryption_iv_message[] =
+ "Optional. Initialization vector for encryption. (8 bytes - 16 hexidecimal char)";
+
+// @brief message binary flag
+static const char bin_data_message[] =
+ "Optional. Specify that the input should be read as binary data (otherwise, if input tensor has depth 1, or 3 it "
+ "will default to U8 image processing).";
+
+/// @brief message resize flag
+static const char input_image_resize_message[] =
+ "Optional. Input image resizing methods when the input image width and height do not match the desired "
+ "input width and height of the model. resize: Resizing the input image to the model input size; "
+ "pad_resize: Pad the input image with black pixels (i.e., 0) into a squared image and "
+ "resize the padded image to model input size.";
+
+/// @brief message enable early-access features flag
+static const char enable_early_access_message[] =
+ "Optional. Enables early access (EA) features of FPGA AI Suite. These are features that are actively being "
+ "developed and have not yet met production quality standards. These features may have flaws. "
+ "Consult the FPGA AI Suite documentation for details.";
+
+/// @brief Define flag for showing help message <br>
+DEFINE_bool(h, false, help_message);
+
+/// @brief Declare flag for showing help message <br>
+DECLARE_bool(help);
+
+/// @brief Define parameter for set image file <br>
+/// i or mif is a required parameter
+DEFINE_string(i, "", input_message);
+
+/// @brief Define parameter for compiled model file <br>
+/// It is not a required parameter
+DEFINE_string(cm, "", compiled_model_message);
+
+/// @brief Path to a plugins_xml file
+DEFINE_string(plugins, "", plugins_message);
+
+/// @brief Define flag whether the image is in bgr format
+DEFINE_bool(bgr, false, bgr_message);
+
+/// Select folding options; 0,1,2,3
+DEFINE_int32(folding_option, 1, folding_option_message);
+
+/// @brief Define flag for enabling folding preprocessing
+DEFINE_bool(fold_preprocessing, false, fold_preprocessing_message);
+
+/// @brief encryption key
+DEFINE_string(encryption_key, "", encryption_key_message);
+
+/// @brief initialization vector
+DEFINE_string(encryption_iv, "", encryption_iv_message);
+
+/// @brief Specify that the inputs should be read as binary.
+DEFINE_bool(bin_data, false, bin_data_message);
+
+/// @brief Define flag for using input image resize <br>
+DEFINE_string(resize_type, "", input_image_resize_message);
+
+/// @brief Enables early-access (EA) features of CoreDLA <br>
+DEFINE_bool(enable_early_access, false, enable_early_access_message);
+
+/**
+ * @brief This function show a help message
+ */
+static void showUsage() {
+ std::cout << std::endl;
+ std::cout << "aot_splitter [OPTION]" << std::endl;
+ std::cout << "Options:" << std::endl;
+ std::cout << std::endl;
+ std::cout << " -h, --help " << help_message << std::endl;
+ std::cout << " -i \"<path>\" " << input_message << std::endl;
+ std::cout << " -cm \"<path>\" " << compiled_model_message << std::endl;
+ std::cout << " -plugins " << plugins_message << std::endl;
+ std::cout << " -bgr " << bgr_message << std::endl;
+ std::cout << " -bin_data " << bin_data_message << std::endl;
+ std::cout << " -resize_type \"resize/pad_resize\" " << input_image_resize_message << std::endl;
+ std::cout << " -folding_option " << folding_option_message << std::endl;
+ std::cout << " -fold_preprocessing " << fold_preprocessing_message << std::endl;
+ std::cout << " -encryption_key " << encryption_key_message << std::endl;
+ std::cout << " -encryption_iv " << encryption_iv_message << std::endl;
+ std::cout << " -enable_early_access " << enable_early_access_message << std::endl;
+}
diff --git a/python/openvino/runtime/dla_aot_splitter/sdl.cmake b/python/openvino/runtime/dla_aot_splitter/sdl.cmake
new file mode 100644
index 0000000..3f8af7a
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/sdl.cmake
@@ -0,0 +1,96 @@
+
+####################################################################
+## SDL required compiler flags
+####################################################################
+# Needed for all builds
+set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wformat -Wformat-security")
+set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
+
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat -Wformat-security")
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
+
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations")
+
+# NOTE(review): -fPIE is a compile-time flag; linkers normally expect -pie.
+# Left as-is since toolchains accept it — confirm intent before changing.
+set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fPIE")
+set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fPIE")
+
+# Release build only
+set (CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -D_FORTIFY_SOURCE=2")
+if (GCC_VERSION VERSION_GREATER 4.9 OR GCC_VERSION VERSION_EQUAL 4.9)
+    set (CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fstack-protector-strong")
+    set (CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -z noexecstack -z relro -z now")
+
+    # These are for 8478-CT158 in the SDL process
+    # ( https://sdp-prod.intel.com/bunits/intel/coredla/coredla-ip-20212/tasks/phase/development/8478-CT158/ )
+else()
+    set (CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fstack-protector-all")
+endif()
+
+set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC -D_FORTIFY_SOURCE=2")
+if (GCC_VERSION VERSION_GREATER 4.9 OR GCC_VERSION VERSION_EQUAL 4.9)
+    set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fstack-protector-strong")
+    set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -z noexecstack -z relro -z now")
+else()
+    set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fstack-protector-all")
+endif()
+
+# These are for 8478-CT158 in the SDL process
+# ( https://sdp-prod.intel.com/bunits/intel/coredla/coredla-ip-20212/tasks/phase/development/8478-CT158/ )
+# Fix: append the CT158 hardening flags to the per-config Release flags.
+# The previous code reset CMAKE_C_FLAGS_RELEASE / CMAKE_CXX_FLAGS_RELEASE
+# from ${CMAKE_CXX_FLAGS}, which silently discarded the -D_FORTIFY_SOURCE
+# and -fstack-protector-* flags set above, fed C++ flags to the C compiler,
+# and contained a duplicated CXX assignment.
+set (CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fno-strict-overflow -fno-delete-null-pointer-checks -fwrapv")
+set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fno-strict-overflow -fno-delete-null-pointer-checks -fwrapv")
+
+####################################################################
+
+set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3")
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
+
+set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -ggdb3")
+set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -ggdb3")
+
+#### Sanitizer settings ####
+# Address
+set(CMAKE_C_FLAGS_ASAN "-O1 -g -fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls")
+set(CMAKE_CXX_FLAGS_ASAN "-O1 -g -fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls")
+
+# Memory
+set(CMAKE_C_FLAGS_MSAN "-O1 -g -fsanitize=memory -fno-omit-frame-pointer -fno-optimize-sibling-calls")
+set(CMAKE_CXX_FLAGS_MSAN "-O1 -g -fsanitize=memory -fno-omit-frame-pointer -fno-optimize-sibling-calls")
+
+# Thread
+set(CMAKE_C_FLAGS_TSAN "-O1 -g -fsanitize=thread -fno-omit-frame-pointer -fno-optimize-sibling-calls")
+set(CMAKE_CXX_FLAGS_TSAN "-O1 -g -fsanitize=thread -fno-omit-frame-pointer -fno-optimize-sibling-calls")
+
+
+set (CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+# Enable all warnings except unknown-pragmas. Wunknown-pragmas must be excluded because
+# it is triggered by header file included from OpenCL runtime
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unknown-pragmas")
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-unknown-pragmas")
+
+# Make warnings errors to avoid having them in SDL report
+#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
+#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror")
+
+# Should cleanup the signed and unsigned compares then remove this exception
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=sign-compare -Wno-error=unused-function -Wno-error=switch -Wno-error=unused-variable -Wno-error=unused-value -Wno-error=unused-but-set-variable -Wno-error=undef -Wno-error=return-type -Wno-error=reorder")
+
+# This is required on Ubuntu 18; the new linker behaviour transforms
+# RPATH into RUNPATH (which can be seen in the output of 'readelf -d').
+# However, RUNPATH does not work recursively, so when OpenVINO reads
+# the plugins.xml file and searches for the specified libcoreDlaRuntimePlugin.so
+# library, it fails. The --disable-new-dtags option causes the linker
+# to keep RPATH as RPATH (rather than morphing to RUNPATH).
+#
+# References:
+# https://stackoverflow.com/questions/52018092/how-to-set-rpath-and-runpath-with-gcc-ld
+# https://stackoverflow.com/questions/59248421/c-secondary-dependency-resolution-with-runpath
+#
+# The solution below seems preferable to setting LD_LIBRARY_PATH, if only barely.
+# For additional motivation, go ahead and throw away part of your day reading either
+# of the screeds:
+# http://xahlee.info/UnixResource_dir/_/ldpath.html
+# https://gms.tf/ld_library_path-considered-harmful.html
+# You may find that neither is fully convincing, of course.
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--disable-new-dtags")
diff --git a/python/openvino/runtime/dla_aot_splitter/src/main.cpp b/python/openvino/runtime/dla_aot_splitter/src/main.cpp
new file mode 100644
index 0000000..ffc098e
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/src/main.cpp
@@ -0,0 +1,475 @@
+// Copyright 2022-2023 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+#include <stdio.h>
+#include <sys/stat.h>
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+#if defined(_WIN32) || defined(_WIN64)
+#else
+#include <dirent.h>
+#include <unistd.h>
+#endif
+
+#include <openvino/openvino.hpp>
+#include "samples/args_helper.hpp"
+#include "samples/common.hpp"
+#include "samples/slog.hpp"
+
+// #include "average_precision.hpp"
+#include "dla_aot_splitter.hpp"
+// #include "infer_request_wrap.hpp"
+#include "dla_plugin_config.hpp"
+#include "inputs_filling.hpp"
+#include "utils.hpp"
+
+using DebugNetworkData = std::map<std::string, uint64_t>;
+
+// Return true when a file or directory exists at `name` (i.e. stat() succeeds).
+bool exists_test(const std::string& name) {
+  struct stat file_info;
+  return stat(name.c_str(), &file_info) == 0;
+}
+
+// This function appears in dla_benchmark/main.cpp too.
+// Verify that `name`, when it is a directory, can actually be opened.
+// On POSIX: if opendir() succeeds the path is a readable directory -> true.
+// Otherwise (and always on Windows, where the opendir probe is compiled out):
+// if stat() succeeds and the path is NOT a regular file (i.e. an unopenable
+// directory or other special file), log and throw std::logic_error.
+// NOTE: if stat() itself fails (path missing), this returns true — existence
+// is checked separately via exists_test() by the callers.
+bool dir_open_test(const std::string& name) {
+#if (!defined(_WIN32) && !defined(_WIN64))
+  // If we can open the directory then return true
+  DIR* dp = opendir(name.c_str());
+  if (dp != nullptr) {
+    closedir(dp);
+    return true;
+  }
+#endif  // !_WIN32 && !_WIN64
+  struct stat sb;
+  if (stat(name.c_str(), &sb) == 0) {
+    // Path exists but could not be opened as a directory: acceptable only if
+    // it is a regular file (the caller may pass a file path directly).
+    if ((sb.st_mode & S_IFMT) != S_IFREG) {
+      slog::err << "File " << name << " cannot be opened!" << slog::endl;
+      throw std::logic_error("File cannot be opened!");
+    }
+  }
+  return true;
+}
+
+// copy arguments into a new array to split the '-i=<arg>' into
+// two arguments (i.e. '-i' and '<arg>') to overcome a bug
+// parseInputFilesArguments function where is doesn't recognize
+// the -i=<arg> format
+// Copy the arguments into a new array, splitting any '-i=<arg>' into two
+// arguments ('-i' and '<arg>') to work around a bug in the
+// parseInputFilesArguments function where it doesn't recognize the
+// -i=<arg> format, then hand the result to gflags.
+//
+// Fixes over the previous version:
+//  * the array is sized for the worst case (every argument splits in two);
+//    previously only argc+1 slots were allocated, overflowing when more
+//    than one '-i=' argument was given.
+//  * every allocated string is tracked and freed; previously the cleanup
+//    loop used the post-parse num_args, which gflags shrinks when it
+//    removes recognized flags, leaking the removed entries.
+void parseCommandLine(int argc, char** argv) {
+  int num_args = argc;
+  // Worst case: every argument is '-i=<arg>' and splits into two.
+  char** arguments = new char*[2 * argc + 1];
+  std::vector<char*> allocated;  // every new[]'d string, for cleanup
+  for (int i = 0, j = 0; j < argc; ++i, ++j) {
+    if (strstr(argv[j], "-i=")) {
+      // number of arguments will increase by one after splitting
+      num_args++;
+      arguments[i] = new char[3];
+      allocated.push_back(arguments[i]);
+      strcpy(arguments[i++], "-i");
+      // copy the rest of the argument (i.e. post "-i=")
+      arguments[i] = new char[strlen(argv[j]) - 2];
+      allocated.push_back(arguments[i]);
+      strcpy(arguments[i], argv[j] + 3);
+      continue;
+    }
+    arguments[i] = new char[strlen(argv[j]) + 1];
+    allocated.push_back(arguments[i]);
+    strcpy(arguments[i], argv[j]);
+  }
+  // gflags modifies the array pointer and shuffles/removes entries when
+  // remove_flags=true, so keep the original pointer for deletion.
+  char** orig_arg_ptr = arguments;
+  gflags::ParseCommandLineNonHelpFlags(&num_args, &arguments, true);
+  // Free every string we allocated, regardless of what gflags removed.
+  for (char* arg : allocated) {
+    delete[] arg;
+  }
+  delete[] orig_arg_ptr;
+}
+
+/**
+ * @brief Validate raw argv for flags missing their dashes, parse all gflags,
+ *        and sanity-check the required options (-cm, -plugins).
+ * @param netSize [out] number of compiled graphs listed in -cm
+ *        (MULTIGRAPH_SEP-separated).
+ * @return false when -h/--help was requested (usage already printed),
+ *         true on success.
+ * @throws std::logic_error on malformed arguments or missing files.
+ */
+bool ParseAndCheckCommandLine(int argc, char* argv[], size_t& netSize) {
+  // ---------------------------Parsing and validating input arguments--------------------------------------
+  slog::info << "Parsing input parameters" << slog::endl;
+
+  // Check for any flags that are missing their preceding dashes
+  // GFlags quietly ignores any flags missing their dashes, which can cause
+  // aot_splitter to run with settings other than what the user intended
+
+  // GFlags supports two different styles of flag:
+  //   1. --<flag>
+  //   2. -<flag>
+  // It also supports two different ways of specifying values for flags which
+  // take values:
+  //   1. --<flag>=<value>
+  //   2. --<flag> <value>
+
+  // If we are not expecting a flag, we are expecting a value for the
+  // preceding flag
+  bool expectingFlag = true;
+  // Start at 1 to skip the command itself
+  for (int i = 1; i < argc; i++) {
+    if (expectingFlag) {
+      // A flag is always denoted by the first char being '-'
+      if (argv[i][0] != '-') {
+        slog::err << "Argument " << argv[i] << " is invalid. You"
+                  << " may have forgotten a preceding '-'." << slog::endl;
+        throw std::logic_error("One or more invalid arguments");
+      }
+
+      // Strip one or two leading dashes, and a trailing "=<value>" if present,
+      // to recover the bare flag name for the gflags lookup below.
+      char* flagNameStart = (argv[i][1] == '-') ? &argv[i][2] : &argv[i][1];
+      std::string flagName;
+
+      gflags::CommandLineFlagInfo flagInfo;
+      if (strstr(flagNameStart, "=")) {
+        flagName = std::string(flagNameStart, size_t(strstr(flagNameStart, "=") - flagNameStart));
+      } else {
+        flagName = std::string(flagNameStart);
+      }
+
+      // We expect a flag in the next argv if the current flag is a bool,
+      // because bool flags do not take a value.
+      // If GetCommandLineFlagInfo returns false, we assume the current
+      // flag is a boolean because boolean flags can be specified as
+      // -no<flag>, which is equivalent to -<flag>=false, or the flag
+      // simply being omitted. However, "no<flag>" is not recognized by
+      // GetCommandLineFlagInfo.
+      // Therefore, if the name is not recognized either the flag is a
+      // boolean flag or doesn't exist. In the latter case, gflags errors
+      // when we call parseCommandLine so we can assume here it's a bool.
+      if (!GetCommandLineFlagInfo(flagName.c_str(), &flagInfo) || strstr(argv[i], "=") || flagInfo.type == "bool") {
+        expectingFlag = true;
+      } else {
+        expectingFlag = false;
+      }
+    } else {
+      // If we were expecting a value, doesn't matter what it is
+      // gflags will check all values are the correct type, and
+      // aot_splitter checks if the values received are sane
+      expectingFlag = true;
+    }
+  }
+
+  parseCommandLine(argc, argv);
+
+  if (FLAGS_help || FLAGS_h) {
+    showUsage();
+    // CoreDLA: Version 2020.3 of OpenVINO assumes that the PAC board with OPAE on it
+    // is an OpenCL/DLAv1 device. Since it is not, it then errors-out when the device
+    // does not response as expected to the OpenCL query.
+    // showAvailableDevices();
+    std::cout << "\n";
+    return false;
+  }
+
+  if (FLAGS_cm.empty()) {
+    throw std::logic_error("Model is required but not set. Please set -cm option.");
+  } else {
+    // -cm may name several graphs separated by MULTIGRAPH_SEP; each path
+    // must exist on disk.
+    std::vector<std::string> m_paths = split(FLAGS_cm, MULTIGRAPH_SEP);
+    netSize = m_paths.size();
+    slog::info << "Found " << netSize << " compiled graph" << (netSize == 1 ? "" : "s") << slog::endl;
+    for (auto& m_path : m_paths) {
+      if (!exists_test(m_path)) {
+        slog::err << "compiled model file: " << FLAGS_cm << " doesn't exist. Please provide a valid path with -cm."
+                  << slog::endl;
+        throw std::logic_error("Compiled model file path does not exist.");
+      }
+    }
+  }
+
+  if (!FLAGS_plugins.empty()) {
+    slog::info << "Using custom plugins xml file - " << FLAGS_plugins << slog::endl;
+  }
+
+  // NOTE(review): this existence check runs even when -plugins was omitted,
+  // so an empty path fails here — confirm whether a default plugins xml is
+  // expected to be passed in all invocations.
+  if (!exists_test(FLAGS_plugins)) {
+    slog::err << "plugins_xml file: " << FLAGS_plugins << " doesn't exist. Please provide a valid path." << slog::endl;
+    throw std::logic_error("plugins_xml file path does not exist.");
+  }
+
+  return true;
+}
+
+// Print a numbered progress banner ("[Step k/12] <name>"), advancing an
+// internal counter on every call. Throws when called more times than there
+// are named steps.
+static void next_step(const std::string additional_info = "") {
+  static size_t step_id = 0;
+  static const std::map<size_t, std::string> step_names = {
+      {1, "Parsing and validating input arguments"},
+      {2, "Loading Inference Engine"},
+      {3, "Setting device configuration"},
+      {4, "Reading the Intermediate Representation network"},
+      {5, "Resizing network to match image sizes and given batch"},
+      {6, "Configuring input of the model"},
+      {7, "Loading the model to the device"},
+      {8, "Setting optimal runtime parameters"},
+      {9, "Creating infer requests and filling input blobs with images"},
+      {10, "Measuring performance"},
+      {11, "Dumping statistics report"},
+      {12, "Dumping the output values"}};
+
+  ++step_id;
+  const auto step_it = step_names.find(step_id);
+  if (step_it == step_names.end()) {
+    THROW_IE_EXCEPTION << "Step ID " << step_id << " is out of total steps number " << step_names.size();
+  }
+
+  std::cout << "[Step " << step_id << "/" << step_names.size() << "] " << step_it->second
+            << (additional_info.empty() ? "" : " (" + additional_info + ")") << std::endl;
+}
+
+/// Return the median of `vec` without modifying it. For an even element
+/// count the two middle values are averaged (integer division for integral T,
+/// matching static_cast<T>(2.0)).
+template <typename T>
+T getMedianValue(const std::vector<T>& vec) {
+  std::vector<T> ordered(vec.begin(), vec.end());
+  std::sort(ordered.begin(), ordered.end());
+  const size_t mid = ordered.size() / 2;
+  if (ordered.size() % 2 != 0) {
+    return ordered[mid];
+  }
+  return (ordered[mid] + ordered[mid - 1]) / static_cast<T>(2.0);
+}
+
+/**
+ * @brief Entry point of the DLA AOT splitter: imports the compiled model(s),
+ *        prepares input/output tensors and runs one inference per network to
+ *        generate the AOT artifacts.
+ */
+int main(int argc, char* argv[]) {
+  try {
+    // Declaring the ExecutableNetwork object as a pointer to workaround the segfault
+    // that occurs when destructing the object. Now that it's declared as a pointer
+    // the compiler won't automatically call the destructor of the object at the end
+    // of this scope and we won't delete the allocated memory either
+    std::vector<ov::CompiledModel*> exeNetworks;
+    size_t netSize = 0;  // parse the size of networks for arguments check
+
+    size_t return_code = 0;  // universal return code, return this value after dumping out Debug info
+
+    // ----------------- 1. Parsing and validating input arguments -------------------------------------------------
+    next_step();
+
+    if (!ParseAndCheckCommandLine(argc, argv, netSize)) {
+      return 0;
+    }
+
+    // Always true here: ParseAndCheckCommandLine throws when -cm is empty.
+    bool isNetworkCompiled = !FLAGS_cm.empty();
+    if (isNetworkCompiled) {
+      slog::info << "Network is compiled" << slog::endl;
+    }
+
+    // The set of arguments printed is meant to be a useful summary to the
+    // user, rather than all of the arguments to aot_splitter
+    slog::info << "Printing summary of arguments being used by aot_splitter" << slog::endl
+               << "Device (-d) .......................... "
+               << "HETERO:FPGA" << slog::endl
+               << "Compiled model (-cm) ................. " << FLAGS_cm << slog::endl
+               << "Input images directory (-i) .......... "
+               << (!FLAGS_i.empty() ? FLAGS_i : "Not specified, will use randomly-generated images") << slog::endl
+               << "Plugins file (-plugins) ..... " << FLAGS_plugins << slog::endl
+               << "Reverse input image channels (-bgr) .. " << (FLAGS_bgr ? "True" : "False") << slog::endl;
+
+    /** This vector stores paths to the processed images **/
+    auto multiInputFiles = VectorMap<std::vector<std::string>>(
+        SplitMultiInputFilesArguments(netSize),  // get input directory list
+        [&](const std::vector<std::string>& inputArgs) mutable {
+          std::vector<std::string> files;
+          for (auto& inputArg : inputArgs) {
+            // Test if the path exists
+            if (!exists_test(inputArg)) {
+              slog::err << "Specified image path: " << inputArg << " does not exist" << slog::endl;
+              throw std::logic_error("Image path does not exist");
+            }
+            // Test whether the path can be opened if it's a directory
+            dir_open_test(inputArg);
+            readInputFilesArguments(files, inputArg);
+          }
+
+          return files;
+        });
+    if (multiInputFiles.size() == 0) {
+      // failed to read input files
+      slog::err << "Failed to read input files" << slog::endl;
+      return 1;
+    }
+
+    uint32_t num_batches = 1;
+
+    // ----------------- 2. Loading the Inference Engine -----------------------------------------------------------
+    next_step();
+
+    // Get optimal runtime parameters for device
+    std::string device_name = "HETERO:FPGA";
+    ov::Core core(FLAGS_plugins);
+
+    // Forward the optional model-decryption credentials to the FPGA plugin.
+    if (device_name.find("FPGA") != std::string::npos) {
+      if (FLAGS_encryption_key != "") {
+        core.set_property("FPGA", {{DLIAPlugin::properties::encryption_key.name(), FLAGS_encryption_key}});
+      }
+      if (FLAGS_encryption_iv != "") {
+        core.set_property("FPGA", {{DLIAPlugin::properties::encryption_iv.name(), FLAGS_encryption_iv}});
+      }
+    }
+
+    slog::info << "OpenVINO: " << ov::get_openvino_version() << slog::endl;
+
+    // ----------------- 3. Setting device configuration -----------------------------------------------------------
+    next_step();
+
+    size_t batchSize = 1;
+    std::vector<std::string> topology_names;
+    // The non-compiled branch is unreachable (-cm is mandatory); steps 4-6
+    // only advance the progress banner for compiled networks.
+    if (!isNetworkCompiled) {
+    } else {
+      next_step();
+      slog::info << "Skipping the step for compiled network" << slog::endl;
+      next_step();
+      slog::info << "Skipping the step for compiled network" << slog::endl;
+      next_step();
+      slog::info << "Skipping the step for compiled network" << slog::endl;
+      // ----------------- 7. Loading the model to the device --------------------------------------------------------
+      next_step();
+
+      int folding_option = 1;
+      bool fold_preprocessing = false;
+      bool enable_early_access = false;
+      if (FLAGS_folding_option) {
+        folding_option = FLAGS_folding_option;
+      }
+      if (FLAGS_fold_preprocessing) {
+        fold_preprocessing = FLAGS_fold_preprocessing;
+      }
+      if (FLAGS_enable_early_access) {
+        enable_early_access = FLAGS_enable_early_access;
+      }
+      core.set_property("FPGA", {{DLIAPlugin::properties::folding_option.name(), std::to_string(folding_option)}});
+      core.set_property("FPGA",
+                        {{DLIAPlugin::properties::fold_preprocessing.name(), fold_preprocessing}});
+      core.set_property("FPGA",
+                        {{DLIAPlugin::properties::enable_early_access.name(), enable_early_access}});
+
+      // Import each compiled graph and verify its batch size matches the
+      // (hard-coded) batchSize of 1.
+      auto compiled_graph_paths = split(FLAGS_cm, MULTIGRAPH_SEP);
+      exeNetworks = vectorMapWithIndex<ov::CompiledModel*>(
+          split(FLAGS_cm, MULTIGRAPH_SEP),  // get a list of compiled graphs
+          [&](const std::string& compiled_graph_path, size_t index) {
+            std::stringstream generated_name;
+            generated_name << "Graph_" << index;
+            slog::info << "Importing model from " << compiled_graph_paths[index] << " to " << device_name << " as "
+                       << generated_name.str() << slog::endl;
+            std::filebuf objFileBuf;
+            objFileBuf.open(compiled_graph_paths[index].c_str(), std::ios::in | std::ios::binary);
+            std::istream objIstream(&objFileBuf);
+            // Intentionally heap-allocated and never freed — see the comment
+            // on exeNetworks at the top of main().
+            auto exeNetwork = new ov::CompiledModel();
+            *exeNetwork = core.import_model(objIstream, device_name, {});
+            topology_names.push_back(generated_name.str());
+            objFileBuf.close();
+            printInputAndOutputsInfoShort(*exeNetwork);
+            if (batchSize == 0) {
+              batchSize = 1;
+            }
+            const auto& inputs = exeNetwork->inputs();
+            for (const auto& item : inputs) {
+              auto& dims = item.get_shape();
+              // dims[0] is assumed to be the batch dimension — TODO confirm
+              // for models with non-NCHW layouts.
+              if (dims[0] != batchSize) {
+                slog::err << "Batch size of the compiled model is " << dims[0] << " and batch size provided is "
+                          << batchSize << slog::endl;
+                std::cout << "Set the same batch size = " << dims[0] << " when running the app" << std::endl;
+                std::cout << "Or recompile model with batch size = " << batchSize << std::endl;
+                exit(5);
+              }
+            }
+            return exeNetwork;
+          });
+    }
+    // ----------------- 8. Setting optimal runtime parameters -----------------------------------------------------
+    next_step();
+
+    // Number of requests (fixed at 1 for artifact generation).
+    uint32_t nireq = 1;
+    if (nireq == 0) {
+      nireq = 1;
+    }
+    int niter = 1;
+
+    if (niter > 0) {
+      num_batches = niter;
+    }
+
+    // ----------------- 9. Creating infer requests and filling input blobs ----------------------------------------
+    next_step();
+    std::vector<dla_benchmark::InputsInfo> inputInfos;
+    // Data structure hierarchy
+    // Outermost vec: which model it corresponds to (multigraph)
+    // Map: input/output name and its corresponding TensorVector
+    // TensorVector: An alias for vector<ov::tensor> where each vector element correspond to the batch
+    std::vector<std::map<std::string, ov::TensorVector>> inputsData;
+    std::vector<std::map<std::string, ov::TensorVector>> outputTensors(exeNetworks.size());
+
+    std::vector<std::unique_ptr<InferRequestsQueue>> inferRequestsQueues;
+    const std::string resize_type = FLAGS_resize_type.empty() ? "resize" : FLAGS_resize_type;
+    for (size_t netIdx = 0; netIdx < exeNetworks.size(); netIdx++) {
+      // Handle the case that use same inputs for all networks
+      const auto& inputFiles = netIdx >= multiInputFiles.size() ? multiInputFiles.back() : multiInputFiles[netIdx];
+      inputInfos.push_back(GetInputsInfo(batchSize, exeNetworks[netIdx]->inputs(), FLAGS_bin_data));
+      inputsData.push_back(GetStaticTensors(inputFiles.empty() ? std::vector<std::string>{} : inputFiles,
+                                            batchSize,
+                                            inputInfos[netIdx],
+                                            num_batches,
+                                            resize_type,
+                                            FLAGS_bgr,
+                                            FLAGS_bin_data,
+                                            false /* verbose outputs not supported for aot splitter */));
+      // Use unique_ptr to create InferRequestsQueue objects and avoid copying mutex and cv
+      inferRequestsQueues.push_back(
+          std::move(std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(*(exeNetworks[netIdx]), nireq))));
+    }
+
+    /** Start inference & calculate performance **/
+    /** to align number if iterations to guarantee that last infer requests are executed in the same conditions **/
+    // NOTE(review): iterations is never incremented below, so index 0 is
+    // always used — consistent with nireq/niter being fixed at 1.
+    std::vector<size_t> iterations(exeNetworks.size(), 0);
+
+    try {
+      {
+        // set up all infer request and prep all i/o Blobs
+        for (size_t net_id = 0; net_id < exeNetworks.size(); net_id++) {
+          for (size_t iireq = 0; iireq < nireq; iireq++) {
+            auto inferRequest = inferRequestsQueues.at(net_id)->get_idle_request();
+            if (!inferRequest) {
+              THROW_IE_EXCEPTION << "No idle Infer Requests!";
+            }
+
+            if (niter != 0LL) {
+              // Bind pre-allocated output tensors, then the prepared input
+              // tensors, to this request before running it.
+              const auto& outputs = exeNetworks[net_id]->outputs();
+              for (const auto& output : outputs) {
+                const std::string& name = output.get_any_name();
+                outputTensors.at(net_id)[name].emplace_back(output.get_element_type(), output.get_shape());
+                inferRequest->set_tensor(name, outputTensors.at(net_id).at(name).at(iterations.at(net_id)));
+              }
+              const auto& inputs = exeNetworks[net_id]->inputs();
+              for (auto& input : inputs) {
+                const std::string& inputName = input.get_any_name();
+                const auto& data = inputsData.at(net_id).at(inputName)[iterations.at(net_id)];
+                inferRequest->set_tensor(inputName, data);
+              }
+            }
+
+            {
+              std::cout << "Generating Artifacts" << std::endl;
+              inferRequest->infer();
+            }
+          }
+        }
+      }
+    } catch (const std::exception& ex) {
+      std::cerr << ex.what() << std::endl;
+      slog::err << "Generation failed" << slog::endl;
+      return_code = 1;
+    }
+
+    if (return_code) return return_code;
+  } catch (const std::exception& ex) {
+    slog::err << ex.what() << slog::endl;
+    return 3;
+  }
+
+  return 0;
+}