diff options
| author | Eric Dao <eric@erickhangdao.com> | 2025-03-10 17:54:31 -0400 |
|---|---|---|
| committer | Eric Dao <eric@erickhangdao.com> | 2025-03-10 17:54:31 -0400 |
| commit | ab224e2e6ba65f5a369ec392f99cd8845ad06c98 (patch) | |
| tree | a1e757e9341863ed52b8ad4c5a1c45933aab9da4 /python/openvino/runtime/dla_aot_splitter | |
| parent | 40da1752f2c8639186b72f6838aa415e854d0b1d (diff) | |
| download | thesis-master.tar.gz thesis-master.tar.bz2 thesis-master.zip | |
Diffstat (limited to 'python/openvino/runtime/dla_aot_splitter')
21 files changed, 2046 insertions, 0 deletions
diff --git a/python/openvino/runtime/dla_aot_splitter/CMakeLists.txt b/python/openvino/runtime/dla_aot_splitter/CMakeLists.txt new file mode 100644 index 0000000..0e1e4f8 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/CMakeLists.txt @@ -0,0 +1,71 @@ +cmake_minimum_required(VERSION 3.10) + +# Use <package>_ROOT variables to help find_package locate packages +if (POLICY CMP0074) + cmake_policy(SET CMP0074 NEW) +endif() + +find_package(OpenCV COMPONENTS core highgui imgcodecs imgproc videoio REQUIRED) +find_package(gflags COMPONENTS shared REQUIRED) + +add_subdirectory(dla_aot_splitter_plugin) +add_subdirectory(dla_aot_splitter_example) + +if (DE10_AGILEX) + add_library(de10_agilex ALIAS de10_agilex_mmd) +elseif (SYSTEM_CONSOLE_PLATFORM) + # DO NOTHING +elseif (PAC_A10) + add_library(dcp_a10_pac ALIAS intel_opae_mmd) +elseif(AGX7_IDK) + add_library(agx7_i_dk ALIAS intel_opae_mmd) +elseif(AGX7_N6001) + add_library(agx7_n6001 ALIAS intel_opae_mmd) +endif() + +add_executable(dla_aot_splitter ${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp) + +target_compile_features(dla_aot_splitter PUBLIC cxx_std_11) + +target_sources(dla_aot_splitter PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/inc/dla_aot_splitter.hpp + $ENV{COREDLA_ROOT}/runtime/dla_benchmark/inputs_filling.cpp #TODO REMOVE and replace with link library + $ENV{COREDLA_ROOT}/runtime/dla_benchmark/utils.cpp #TODO REMOVE and replace with link library + $ENV{COREDLA_ROOT}/runtime/common/utils/src/slog.cpp + $ENV{COREDLA_ROOT}/runtime/common/utils/src/args_helper.cpp + $ENV{COREDLA_ROOT}/runtime/common/utils/src/common.cpp + $ENV{COREDLA_ROOT}/runtime/common/utils/src/latency_metrics.cpp +) + +target_include_directories(dla_aot_splitter PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/inc + $ENV{COREDLA_ROOT}/util/inc + $ENV{COREDLA_ROOT}/dla_plugin/inc + $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia + $ENV{COREDLA_ROOT}/runtime/dla_benchmark #TODO REMOVE and replace with link library +) + +if 
(WIN32) + target_include_directories(dla_aot_splitter PRIVATE + $ENV{COREDLA_ROOT}/compiler/inc # dla_performance_estimator.h + ) +endif() + + +target_link_libraries(dla_aot_splitter PRIVATE + openvino::runtime + openvino_dev_api + format_reader + ie_samples_utils + ${OpenCV_LIBRARIES} # Needed for the directly compiled inputs_filling + dla_aot_splitter_plugin + gflags +) + +if (NOT WIN32) + target_link_libraries(dla_aot_splitter PRIVATE + ${LIB_DL} + pthread + ) +endif() diff --git a/python/openvino/runtime/dla_aot_splitter/CPPLINT.cfg b/python/openvino/runtime/dla_aot_splitter/CPPLINT.cfg new file mode 100644 index 0000000..4bdae97 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/CPPLINT.cfg @@ -0,0 +1,8 @@ +set noparent +filter=-build/header_guard,-runtime/explicit,-build/include_subdir,-runtime/references,-build/c++11,-runtime/int,-runtime/string,-runtime/printf,-build/namespaces,-readability/todo,-readability/casting + +# Exclude Example code +exclude_files=dla_aot_splitter_example + +linelength=160 +headers=h,hpp diff --git a/python/openvino/runtime/dla_aot_splitter/README.md b/python/openvino/runtime/dla_aot_splitter/README.md new file mode 100644 index 0000000..ffefe0d --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/README.md @@ -0,0 +1,52 @@ +# Intel AI Suite Core DLA 'AoT Splitter' + +This tool is intended to split a compiled HETERO:FPGA OpenVINO model into Input memory, Config memory, and Filter memory data blobs that would normally exist in the DDR memory of a runtime CoreDLA IP. These blobs can be used to directly run an inference on the IP without using OpenVINO InferenceEngine. + +# How to Build the Splitter, Plugin, and Example + +First, follow all instructions to install CoreDLA compiler development environment + +Change directory to the dla runtime folder + +``` +sh build_runtime.sh -target_de10_agilex +``` + +# How to Run the Splitter Executable + +The executable outputs the memory blobs to the current working directory. 
Change directory to the location where you want the outputs to be generated + +``` +cd directory_where_you_want_output + +runtime/build_Release/dla_aot_splitter/dla_aot_splitter -cm compiled_hetero_fpga_model.bin -i path/to/image.bmp -bgr -plugins runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter.xml +``` + +Ensure that the libdla_aot_splitter.so, libcoreDLAHeteroPlugin.so and other shared libraries are available to the utility. + +The tool outputs the following artifacts: + - arch_build.mem / arch_build.bin + - config.mem / config.bin + - filter.mem /filter.bin + - input.mem / input.bin + - inter_size.mem + - output_size.mem + +# Building the Example Inference Program + +The example inference program with static input,config,filter data is compiled with the following environment variables +and option to build_runtime.sh + +## DE10 Agilex +``` +export AOT_SPLITTER_EXAMPLE_MODEL=<path/to/model.xml> +export AOT_SPLITTER_EXAMPLE_INPUT=<path/to/image.bmp> +sh build_runtime.sh -aot_splitter_example -target_de10_agilex +``` + +This program directly embeds the input, config and filter data into the resulting executable file for direct use. + +## PCIE + +The emulation inference program uses the PCIE MMD driver from the example design to connect to and provision the IP. 
+Your system may require a different driver to provision the IP diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_example/CMakeLists.txt b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_example/CMakeLists.txt new file mode 100644 index 0000000..a6f2ce8 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_example/CMakeLists.txt @@ -0,0 +1,209 @@ +# Copyright (C) 2018-2020 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +add_executable(dla_aot_splitter_example EXCLUDE_FROM_ALL src/main.cpp) + +target_compile_features(dla_aot_splitter_example PUBLIC cxx_std_11) + +target_compile_definitions(dla_aot_splitter_example PRIVATE DLA_MMD) + +file(GLOB SOURCES + # coredla_device + $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/device_memory_allocator.h + $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/dla_dma_constants.h + $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/mmd_wrapper.h + $ENV{COREDLA_ROOT}/runtime/coredla_device/src/device_memory_allocator.cpp + # + src/main.cpp +) +if (SYSTEM_CONSOLE_PLATFORM) + list(APPEND SOURCES ${CMAKE_SOURCE_DIR}/coredla_device/mmd/system_console/mmd_wrapper.cpp) +else () + list(APPEND SOURCES $ENV{COREDLA_ROOT}/runtime/coredla_device/src/mmd_wrapper.cpp) +endif () + +target_sources (dla_aot_splitter_example PRIVATE ${SOURCES}) + +if (DISABLE_JIT) +# for dla_dma_constants.svh + if (EXISTS $ENV{COREDLA_ROOT}/inc) + target_include_directories(dla_aot_splitter_example PRIVATE $ENV{COREDLA_ROOT}/inc) + else() + target_include_directories(dla_aot_splitter_example PRIVATE $ENV{COREDLA_ROOT}/build/coredla/dla/inc) + endif() +endif() + +target_link_libraries(dla_aot_splitter_example PRIVATE + pthread +) + +if (DISABLE_JIT) + target_include_directories(dla_aot_splitter_example PRIVATE + $ENV{COREDLA_ROOT}/util/inc + $ENV{COREDLA_XUTIL_DIR}/compiled_result/inc + ) + target_sources(dla_aot_splitter_example PRIVATE 
$ENV{COREDLA_XUTIL_DIR}/compiled_result/src/compiled_result_reader_writer.cpp) +else() + target_link_libraries(dla_aot_splitter_example + PRIVATE + dla_compiled_result + ) +endif() + +if (DE10_AGILEX) + target_link_libraries(dla_aot_splitter_example PRIVATE de10_agilex) +elseif(PAC_A10) + target_link_libraries(dla_aot_splitter_example PRIVATE dcp_a10_pac) +elseif(AGX7_IDK) + target_link_libraries(dla_aot_splitter_example PRIVATE agx7_i_dk) + find_library(libjson-c_LIBRARIES + NAMES json-c + PATHS ${LIBOPAE-C_ROOT}/lib + ${LIBOPAE-C_ROOT}/lib64 + /usr/local/lib + /usr/lib + /lib + /usr/lib/x86_64-linux-gnu + ${CMAKE_EXTRA_LIBS}) + target_link_libraries(dla_aot_splitter_example PRIVATE ${libjson-c_LIBRARIES}) +elseif(AGX7_N6001) + target_link_libraries(dla_aot_splitter_example PRIVATE agx7_n6001) + find_library(libjson-c_LIBRARIES + NAMES json-c + PATHS ${LIBOPAE-C_ROOT}/lib + ${LIBOPAE-C_ROOT}/lib64 + /usr/local/lib + /usr/lib + /lib + /usr/lib/x86_64-linux-gnu + ${CMAKE_EXTRA_LIBS}) + target_link_libraries(dla_aot_splitter_example PRIVATE ${libjson-c_LIBRARIES}) +elseif(SYSTEM_CONSOLE_PLATFORM) + # Agilex 5 JTAG ED: do nothing +elseif(REFERENCE) + # Reference: do nothing +else() + message(FATAL_ERROR "Building DLA AOT Aplitter Example with unsupported platform") +endif() + +target_include_directories(dla_aot_splitter_example PRIVATE + $ENV{COREDLA_ROOT}/runtime/coredla_device/inc + if(PAC_A10) + $ENV{COREDLA_ROOT}/runtime/coredla_device/mmd/dcp_a10_pac/host + endif() +) + +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include) + +target_sources (dla_aot_splitter_example PRIVATE + ${CMAKE_CURRENT_BINARY_DIR}/include/arch_build.mem + ${CMAKE_CURRENT_BINARY_DIR}/include/config.mem + ${CMAKE_CURRENT_BINARY_DIR}/include/filter.mem + ${CMAKE_CURRENT_BINARY_DIR}/include/input.mem + ${CMAKE_CURRENT_BINARY_DIR}/include/inter_size.mem + ${CMAKE_CURRENT_BINARY_DIR}/include/output_size.mem +) +target_include_directories(dla_aot_splitter_example PRIVATE + 
${CMAKE_CURRENT_BINARY_DIR}/include +) + +if (DEFINED ENV{AOT_SPLITTER_EXAMPLE_MODEL}) + set (AOT_SPLITTER_EXAMPLE_MODEL $ENV{AOT_SPLITTER_EXAMPLE_MODEL}) +else() + if (EXISTS $ENV{COREDLA_WORK}/demo/models/public/resnet-50-tf/FP32/resnet-50-tf.xml) + set (AOT_SPLITTER_EXAMPLE_MODEL $ENV{COREDLA_WORK}/demo/models/public/resnet-50-tf/FP32/resnet-50-tf.xml) + else() + # The path below is for Intel internal use only + if (EXISTS /p/psg/swip/dla/caffe/caffe_reference/ngraph_ir/coredla/ModelZoo/2021_4_1/resnet_50_tf/FP32/resnet-50-tf.xml) + set (AOT_SPLITTER_EXAMPLE_MODEL /p/psg/swip/dla/caffe/caffe_reference/ngraph_ir/coredla/ModelZoo/2021_4_1/resnet_50_tf/FP32/resnet-50-tf.xml) + endif() + endif() +endif() + +if (DEFINED ENV{AOT_SPLITTER_EXAMPLE_INPUT}) + set (AOT_SPLITTER_EXAMPLE_INPUT $ENV{AOT_SPLITTER_EXAMPLE_INPUT}) +else() + if (EXISTS $ENV{COREDLA_ROOT}/demo/sample_images/val_00000000.bmp) + set (AOT_SPLITTER_EXAMPLE_INPUT $ENV{COREDLA_ROOT}/demo/sample_images/val_00000000.bmp) + else() + # The path below is for Intel internal use only + if (EXISTS /p/psg/swip/dla/images/imagenet/ILSVRC2012_224x224/BMP/BMP/ILSVRC2012_val_00000000.bmp) + set (AOT_SPLITTER_EXAMPLE_INPUT /p/psg/swip/dla/images/imagenet/ILSVRC2012_224x224/BMP/BMP/ILSVRC2012_val_00000000.bmp) + endif() + endif() +endif() + +if (EXISTS ${CoreDLA_DIR}/../bin) + set(COREDLA_BIN ${CoreDLA_DIR}/../bin) + set(COREDLA_LIB ${CoreDLA_DIR}/../lib) + set(COREDLA_EXARCH ${CoreDLA_DIR}/../example_architectures) + if(DE10_AGILEX OR AGX7_IDK OR AGX7_N6001) + set (AOT_SPLITTER_EXAMPLE_ARCH AGX7_Performance.arch) + elseif(SYSTEM_CONSOLE_PLATFORM) + set (AOT_SPLITTER_EXAMPLE_ARCH AGX5_Small_Softmax.arch) + else() + set (AOT_SPLITTER_EXAMPLE_ARCH A10_Performance.arch) + endif() +else() + set(COREDLA_BIN $ENV{COREDLA_ROOT}/build/coredla/dla/bin) + set(COREDLA_LIB $ENV{COREDLA_ROOT}/build/coredla/dla/lib) + set(COREDLA_EXARCH $ENV{COREDLA_ROOT}/example_architectures) + + # The paths below are for Intel internal use only 
+ if(DE10_AGILEX) + set (AOT_SPLITTER_EXAMPLE_ARCH arch/descriptions/AGX7/64x32_i5x1_fp13agx_sb31744_xbark32_clamp_preluk32_poolk4_softmax_1inst.arch) + elseif(AGX7_IDK OR AGX7_N6001) + set (AOT_SPLITTER_EXAMPLE_ARCH arch/descriptions/AGX7/32x64_i5x1_fp13agx_sb32768_poolk4_actk32_prelu_rclamp_sig_softmaxk1.arch) + elseif(SYSTEM_CONSOLE_PLATFORM) + set (AOT_SPLITTER_EXAMPLE_ARCH 16x16_i12x1_fp12agx_sb8192_poolk4_actk16_clamp_softmaxk1.arch) + else() + set (AOT_SPLITTER_EXAMPLE_ARCH arch/descriptions/A10/64x32_i4x1_fp11_sb31744_xbark32_clamp_preluk32_poolk4_softmax.arch) + endif() +endif() + +if (NOT DEFINED AOT_SPLITTER_INPUT_ARGUMENTS) + set (AOT_SPLITTER_INPUT_ARGUMENTS ) + if (DEFINED AOT_SPLITTER_EXAMPLE_INPUT) + set (AOT_SPLITTER_INPUT_ARGUMENTS -i ${AOT_SPLITTER_EXAMPLE_INPUT} -bgr) + endif() +endif() + +# Need to copy the system console script for Agilex 5E JTAG ED +# Also link against Boost +if (SYSTEM_CONSOLE_PLATFORM) + find_package(Boost REQUIRED COMPONENTS filesystem) + target_link_libraries(dla_aot_splitter_example PRIVATE Boost::filesystem) + add_custom_command( + TARGET dla_aot_splitter_example POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_SOURCE_DIR}/coredla_device/mmd/system_console/system_console_script.tcl + ${CMAKE_CURRENT_BINARY_DIR}/system_console_script.tcl + ) + target_compile_definitions(dla_aot_splitter_example PRIVATE DLA_SYSCON_SOURCE_ROOT=${CMAKE_CURRENT_BINARY_DIR}) +endif() + +add_custom_command( + OUTPUT + ${CMAKE_CURRENT_BINARY_DIR}/include/arch_build.mem + ${CMAKE_CURRENT_BINARY_DIR}/include/config.mem + ${CMAKE_CURRENT_BINARY_DIR}/include/filter.mem + ${CMAKE_CURRENT_BINARY_DIR}/include/input.mem + ${CMAKE_CURRENT_BINARY_DIR}/include/inter_size.mem + ${CMAKE_CURRENT_BINARY_DIR}/include/output_size.mem + COMMAND + LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:${COREDLA_LIB} ${COREDLA_BIN}/dlac --network-file ${AOT_SPLITTER_EXAMPLE_MODEL} --march ${COREDLA_EXARCH}/${AOT_SPLITTER_EXAMPLE_ARCH} --foutput-format open_vino_hetero --o 
${CMAKE_CURRENT_BINARY_DIR}/resnet.bin + COMMAND + LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:${COREDLA_LIB} $<TARGET_FILE:dla_aot_splitter> ${AOT_SPLITTER_INPUT_ARGUMENTS} -cm ${CMAKE_CURRENT_BINARY_DIR}/resnet.bin -plugins $<TARGET_FILE_DIR:dla_aot_splitter_plugin>/plugins_aot_splitter.xml + DEPENDS + ${COREDLA_BIN}/dlac + dla_benchmark + dla_aot_splitter + dla_aot_splitter_plugin + ${AOT_SPLITTER_EXAMPLE_MODEL} + ${COREDLA_EXARCH}/${AOT_SPLITTER_EXAMPLE_ARCH} + ${AOT_SPLITTER_EXAMPLE_INPUT} + $<TARGET_FILE_DIR:dla_aot_splitter_plugin>/plugins_aot_splitter.xml + WORKING_DIRECTORY + ${CMAKE_CURRENT_BINARY_DIR}/include +) diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_example/src/main.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_example/src/main.cpp new file mode 100644 index 0000000..b90ccd5 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_example/src/main.cpp @@ -0,0 +1,180 @@ +// Copyright 2022 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +// +// This small tool demonstrates the minimum number of steps necessary to run an +// inference on the FPGA while using the output files from the AoT splitter. 
+// + +#include <iostream> +#include <iomanip> +#include <fstream> +#include <stdint.h> +#include <array> +#include <cstring> //memcpy + +uint32_t arch_build_mem_32[] = +{ + #include "arch_build.mem" +}; +uint8_t* const arch_build_mem = (uint8_t*)&arch_build_mem_32[0]; +const uint32_t arch_build_mem_size = sizeof(arch_build_mem_32); + +uint32_t input_mem_32[] = +{ + #include "input.mem" +}; +uint8_t* const input_mem = sizeof(input_mem_32) ? (uint8_t*)&input_mem_32[0] : nullptr; +const uint32_t input_mem_size = sizeof(input_mem_32); + +uint32_t config_mem_32[] = +{ + #include "config.mem" +}; +uint8_t* const config_mem = (uint8_t*)&config_mem_32[0]; +const uint32_t config_mem_size = sizeof(config_mem_32); + +uint32_t filter_mem_32[] = +{ + #include "filter.mem" +}; +uint8_t* const filter_mem = (uint8_t*)&filter_mem_32[0]; +const uint32_t filter_mem_size = sizeof(filter_mem_32); + +constexpr uint32_t output_mem_size = + #include "output_size.mem" +; + +constexpr uint32_t inter_mem_size = + #include "inter_size.mem" +; + +#include "mmd_wrapper.h" +#include "device_memory_allocator.h" +#include "dla_dma_constants.h" //DLA_DMA_CSR_OFFSET_*** + +int main(int argc, char *argv[]) { + std::array<uint8_t, output_mem_size> actual_output_mem; + for (uint64_t i=0u; i < actual_output_mem.size();i++) + { + actual_output_mem[i] = (0xDEADBEEF) >> ((3-(i%4)) * 8); + } + + std::cout << "AOT Splitter Example" << std::endl; + + constexpr int instance = 0; + + constexpr int _maxNumPipelines = 5; + constexpr int numPipelines = _maxNumPipelines; + + // TODO: retrieve this from the arch file + constexpr uint64_t featureWordSize = 32; + constexpr uint64_t filterWordSize = 64; + + + constexpr int ARCH_HASH_SIZE = 16; + constexpr int BUILD_VERSION_SIZE = 32; + + MmdWrapper mmdWrapper{}; + DeviceMemoryAllocator ddrAllocator{}; + + for (size_t i = 0; i < ARCH_HASH_SIZE; i+=4) { + uint32_t arch_build_word_from_device = mmdWrapper.ReadFromCsr(instance, i); + if (arch_build_mem_32[i/4] != 
arch_build_word_from_device) + { + std::cout << "Arch hash mismatch at word " << i << " : expected " << + std::setfill('0') << std::setw(8) << std::uppercase << std::hex << (uint32_t)arch_build_mem_32[i/4] << + " != " << + std::setfill('0') << std::setw(8) << std::uppercase << std::hex << (uint32_t)arch_build_word_from_device << std::endl; + return 1; + } + } + char expected_build_version[BUILD_VERSION_SIZE + 1]; + expected_build_version[BUILD_VERSION_SIZE] = '\0'; + std::memcpy(expected_build_version, (uint8_t*)&arch_build_mem_32[ARCH_HASH_SIZE/sizeof(uint32_t)], BUILD_VERSION_SIZE); + + char actual_build_version[BUILD_VERSION_SIZE + 1]; + actual_build_version[BUILD_VERSION_SIZE] = '\0'; + + for (uint32_t i=0;i < BUILD_VERSION_SIZE; i+=4) + { + uint32_t chunk = mmdWrapper.ReadFromCsr(instance, ARCH_HASH_SIZE + i); + for (uint8_t j=0;j < 4; j++) + { + actual_build_version[i+j] = chunk & 0xFF; + chunk >>= 8; + } + } + if (0 != std::strncmp(expected_build_version, actual_build_version, BUILD_VERSION_SIZE)) + { + std::cout << "Build version mismath. 
Expected " << expected_build_version << " actual " << actual_build_version << std::endl; + return 1; + } + + ddrAllocator.Initialize(mmdWrapper.GetDDRSizePerInstance(), &mmdWrapper); + + ddrAllocator.AllocateSharedBuffer(inter_mem_size, instance); + //mmdWrapper.WriteToCsr(instance, DLA_DMA_CSR_OFFSET_INTERMEDIATE_BASE_ADDR, 0); + + + uint64_t inputOutputBufferSize = numPipelines * (input_mem_size + output_mem_size); // how much space to allocate + uint64_t inputOutputBufferAlignment = featureWordSize; // starting address must be aligned to this + uint64_t inputOutputBufferAddr; // where did the allocator place this buffer + ddrAllocator.AllocatePrivateBuffer(inputOutputBufferSize, inputOutputBufferAlignment, inputOutputBufferAddr); + + uint64_t configFilterBufferSize = config_mem_size + filter_mem_size; + uint64_t configFilterBufferAlignment = filterWordSize; + uint64_t configFilterBufferAddr; + ddrAllocator.AllocatePrivateBuffer(configFilterBufferSize, configFilterBufferAlignment, configFilterBufferAddr); + + mmdWrapper.WriteToCsr(instance, DLA_DMA_CSR_OFFSET_INTERRUPT_MASK, 0); + mmdWrapper.WriteToCsr(instance, DLA_DMA_CSR_OFFSET_INTERRUPT_CONTROL, 3); + uint32_t completionCount = mmdWrapper.ReadFromCsr(instance, DLA_DMA_CSR_OFFSET_COMPLETION_COUNT); + std::cout << "Initial completion count " << completionCount << std::endl; + + mmdWrapper.WriteToDDR(instance, inputOutputBufferAddr, input_mem_size, input_mem); + + mmdWrapper.WriteToDDR(instance, configFilterBufferAddr, config_mem_size, config_mem); + mmdWrapper.WriteToDDR(instance, configFilterBufferAddr + config_mem_size, filter_mem_size, filter_mem); + + mmdWrapper.WriteToCsr(instance, DLA_DMA_CSR_OFFSET_CONFIG_BASE_ADDR, configFilterBufferAddr); + constexpr int CONFIG_READER_DATA_BYTES = 8; // May want to move to a header in production code + mmdWrapper.WriteToCsr(instance, DLA_DMA_CSR_OFFSET_CONFIG_RANGE_MINUS_TWO, ((config_mem_size) / CONFIG_READER_DATA_BYTES) - 2); + + + // base address for feature reader 
-- this will trigger one run of DLA + mmdWrapper.WriteToCsr(instance, DLA_DMA_CSR_OFFSET_INPUT_OUTPUT_BASE_ADDR, inputOutputBufferAddr); + + int i=0; + while(mmdWrapper.ReadFromCsr(instance, DLA_DMA_CSR_OFFSET_COMPLETION_COUNT) == completionCount) + { + i++; + if (i % 100000 == 0) { + std::cout << "Timeout" << std::endl; + return 1; + } + } + + std::cout << "Completed infered in " << i << " polling intervals" << std::endl; + + //Reading from pipeline zero + mmdWrapper.ReadFromDDR(instance, inputOutputBufferAddr + input_mem_size, actual_output_mem.size(), actual_output_mem.data()); + + std::ofstream of ("actual_output.mem", std::ios_base::out | std::ios_base::binary); + if (of) { + of.write((const char*)actual_output_mem.data(), actual_output_mem.size()); + } + of.close(); + + return 0; +} diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/CMakeLists.txt b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/CMakeLists.txt new file mode 100644 index 0000000..6f5e916 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/CMakeLists.txt @@ -0,0 +1,113 @@ +cmake_minimum_required(VERSION 3.10) + +add_library(dla_aot_splitter_plugin SHARED) + +target_compile_features(dla_aot_splitter_plugin PUBLIC cxx_std_11) + +target_compile_definitions(dla_aot_splitter_plugin PUBLIC DISABLE_JIT) + +set_target_properties(dla_aot_splitter_plugin PROPERTIES POSITION_INDEPENDENT_CODE ON) + +if (WIN32) + # Fix warning C4273: inconsistent dll linkage + target_compile_definitions(dla_aot_splitter_plugin PRIVATE XBYAK_NO_OP_NAMES + IMPLEMENT_INFERENCE_ENGINE_PLUGIN + $<TARGET_PROPERTY:openvino::runtime,INTERFACE_COMPILE_DEFINITIONS>) +endif() + +target_include_directories(dla_aot_splitter_plugin PRIVATE + $ENV{COREDLA_ROOT}/dla_plugin + $ENV{COREDLA_ROOT}/dla_plugin/inc + $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia + $ENV{COREDLA_ROOT}/util/inc # dla_error.h + $ENV{COREDLA_ROOT}/inc # dla_dma_constants.svh + 
$ENV{COREDLA_ROOT}/runtime/coredla_device/inc # For abstract classes (BatchJob, Device etc.) + # + ${CMAKE_CURRENT_SOURCE_DIR}/inc +) + +target_sources(dla_aot_splitter_plugin PRIVATE +## + $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_async_infer_request.h + $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_config.hpp + $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_compiled_model.h + $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_runtime_log.h + $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia_infer_request.h + $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia_plugin.h + $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia_utils.h + $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_plugin_config.hpp +## + $ENV{COREDLA_ROOT}/dla_plugin/src/dla_async_infer_request.cpp + $ENV{COREDLA_ROOT}/dla_plugin/src/dla_config.cpp + $ENV{COREDLA_ROOT}/dla_plugin/src/dla_compiled_model.cpp + $ENV{COREDLA_ROOT}/dla_plugin/src/dlia_infer_request.cpp + $ENV{COREDLA_ROOT}/dla_plugin/src/dlia_plugin.cpp + $ENV{COREDLA_ROOT}/dla_plugin/src/dla_plugin_jit_functions.cpp + $ENV{COREDLA_ROOT}/dla_plugin/src/dlia_utils.cpp + $ENV{COREDLA_ROOT}/util/src/dla_numeric_utils.cpp +## + $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/graph_job.h + $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/batch_job.h + $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/device.h +## + ${CMAKE_CURRENT_SOURCE_DIR}/src/raw_graph_job.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/raw_device.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/raw_batch_job.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/dla_aot_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/inc/raw_graph_job.h + ${CMAKE_CURRENT_SOURCE_DIR}/inc/raw_device.h + ${CMAKE_CURRENT_SOURCE_DIR}/inc/raw_batch_job.h + ${CMAKE_CURRENT_SOURCE_DIR}/inc/dla_aot_utils.h + ${CMAKE_CURRENT_SOURCE_DIR}/inc/dla_aot_structs.h +) + +if (WIN32) + target_link_libraries(dla_aot_splitter_plugin + PRIVATE +## + dla_op_transformation + dliaPluginIOTransformations + openvino::runtime + openvino_dev_api + ${TBB_IMPORTED_TARGETS} +) +else() + target_link_libraries(dla_aot_splitter_plugin + PRIVATE +## + 
pthread + dla_op_transformation + dliaPluginIOTransformations + openvino::runtime + openvino_dev_api + ${TBB_IMPORTED_TARGETS} +) +endif() + +if (DISABLE_JIT) + target_include_directories(dla_aot_splitter_plugin PRIVATE + $ENV{COREDLA_ROOT}/util/inc + $ENV{COREDLA_XUTIL_DIR}/compiled_result/inc + ) + target_sources(dla_aot_splitter_plugin PRIVATE $ENV{COREDLA_XUTIL_DIR}/compiled_result/src/compiled_result_reader_writer.cpp) + + if (EXISTS $ENV{COREDLA_ROOT}/inc) + target_include_directories(dla_aot_splitter_plugin PUBLIC $ENV{COREDLA_ROOT}/inc) + else() + target_include_directories(dla_aot_splitter_plugin PUBLIC $ENV{COREDLA_ROOT}/build/coredla/dla/inc) + endif() +else() + target_link_libraries(dla_aot_splitter_plugin + PRIVATE + dla_compiled_result + archparam + ) +endif() + +set_target_properties(dliaPluginIOTransformations PROPERTIES POSITION_INDEPENDENT_CODE ON) + +if (WIN32) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/plugins_aot_splitter_win.xml ${CMAKE_CURRENT_BINARY_DIR}/plugins_aot_splitter.xml COPYONLY) +else() + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/plugins_aot_splitter.xml ${CMAKE_CURRENT_BINARY_DIR}/ COPYONLY) +endif() diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h new file mode 100644 index 0000000..697b5d2 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h @@ -0,0 +1,38 @@ +// Copyright 2020 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. 
+// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +#ifndef _DLA_AOT_STRUCTS_H_ +#define _DLA_AOT_STRUCTS_H_ + +#include "compiled_result.h" + +// Custom type +typedef unsigned char uint8_t; + +// All size and offset fields are in bytes. +typedef struct { + const dla::CompiledResult* compiled_result; + uint32_t config_buffer_size; + uint32_t filter_bias_scale_buffer_size; + uint8_t *input_feature_buffer; + uint32_t input_feature_buffer_size; + uint32_t output_feature_buffer_size; + uint32_t intermediate_feature_buffer_size; +} DLAInput; + +typedef struct { + // Its size is output_feature_buffer_size in DLAInput. + uint8_t *output_feature_buffer; +} DLAOutput; + +#endif // _DLA_REF_H_ diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h new file mode 100644 index 0000000..7fa23e8 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h @@ -0,0 +1,49 @@ +// Copyright 2020-2023 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. 
+ +#ifndef _DLA_AOT_UTILS_H_ +#define _DLA_AOT_UTILS_H_ + +#include <fcntl.h> +#include <google/protobuf/io/zero_copy_stream_impl.h> +#include <google/protobuf/text_format.h> +#include <sys/stat.h> + +#include <iostream> +#include <string> +#include <vector> + +#include "dla_aot_structs.h" + +using google::protobuf::io::FileInputStream; + +// fp16 feature element (in bytes) +// TODO: extract it from arch / compiled result +const uint32_t feature_elem_size = 2; + +////////////////////////////////////////////////////////////////////////////// +// Dump DLA input and output to the following files: +// - config_filter.mem: config + filter buffer +// - input_feature.mem: input feature buffer +// - output_feature.mem: output feature buffer (emulation results) +// +// Each .mem file is a text file, with one byte (in hex) per line. +////////////////////////////////////////////////////////////////////////////// + +void writeInputOutputToFiles(const std::vector<int>& arch_hash, + const std::string& build_version, + const std::string& arch_name, + const DLAInput& input, + const DLAOutput& output); + +#endif // _DLA_AOT_UTILS_H_ diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h new file mode 100644 index 0000000..dd8e5fa --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h @@ -0,0 +1,79 @@ +// Copyright 2020-2023 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. 
+// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. +#ifndef RAW_BATCH_JOB_H +#define RAW_BATCH_JOB_H + +#include <assert.h> +#include <cstdio> +#if defined(_WIN32) || defined(_WIN64) +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include <windows.h> +#else +#include <dlfcn.h> +#endif +#include <cstring> +#include <iostream> +#include <string> +#include <thread> +#include <memory> + +#include "batch_job.h" +#include "dla_aot_structs.h" +#include "raw_device.h" + +// RawBatchJob represents one batch execution +// Contains functions to start DLA +class RawBatchJob : public BatchJob { + private: + const CompiledResult* compiledResult; + DLAInput* dlaBuffers_; + DLAOutput output_; + int instance_; + uint32_t debugLevel_; + std::string AES_key_; + std::string IV_key_; + bool encryption_enabled_; + RawBatchJob(const CompiledResult* compiledResult, + DLAInput* dlaBuffers, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled); + + public: + RawBatchJob(const RawBatchJob&) = delete; + RawBatchJob(RawBatchJob&) = delete; + RawBatchJob& operator=(const RawBatchJob&) = delete; + static unique_ptr<BatchJob> MakeUnique(const CompiledResult* compiledResult, + DLAInput* dlaBuffers, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled); + // @param inputArray - ptr to CPU array containing input data tp be copied to DDR + // blocking function + void LoadInputFeatureToDDR(void* inputArray); + // Starts DLA by writing to CSR in DLA DMA; the DDR addresses of graph config and input data + void StartDla() override; + // @param outputArray - ptr to CPU array where the output data in DDR is copied into + // outputArray must be allocated by the caller (size >= output_size_ddr) + // blocking function + void ReadOutputFeatureFromDDR(void* outputArray) const; + 
void ScheduleInputFeature() const {} +}; + +#endif diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h new file mode 100644 index 0000000..168707e --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h @@ -0,0 +1,81 @@ +// Copyright 2020-2023 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. 
+#ifndef RAW_DEVICE_H +#define RAW_DEVICE_H + +#include <assert.h> +#include <chrono> +#include <cstdio> +#include <cstring> +#include <iostream> +#include <memory> +#include <string> +#include <thread> +#include <vector> +#include <map> +#include "arch_params.h" +#include "compiled_result.h" +#include "device.h" +using namespace std; +using namespace dla; +class GraphJob; + +class RawDevice : public Device { + public: + GraphJob* CreateGraphJob(const CompiledResult* compiledResult, + size_t numPipelines, + int instance, + std::string AES_key, + std::string IV_key, + bool encryption_enabled, + const std::string export_dir, + const std::string parameter_rom_export_dir); + // Return number of DLA jobs completed till now + // Used for debugging + int GetNumInferencesCompleted(int instance) const override; + // Must be called when there are no active jobs on DLA + // Returns the total time taken by DLA jobs on hardware (in milliseconds) + double GetActiveHWTimeMs(int instance) const override; + // Must be called when there are no active jobs on DLA + // Returns the average of time taken per job (in milliseconds) + // Avg Time per job < Active Time + double GetAvgHWTimePerJobMs(size_t num_jobs, int instance) const override; + RawDevice(const arch_params* archParams); + void WaitForDla(int instance, + size_t threadId = 0, + std::function<bool()> isCancelled = nullptr) override; // threadId is for debugging purpose only + std::string SchedulerGetStatus() const override { return ""; } + bool InitializeScheduler(uint32_t sourceBufferSize, + uint32_t dropSourceBuffers, + uint32_t numInferenceRequests, + const std::string source_fifo_file = "") override { + return true; + } + int GetNumInstances() const override { return numInstances_; } + int GetSizeCsrDescriptorQueue() const override { return -1; } // meaningless here + double GetCoreDlaClockFreq() const override { return -1.0; } // meaningless here + std::map<std::string, uint64_t> ReadDebugNetwork(int instance) const 
override { + return std::map<std::string, uint64_t>(); + }; + uint64_t GetNumInputFeatureMemoryReads(int instance) const override { return 0; }; + uint64_t GetNumFilterMemoryReads(int instance) const override {return 0; }; + uint64_t GetNumOutputFeatureMemoryWrites(int instance) const override {return 0; }; + + private: + RawDevice() = delete; + vector<unique_ptr<GraphJob>> allGraphJobs_; + int numInstances_; + const arch_params* archParams_; +}; + +#endif // RAW_DEVICE_H diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h new file mode 100644 index 0000000..38ad075 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h @@ -0,0 +1,80 @@ +// Copyright 2020-2023 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. +#ifndef RAW_GRAPH_JOB_H +#define RAW_GRAPH_JOB_H + +#include <assert.h> +#include <cstdio> +#include <memory> +#include <vector> +#if defined(_WIN32) || defined(_WIN64) +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include <windows.h> +#else +#include <dlfcn.h> +#endif +#include <cstring> +#include <iostream> +#include <string> +#include <thread> +#include "compiled_result.h" + +#include "dla_aot_structs.h" +#include "graph_job.h" +#include "raw_batch_job.h" +#include "raw_device.h" +using namespace dla; +/*! 
RawGraphJob is a DLA compiled graph loaded onto a emulation device + * Initialized with Emulator Device object + * RawGraphJob stores arrays filter, bias, config, inputs and outputs + * It provides handle to "batch job" objects that are used to load input and start DLA for one batch + */ +class RawGraphJob : public GraphJob { + public: + static unique_ptr<GraphJob> MakeUnique(const arch_params* archParams, + const CompiledResult* compiled_result, + size_t numPipelines, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled); + // Returns an unused batch job object + // If all batch jobs are used, returns null + // Increments batchJobsRequested_ + // Thread safe + BatchJob* GetBatchJob(); + RawGraphJob(const GraphJob&) = delete; + RawGraphJob(RawGraphJob&) = delete; + RawGraphJob& operator=(const RawGraphJob&) = delete; + + private: + DLAInput dlaBuffers_; + vector<unique_ptr<BatchJob>> batchJobs_; + int instance_; + uint32_t debugLevel_; + unsigned int batchJobsRequested_; + std::mutex graphJobMutex; + RawGraphJob(const arch_params* archParams, + const CompiledResult* compiledResult, + size_t numPipelines, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled); +}; + +#endif diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter.xml b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter.xml new file mode 100644 index 0000000..2f2d24e --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter.xml @@ -0,0 +1,18 @@ +<ie> + <plugins> + <plugin name="GNA" location="libopenvino_intel_gna_plugin.so"> + </plugin> + <plugin name="HETERO" location="libcoreDLAHeteroPlugin.so"> + </plugin> + <plugin name="CPU" location="libopenvino_intel_cpu_plugin.so"> + </plugin> + <plugin name="MULTI" location="libopenvino_auto_plugin.so"> + </plugin> + 
<plugin name="GPU" location="libopenvino_intel_gpu_plugin.so"> + </plugin> + <plugin name="MYRIAD" location="libopenvino_intel_myriad_plugin.so"> + </plugin> + <plugin name="FPGA" location="libdla_aot_splitter_plugin.so"> + </plugin> + </plugins> +</ie> diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter_win.xml b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter_win.xml new file mode 100755 index 0000000..aeeedde --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter_win.xml @@ -0,0 +1,22 @@ +<ie>
+ <plugins>
+ <plugin name="AUTO" location="openvino_auto_plugin.dll">
+ </plugin>
+ <plugin name="BATCH" location="openvino_auto_batch_plugin.dll">
+ </plugin>
+ <plugin name="CPU" location="openvino_intel_cpu_plugin.dll">
+ </plugin>
+ <plugin name="GNA" location="openvino_intel_gna_plugin.dll">
+ </plugin>
+ <plugin name="GPU" location="openvino_intel_gpu_plugin.dll">
+ </plugin>
+ <plugin name="HETERO" location="coreDLAHeteroPlugin.dll">
+ </plugin>
+ <plugin name="MULTI" location="openvino_auto_plugin.dll">
+ </plugin>
+ <plugin name="MYRIAD" location="openvino_intel_myriad_plugin.dll">
+ </plugin>
+ <plugin name="FPGA" location="dla_aot_splitter_plugin.dll">
+ </plugin>
+ </plugins>
+</ie>
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg new file mode 100644 index 0000000..3288819 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg @@ -0,0 +1,4 @@ +filter=-build/header_guard,-runtime/explicit,-build/include_subdir,-runtime/references,-build/c++11,-runtime/int +exclude_files=^(?!pe_array_sim.cpp).*\.cpp +linelength=160 +headers=h,hpp diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp new file mode 100644 index 0000000..4317201 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp @@ -0,0 +1,117 @@ +// Copyright 2020 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +/* + This file contains some helper utilities to output coredla data blobs to files + in the current working directory +*/ + +#include "dla_aot_utils.h" + +// The resulting file is expected to be consumed by RTL testbench or hardware. 
+static void writeBufferToBinFile(const uint8_t *buffer, uint32_t buffer_size, + const char *file_path) { + FILE *fp = fopen(file_path, "wb"); + assert(nullptr != fp); + + if (buffer_size && !fwrite(buffer, buffer_size, 1, fp)) + { + std::cout << "ERROR writing to output file " << file_path << std::endl; + } + + fclose(fp); +} + +// The resulting file is expected to be consumed by RTL testbench or hardware. +static void writeBufferToFile(const uint8_t *buffer, uint32_t buffer_size, + const char *file_path) { + FILE *fp = fopen(file_path, "w"); + assert(nullptr != fp); + + // Write the buffer as comma-separated 32-bit hex words, 32 words (128 bytes) per line + for (uint32_t b = 0; b < buffer_size; b+=4) { + if (b && ((b % 128) == 0)) + { + fprintf(fp, "\n"); + } + fprintf(fp, "0x%08x", *((uint32_t*)&buffer[b])); + if(b + 4 < buffer_size) + { + fprintf(fp, ","); + } + } + + fclose(fp); +} + +// Create all files that the splitter is responsible for +void writeInputOutputToFiles ( + const std::vector<int>& arch_hash, + const std::string& build_version, + const std::string& arch_name, + const DLAInput &input, + const DLAOutput &output +) { + uint8_t arch_build[ARCH_HASH_SIZE + BUILD_VERSION_SIZE + ARCH_NAME_SIZE]; + + memset(&arch_build[0], 0, ARCH_HASH_SIZE + BUILD_VERSION_SIZE); // NOTE(review): the ARCH_NAME_SIZE tail is not zeroed; if arch_name is shorter than ARCH_NAME_SIZE, uninitialized bytes reach the output — confirm or widen the memset + memcpy(&arch_build[0], arch_hash.data(), ARCH_HASH_SIZE); + memcpy(&arch_build[ARCH_HASH_SIZE], build_version.c_str(), std::min(build_version.length(),static_cast<size_t>(BUILD_VERSION_SIZE))); + memcpy(&arch_build[ARCH_HASH_SIZE + BUILD_VERSION_SIZE], arch_name.c_str(), std::min(arch_name.length(),static_cast<size_t>(ARCH_NAME_SIZE))); + writeBufferToFile(arch_build, + sizeof(arch_build), + "arch_build.mem"); + writeBufferToFile(arch_build, + sizeof(arch_build), + "arch_build.bin"); + const auto &config_fbs_buffer = + input.compiled_result->get_config_filter_bias_scale_array(); + + // Only dump filters and config memory file when they are saved in DDR + if (!input.compiled_result->get_ddrfree_header().enable_parameter_rom) { + 
writeBufferToFile(&(config_fbs_buffer[0][0]), + input.config_buffer_size, + "config.mem"); + writeBufferToBinFile(&(config_fbs_buffer[0][0]), + input.config_buffer_size, + "config.bin"); + writeBufferToFile(&(config_fbs_buffer[0][0]) + input.config_buffer_size, + input.filter_bias_scale_buffer_size, + "filter.mem"); + writeBufferToBinFile(&(config_fbs_buffer[0][0]) + input.config_buffer_size, + input.filter_bias_scale_buffer_size, + "filter.bin"); + } else { + std::cout << "Graph filters and DLA configs are not dumped because parameter ROM is enabled in the AOT file." << std::endl; + } + uint8_t* input_buffer = nullptr; + size_t input_size = 0; + if (input.input_feature_buffer) { + input_buffer = input.input_feature_buffer; + input_size = input.input_feature_buffer_size; + } + writeBufferToFile(input_buffer, + input_size, + "input.mem"); + writeBufferToBinFile(input_buffer, + input_size, + "input.bin"); + uint32_t inter_size = input.intermediate_feature_buffer_size; + writeBufferToFile((const uint8_t*)&inter_size, + sizeof(inter_size), + "inter_size.mem"); + uint32_t output_size = input.output_feature_buffer_size; + writeBufferToFile((const uint8_t*)&output_size, + sizeof(output_size), + "output_size.mem"); +} diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp new file mode 100644 index 0000000..23247d5 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp @@ -0,0 +1,68 @@ +// Copyright 2022 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). 
Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +/* + The raw_batch_job, raw_graph_job, and raw_device implement the interfaces + used by dliaPlugin to mimic a inference flow without actually providing a + inference. It is used to get the transformed input performed by the dliaPlugin + upper layers +*/ + +#include "raw_batch_job.h" +#include "dla_aot_utils.h" + +unique_ptr<BatchJob> RawBatchJob::MakeUnique(const CompiledResult * compiledResult, + DLAInput* dlaBuffers, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled) { + return unique_ptr<BatchJob>(new RawBatchJob(compiledResult, dlaBuffers, instance, debugLevel, AES_key, IV_key, encryption_enabled)); +} + +RawBatchJob::RawBatchJob(const CompiledResult * compiledResult, + DLAInput* dlaBuffers, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled) : compiledResult(compiledResult) { + dlaBuffers_ = dlaBuffers; + instance_ = instance; + debugLevel_= debugLevel; + AES_key_ = AES_key; + IV_key_ = IV_key; + encryption_enabled_ = encryption_enabled; + output_.output_feature_buffer = new uint8_t[dlaBuffers_->output_feature_buffer_size]; + memset(output_.output_feature_buffer, 0, dlaBuffers_->output_feature_buffer_size); + assert(nullptr != output_.output_feature_buffer); +} + +// Emulation device has no DDR. This function is just storing a pointer to the array +// Note: inputAray should not be deleted until the end of the Emulation runs +// i.e. 
StartDla completes +void RawBatchJob::LoadInputFeatureToDDR(void* inputArray) { + dlaBuffers_->input_feature_buffer = (uint8_t*) inputArray; + StartDla(); +} + +void RawBatchJob::StartDla() { + // Write input / output buffers to files + writeInputOutputToFiles(compiledResult->get_arch_hash(), compiledResult->get_build_version_string(), compiledResult->get_arch_name(), *dlaBuffers_, output_); +} + +// Emulation device has no DDR. Output is copied into the outputArray. +void RawBatchJob::ReadOutputFeatureFromDDR(void* outputArray) const { + memcpy(outputArray, output_.output_feature_buffer, dlaBuffers_->output_feature_buffer_size); +} diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp new file mode 100644 index 0000000..0b8e838 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp @@ -0,0 +1,67 @@ +// Copyright 2022 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +/* + The raw_batch_job, raw_graph_job, and raw_device implement the interfaces + used by dliaPlugin to mimic an inference flow without actually providing an + inference. 
It is used to get the transformed input performed by the dliaPlugin + upper layers +*/ + +#include "raw_device.h" +#include "raw_graph_job.h" +unique_ptr<Device> Device::MakeUnique(const arch_params* archParams, + uint32_t waitForDlaTimeoutSeconds) { + return unique_ptr<Device>(new RawDevice(archParams)); +} + +RawDevice::RawDevice(const arch_params* archParams) { + numInstances_ = 1; + archParams_ = archParams; +} + +GraphJob* RawDevice::CreateGraphJob(const CompiledResult * compiledResult, + size_t numPipelines, + int instance, + std::string AES_key, + std::string IV_key, + bool encryption_enabled, + const std::string export_dir, + const std::string parameter_rom_export_dir) +{ + (void) export_dir; // unused in HW runtime. CoreDLA utilizes base pointers, which the SW reference utilizes this variable. We void it here. + (void) parameter_rom_export_dir; + assert(instance < numInstances_); + allGraphJobs_.push_back(move(RawGraphJob::MakeUnique(archParams_, compiledResult, numPipelines, instance, 0, + AES_key, IV_key, encryption_enabled))); + return (allGraphJobs_.back()).get(); +} + +void RawDevice::WaitForDla(int instance, size_t threadId/* = 0 */, std::function<bool()> isCancelled) { + //RawDevice does not do any real work. 
No need to wait +} + +int RawDevice::GetNumInferencesCompleted(int instance) const { + std::cout << "This function, GetNumInferencesCompleted, is not implemented for raw device" << std::endl; + return 0; +} + +double RawDevice::GetActiveHWTimeMs(int instance) const { + std::cout << "This function, GetActiveHWTimeMs, is not implemented for raw device" << std::endl; + return 0; +} + +double RawDevice::GetAvgHWTimePerJobMs(size_t num_jobs, int instance) const { + std::cout << "This function, GetAvgHWTimePerJobMs, is not implemented for raw device" << std::endl; + return 0; +} diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp new file mode 100644 index 0000000..c698110 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp @@ -0,0 +1,89 @@ +// Copyright 2022 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +/* + The raw_batch_job, raw_graph_job, and raw_device implement the interfaces + used by dliaPlugin to mimic a inference flow without actually providing a + inference. 
It is used to get the transformed input performed by the dliaPlugin + upper layers +*/ + +#include "raw_graph_job.h" +#include "dla_aot_utils.h" +#include <fstream> +#include "dla_defines.h" + +unique_ptr<GraphJob> RawGraphJob::MakeUnique(const arch_params* archParams, + const CompiledResult * compiledResult, + size_t numPipelines, + int instance, + uint32_t debugLevel = 0, + std::string AES_key = "", + std::string IV_key = "", + bool encryption_enabled = false) +{ + return unique_ptr<GraphJob>(new RawGraphJob(archParams, compiledResult, numPipelines, instance, debugLevel, AES_key, IV_key, encryption_enabled)); +} + +RawGraphJob::RawGraphJob(const arch_params* archParams, + const CompiledResult * compiledResult, + size_t numPipelines, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled) +{ + assert(numPipelines); + instance_ = instance; + debugLevel_ = debugLevel; + batchJobsRequested_ = 0; + // input feature buffer size + // TODO: support multi-input graph + dlaBuffers_.input_feature_buffer_size = + compiledResult->get_conv_input_size_in_bytes(); + // input feature buffer to be allocated outside this routine + + // output buffer size + dlaBuffers_.output_feature_buffer_size = + compiledResult->get_conv_output_size_in_bytes(); + + // intermediate buffer size + dlaBuffers_.intermediate_feature_buffer_size = + compiledResult->get_conv_intermediate_size_in_bytes(); + + // config and filter buffer size + size_t num_config_words = compiledResult->get_num_config_words(); + dlaBuffers_.config_buffer_size = num_config_words * CONFIG_WORD_SIZE; + dlaBuffers_.filter_bias_scale_buffer_size = + compiledResult->get_total_filter_bias_scale_buffer_size(); + // store a pointer to CompiledResult to use config and filter buffer directly without copying + dlaBuffers_.compiled_result = compiledResult; + for(size_t i = 0; i < numPipelines; i++) { + batchJobs_.push_back(move(RawBatchJob::MakeUnique(compiledResult, &dlaBuffers_, 
instance_, debugLevel_, AES_key, IV_key, encryption_enabled))); + } + + dlaBuffers_.input_feature_buffer = NULL; +} + +BatchJob* RawGraphJob::GetBatchJob() { + graphJobMutex.lock(); + if(batchJobsRequested_ >= batchJobs_.size()) { + graphJobMutex.unlock(); + return nullptr; + } + auto * batchJob = batchJobs_[batchJobsRequested_].get(); + batchJobsRequested_++; + graphJobMutex.unlock(); + return batchJob; +} diff --git a/python/openvino/runtime/dla_aot_splitter/inc/dla_aot_splitter.hpp b/python/openvino/runtime/dla_aot_splitter/inc/dla_aot_splitter.hpp new file mode 100644 index 0000000..44448e8 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/inc/dla_aot_splitter.hpp @@ -0,0 +1,130 @@ +// Copyright 2022-2023 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +#pragma once + +#include <gflags/gflags.h> +#include <iostream> +#include <string> +#include <vector> + +/// @brief message for help argument +static const char help_message[] = "Print a usage message"; + +/// @brief message for images argument +static const char input_message[] = + "Optional. Path to a folder with images and/or binaries or to specific image or binary file."; + +/// @brief message for compiled model argument +static const char compiled_model_message[] = "Optional. Path to a .bin file with a trained compiled model"; + +// @brief message for the custom plugins.xml file option +static const char plugins_message[] = "Optional. 
Select a custom plugins to use."; + +// @brief message folding_option flag +static const char folding_option_message[] = "Optional. Set the folding options for dla compiler: options 0-3."; + +// @brief message fold_preprocessing flag +static const char fold_preprocessing_message[] = "Optional. Enable fold preprocessing option for dla compiler."; + +// @brief message bgr flag +static const char bgr_message[] = "Optional. Indicate images are in bgr format."; + +// @brief message encryption_key flag +static const char encryption_key_message[] = + "Optional. Encryption key (using hexidecimal characters, 16 bytes- 32 hexidecimal char)."; + +// @brief message encryption_iv flag +static const char encryption_iv_message[] = + "Optional. Initialization vector for encryption. (8 bytes - 16 hexidecimal char)"; + +// @brief message binary flag +static const char bin_data_message[] = + "Optional. Specify that the input should be read as binary data (otherwise, if input tensor has depth 1, or 3 it " + "will default to U8 image processing)."; + +/// @brief message resize flag +static const char input_image_resize_message[] = + "Optional. Input image resizing methods when the input image width and height do not match the desired " + "input width and height of the model. resize: Resizing the input image to the model input size; " + "pad_resize: Pad the input image with black pixels (i.e., 0) into a squared image and " + "resize the padded image to model input size."; + +/// @brief message enable early-access features flag +static const char enable_early_access_message[] = + "Optional. Enables early access (EA) features of FPGA AI Suite. These are features that are actively being " + "developed and have not yet met production quality standards. These features may have flaws. 
" + "Consult the FPGA AI Suite documentation for details."; + +/// @brief Define flag for showing help message <br> +DEFINE_bool(h, false, help_message); + +/// @brief Declare flag for showing help message <br> +DECLARE_bool(help); + +/// @brief Define parameter for set image file <br> +/// i or mif is a required parameter +DEFINE_string(i, "", input_message); + +/// @brief Define parameter for compiled model file <br> +/// It is not a required parameter +DEFINE_string(cm, "", compiled_model_message); + +/// @brief Path to a plugins_xml file +DEFINE_string(plugins, "", plugins_message); + +/// @brief Define flag whether the image is in bgr format +DEFINE_bool(bgr, false, bgr_message); + +/// Select folding options; 0,1,2,3 +DEFINE_int32(folding_option, 1, folding_option_message); + +/// @brief Define flag for enabling folding preprocessing +DEFINE_bool(fold_preprocessing, false, fold_preprocessing_message); + +/// @brief encryption key +DEFINE_string(encryption_key, "", encryption_key_message); + +/// @brief initialization vector +DEFINE_string(encryption_iv, "", encryption_iv_message); + +/// @brief Specify that the inputs should be read as binary. 
+DEFINE_bool(bin_data, false, bin_data_message); + +/// @brief Define flag for using input image resize <br> +DEFINE_string(resize_type, "", input_image_resize_message); + +/// @brief Enables early-access (EA) features of CoreDLA <br> +DEFINE_bool(enable_early_access, false, enable_early_access_message); + +/** + * @brief This function show a help message + */ +static void showUsage() { + std::cout << std::endl; + std::cout << "aot_splitter [OPTION]" << std::endl; + std::cout << "Options:" << std::endl; + std::cout << std::endl; + std::cout << " -h, --help " << help_message << std::endl; + std::cout << " -i \"<path>\" " << input_message << std::endl; + std::cout << " -cm \"<path>\" " << compiled_model_message << std::endl; + std::cout << " -plugins " << plugins_message << std::endl; + std::cout << " -bgr " << bgr_message << std::endl; + std::cout << " -bin_data " << bin_data_message << std::endl; + std::cout << " -resize_type \"resize/pad_resize\" " << input_image_resize_message << std::endl; + std::cout << " -folding_option " << folding_option_message << std::endl; + std::cout << " -fold_preprocessing " << fold_preprocessing_message << std::endl; + std::cout << " -encryption_key " << encryption_key_message << std::endl; + std::cout << " -encryption_iv " << encryption_iv_message << std::endl; + std::cout << " -enable_early_access " << enable_early_access_message << std::endl; +} diff --git a/python/openvino/runtime/dla_aot_splitter/sdl.cmake b/python/openvino/runtime/dla_aot_splitter/sdl.cmake new file mode 100644 index 0000000..3f8af7a --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/sdl.cmake @@ -0,0 +1,96 @@ + +#################################################################### +## SDL required compiler flags +#################################################################### +# Needed for all builds +set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wformat -Wformat-security") +set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") + +set (CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} -Wformat -Wformat-security") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") + +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations") + +set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fPIE") +set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fPIE") + +# Release build only +set (CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -D_FORTIFY_SOURCE=2") +if (GCC_VERSION VERSION_GREATER 4.9 OR GCC_VERSION VERSION_EQUAL 4.9) + set (CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fstack-protector-strong") + set (CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -z noexecstack -z relro -z now") + + # These are for 8478-CT158 in the SDL process + # ( https://sdp-prod.intel.com/bunits/intel/coredla/coredla-ip-20212/tasks/phase/development/8478-CT158/ ) +else() + set (CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fstack-protector-all") +endif() + +set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC -D_FORTIFY_SOURCE=2") +if (GCC_VERSION VERSION_GREATER 4.9 OR GCC_VERSION VERSION_EQUAL 4.9) + set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fstack-protector-strong") + set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -z noexecstack -z relro -z now") +else() + set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fstack-protector-all") +endif() + +# These are for 8478-CT158 in the SDL process +# ( https://sdp-prod.intel.com/bunits/intel/coredla/coredla-ip-20212/tasks/phase/development/8478-CT158/ ) +set (CMAKE_C_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -fno-strict-overflow -fno-delete-null-pointer-checks -fwrapv") +set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -fno-strict-overflow -fno-delete-null-pointer-checks -fwrapv") +set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -fno-strict-overflow -fno-delete-null-pointer-checks -fwrapv") + +#################################################################### + +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3") +set(CMAKE_CXX_FLAGS_RELEASE 
"${CMAKE_CXX_FLAGS_RELEASE} -O3") + +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -ggdb3") +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -ggdb3") + +#### Sanitizer settings #### +# Address +set(CMAKE_C_FLAGS_ASAN "-O1 -g -fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls") +set(CMAKE_CXX_FLAGS_ASAN "-O1 -g -fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls") + +# Memory +set(CMAKE_C_FLAGS_MSAN "-O1 -g -fsanitize=memory -fno-omit-frame-pointer -fno-optimize-sibling-calls") +set(CMAKE_CXX_FLAGS_MSAN "-O1 -g -fsanitize=memory -fno-omit-frame-pointer -fno-optimize-sibling-calls") + +# Thread +set(CMAKE_C_FLAGS_TSAN "-O1 -g -fsanitize=thread -fno-omit-frame-pointer -fno-optimize-sibling-calls") +set(CMAKE_CXX_FLAGS_TSAN "-O1 -g -fsanitize=thread -fno-omit-frame-pointer -fno-optimize-sibling-calls") + + +set (CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +# Enable all warnings except unknown-pragmas. Wunknown-pragmas must be excluded because +# it is triggered by header file included from OpenCL runtime +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unknown-pragmas") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-unknown-pragmas") + +# Make warnings errors to avoid having them in SDL report +#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") +#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror") + +# Should cleanup the signed and unsigned compares then remove this exception +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=sign-compare -Wno-error=unused-function -Wno-error=switch -Wno-error=unused-variable -Wno-error=unused-value -Wno-error=unused-but-set-variable -Wno-error=undef -Wno-error=return-type -Wno-error=reorder") + +# This is required on Ubuntu 18; the new linker behaviour transforms +# RPATH into RUNPATH (which can be seen in the output of 'readelf -d'). 
+# However, RUNPATH does not work recursively, so when OpenVINO reads +# the plugins.xml file and searches for the specified libcoreDlaRuntimePlugin.so +# library, it fails. The --disable-new-dtags option causes the linker +# to keep RPATH as RPATH (rather than morphing to RUNPATH). +# +# References: +# https://stackoverflow.com/questions/52018092/how-to-set-rpath-and-runpath-with-gcc-ld +# https://stackoverflow.com/questions/59248421/c-secondary-dependency-resolution-with-runpath +# +# The solution below seems preferable to setting LD_LIBRARY_PATH, if only barely. +# For additional motivation, go ahead and throw away part of your day reading either +# of the screeds: +# http://xahlee.info/UnixResource_dir/_/ldpath.html +# https://gms.tf/ld_library_path-considered-harmful.html +# You may find that neither is fully convincing, of course. +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--disable-new-dtags") diff --git a/python/openvino/runtime/dla_aot_splitter/src/main.cpp b/python/openvino/runtime/dla_aot_splitter/src/main.cpp new file mode 100644 index 0000000..ffc098e --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/src/main.cpp @@ -0,0 +1,475 @@ +// Copyright 2022-2023 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. 
+ +#include <stdio.h> +#include <sys/stat.h> +#include <algorithm> +#include <map> +#include <memory> +#include <string> +#include <utility> +#include <vector> +#if defined(_WIN32) || defined(_WIN64) +#else +#include <dirent.h> +#include <unistd.h> +#endif + +#include <openvino/openvino.hpp> +#include "samples/args_helper.hpp" +#include "samples/common.hpp" +#include "samples/slog.hpp" + +// #include "average_precision.hpp" +#include "dla_aot_splitter.hpp" +// #include "infer_request_wrap.hpp" +#include "dla_plugin_config.hpp" +#include "inputs_filling.hpp" +#include "utils.hpp" + +using DebugNetworkData = std::map<std::string, uint64_t>; + +bool exists_test(const std::string& name) { + struct stat buffer; + return (stat(name.c_str(), &buffer) == 0); +} + +// This function appears in dla_benchmark/main.cpp too. +bool dir_open_test(const std::string& name) { +#if (!defined(_WIN32) && !defined(_WIN64)) + // If we can open the directory then return true + DIR* dp = opendir(name.c_str()); + if (dp != nullptr) { + closedir(dp); + return true; + } +#endif // !_WIN32 && !_WIN64 + struct stat sb; + if (stat(name.c_str(), &sb) == 0) { + if ((sb.st_mode & S_IFMT) != S_IFREG) { + slog::err << "File " << name << " cannot be opened!" << slog::endl; + throw std::logic_error("File cannot be opened!"); + } + } + return true; +} + +// copy arguments into a new array to split the '-i=<arg>' into +// two arguments (i.e. 
'-i' and '<arg>') to overcome a bug +// parseInputFilesArguments function where is doesn't recognize +// the -i=<arg> format +void parseCommandLine(int argc, char** argv) { + int num_args = argc; + // allocated enough memory in case we needed to split the -i argument into two + char** arguments = new char*[num_args + 1]; + for (int i = 0, j = 0; j < argc; ++i, ++j) { + if (strstr(argv[j], "-i=")) { + // number of arguments will increase by one after splitting + num_args++; + arguments[i] = new char[3]; + strcpy(arguments[i++], "-i"); + // copy the reset of the argument (i.e. post "-i=") + arguments[i] = new char[strlen(argv[j]) - 2]; + strcpy(arguments[i], argv[j] + 3); + continue; + } + arguments[i] = new char[strlen(argv[j]) + 1]; + strcpy(arguments[i], argv[j]); + } + // the parse function is modifying the arguments point so we need to keep + // a copy of the original pointer value to delete it properly + char** orig_arg_ptr = arguments; + gflags::ParseCommandLineNonHelpFlags(&num_args, &arguments, true); + // delete the allocated memory + for (int i = 0; i < num_args; ++i) { + delete[] orig_arg_ptr[i]; + } + delete[] orig_arg_ptr; +} + +bool ParseAndCheckCommandLine(int argc, char* argv[], size_t& netSize) { + // ---------------------------Parsing and validating input arguments-------------------------------------- + slog::info << "Parsing input parameters" << slog::endl; + + // Check for any flags that are missing their preceding dashes + // GFlags quietly ignores any flags missing their dashes, which can cause + // aot_splitter to run with settings other than what the user intended + + // GFlags supports two different styles of flag: + // 1. --<flag> + // 2. -<flag> + // It also supports two different ways of specifying values for flags which + // take values: + // 1. --<flag>=<value> + // 2. 
--<flag> <value> + + // If we are not expecting a flag, we are expecting a value for the + // preceding flag + bool expectingFlag = true; + // Start at 1 to skip the command itself + for (int i = 1; i < argc; i++) { + if (expectingFlag) { + // A flag is always denoted by the first char being '-' + if (argv[i][0] != '-') { + slog::err << "Argument " << argv[i] << " is invalid. You" + << " may have forgotten a preceding '-'." << slog::endl; + throw std::logic_error("One or more invalid arguments"); + } + + char* flagNameStart = (argv[i][1] == '-') ? &argv[i][2] : &argv[i][1]; + std::string flagName; + + gflags::CommandLineFlagInfo flagInfo; + if (strstr(flagNameStart, "=")) { + flagName = std::string(flagNameStart, size_t(strstr(flagNameStart, "=") - flagNameStart)); + } else { + flagName = std::string(flagNameStart); + } + + // We expect a flag in the next argv if the current flag is a bool, + // because bool flags do not take a value. + // If GetCommandLineFlagInfo returns false, we assume the current + // flag is a boolean because boolean flags can be specified as + // -no<flag>, which is equivalent to -<flag>=false, or the flag + // simply being omitted. However, "no<flag>" is not recognized by + // GetCommandLineFlagInfo. + // Therefore, if the name is not recognized either the flag is a + // boolean flag or doesn't exist. In the latter case, gflags errors + // when we call parseCommandLine so we can assume here it's a bool. 
+ if (!GetCommandLineFlagInfo(flagName.c_str(), &flagInfo) || strstr(argv[i], "=") || flagInfo.type == "bool") { + expectingFlag = true; + } else { + expectingFlag = false; + } + } else { + // If we were expecting a value, doesn't matter what it is + // gflags will check all values are the correct type, and + // aot_splitter checks if the values received are sane + expectingFlag = true; + } + } + + parseCommandLine(argc, argv); + + if (FLAGS_help || FLAGS_h) { + showUsage(); + // CoreDLA: Version 2020.3 of OpenVINO assumes that the PAC board with OPAE on it + // is an OpenCL/DLAv1 device. Since it is not, it then errors-out when the device + // does not response as expected to the OpenCL query. + // showAvailableDevices(); + std::cout << "\n"; + return false; + } + + if (FLAGS_cm.empty()) { + throw std::logic_error("Model is required but not set. Please set -cm option."); + } else { + std::vector<std::string> m_paths = split(FLAGS_cm, MULTIGRAPH_SEP); + netSize = m_paths.size(); + slog::info << "Found " << netSize << " compiled graph" << (netSize == 1 ? "" : "s") << slog::endl; + for (auto& m_path : m_paths) { + if (!exists_test(m_path)) { + slog::err << "compiled model file: " << FLAGS_cm << " doesn't exist. Please provide a valid path with -cm." + << slog::endl; + throw std::logic_error("Compiled model file path does not exist."); + } + } + } + + if (!FLAGS_plugins.empty()) { + slog::info << "Using custom plugins xml file - " << FLAGS_plugins << slog::endl; + } + + if (!exists_test(FLAGS_plugins)) { + slog::err << "plugins_xml file: " << FLAGS_plugins << " doesn't exist. Please provide a valid path." 
<< slog::endl; + throw std::logic_error("plugins_xml file path does not exist."); + } + + return true; +} + +static void next_step(const std::string additional_info = "") { + static size_t step_id = 0; + static const std::map<size_t, std::string> step_names = { + {1, "Parsing and validating input arguments"}, + {2, "Loading Inference Engine"}, + {3, "Setting device configuration"}, + {4, "Reading the Intermediate Representation network"}, + {5, "Resizing network to match image sizes and given batch"}, + {6, "Configuring input of the model"}, + {7, "Loading the model to the device"}, + {8, "Setting optimal runtime parameters"}, + {9, "Creating infer requests and filling input blobs with images"}, + {10, "Measuring performance"}, + {11, "Dumping statistics report"}, + {12, "Dumping the output values"}}; + + step_id++; + if (step_names.count(step_id) == 0) { + THROW_IE_EXCEPTION << "Step ID " << step_id << " is out of total steps number " << step_names.size(); + } + + std::cout << "[Step " << step_id << "/" << step_names.size() << "] " << step_names.at(step_id) + << (additional_info.empty() ? "" : " (" + additional_info + ")") << std::endl; +} + +template <typename T> +T getMedianValue(const std::vector<T>& vec) { + std::vector<T> sortedVec(vec); + std::sort(sortedVec.begin(), sortedVec.end()); + return (sortedVec.size() % 2 != 0) + ? sortedVec[sortedVec.size() / 2ULL] + : (sortedVec[sortedVec.size() / 2ULL] + sortedVec[sortedVec.size() / 2ULL - 1ULL]) / static_cast<T>(2.0); +} + +/** + * @brief The entry point of the dla benchmark + */ +int main(int argc, char* argv[]) { + try { + // Declaring the ExecutableNetwork object as a pointer to workaround the segfault + // that occurs when destructing the object. 
Now that it's declared as a pointer + // the complier won't automatically call the destructor of the object at the end + // of this scope and we won't delete the allocated memory either + std::vector<ov::CompiledModel*> exeNetworks; + size_t netSize = 0; // parse the size of networks for arguments check + + size_t return_code = 0; // universal return code, return this value after dumping out Debug info + + // ----------------- 1. Parsing and validating input arguments ------------------------------------------------- + next_step(); + + if (!ParseAndCheckCommandLine(argc, argv, netSize)) { + return 0; + } + + bool isNetworkCompiled = !FLAGS_cm.empty(); + if (isNetworkCompiled) { + slog::info << "Network is compiled" << slog::endl; + } + + // The set of arguments printed is meant to be a useful summary to the + // user, rather than all of the arguments to aot_splitter + slog::info << "Printing summary of arguments being used by aot_splitter" << slog::endl + << "Device (-d) .......................... " + << "HETERO:FPGA" << slog::endl + << "Compiled model (-cm) ................. " << FLAGS_cm << slog::endl + << "Input images directory (-i) .......... " + << (!FLAGS_i.empty() ? FLAGS_i : "Not specified, will use randomly-generated images") << slog::endl + << "Plugins file (-plugins) ..... " << FLAGS_plugins << slog::endl + << "Reverse input image channels (-bgr) .. " << (FLAGS_bgr ? 
"True" : "False") << slog::endl; + + /** This vector stores paths to the processed images **/ + auto multiInputFiles = VectorMap<std::vector<std::string>>( + SplitMultiInputFilesArguments(netSize), // get input directory list + [&](const std::vector<std::string>& inputArgs) mutable { + std::vector<std::string> files; + for (auto& inputArg : inputArgs) { + // Test if the path exists + if (!exists_test(inputArg)) { + slog::err << "Specified image path: " << inputArg << " does not exist" << slog::endl; + throw std::logic_error("Image path does not exist"); + } + // Test whether the path can be opened if it's a directory + dir_open_test(inputArg); + readInputFilesArguments(files, inputArg); + } + + return files; + }); + if (multiInputFiles.size() == 0) { + // failed to read input files + slog::err << "Failed to read input files" << slog::endl; + return 1; + } + + uint32_t num_batches = 1; + + // ----------------- 2. Loading the Inference Engine ----------------------------------------------------------- + next_step(); + + // Get optimal runtime parameters for device + std::string device_name = "HETERO:FPGA"; + ov::Core core(FLAGS_plugins); + + if (device_name.find("FPGA") != std::string::npos) { + if (FLAGS_encryption_key != "") { + core.set_property("FPGA", {{DLIAPlugin::properties::encryption_key.name(), FLAGS_encryption_key}}); + } + if (FLAGS_encryption_iv != "") { + core.set_property("FPGA", {{DLIAPlugin::properties::encryption_iv.name(), FLAGS_encryption_iv}}); + } + } + + slog::info << "OpenVINO: " << ov::get_openvino_version() << slog::endl; + + // ----------------- 3. 
Setting device configuration ----------------------------------------------------------- + next_step(); + + size_t batchSize = 1; + std::vector<std::string> topology_names; + if (!isNetworkCompiled) { + } else { + next_step(); + slog::info << "Skipping the step for compiled network" << slog::endl; + next_step(); + slog::info << "Skipping the step for compiled network" << slog::endl; + next_step(); + slog::info << "Skipping the step for compiled network" << slog::endl; + // ----------------- 7. Loading the model to the device -------------------------------------------------------- + next_step(); + + int folding_option = 1; + bool fold_preprocessing = false; + bool enable_early_access = false; + if (FLAGS_folding_option) { + folding_option = FLAGS_folding_option; + } + if (FLAGS_fold_preprocessing) { + fold_preprocessing = FLAGS_fold_preprocessing; + } + if (FLAGS_enable_early_access) { + enable_early_access = FLAGS_enable_early_access; + } + core.set_property("FPGA", {{DLIAPlugin::properties::folding_option.name(), std::to_string(folding_option)}}); + core.set_property("FPGA", + {{DLIAPlugin::properties::fold_preprocessing.name(), fold_preprocessing}}); + core.set_property("FPGA", + {{DLIAPlugin::properties::enable_early_access.name(), enable_early_access}}); + + auto compiled_graph_paths = split(FLAGS_cm, MULTIGRAPH_SEP); + exeNetworks = vectorMapWithIndex<ov::CompiledModel*>( + split(FLAGS_cm, MULTIGRAPH_SEP), // get a list of compiled graphs + [&](const std::string& compiled_graph_path, size_t index) { + std::stringstream generated_name; + generated_name << "Graph_" << index; + slog::info << "Importing model from " << compiled_graph_paths[index] << " to " << device_name << " as " + << generated_name.str() << slog::endl; + std::filebuf objFileBuf; + objFileBuf.open(compiled_graph_paths[index].c_str(), std::ios::in | std::ios::binary); + std::istream objIstream(&objFileBuf); + auto exeNetwork = new ov::CompiledModel(); + *exeNetwork = core.import_model(objIstream, 
device_name, {}); + topology_names.push_back(generated_name.str()); + objFileBuf.close(); + printInputAndOutputsInfoShort(*exeNetwork); + if (batchSize == 0) { + batchSize = 1; + } + const auto& inputs = exeNetwork->inputs(); + for (const auto& item : inputs) { + auto& dims = item.get_shape(); + if (dims[0] != batchSize) { + slog::err << "Batch size of the compiled model is " << dims[0] << " and batch size provided is " + << batchSize << slog::endl; + std::cout << "Set the same batch size = " << dims[0] << " when running the app" << std::endl; + std::cout << "Or recompile model with batch size = " << batchSize << std::endl; + exit(5); + } + } + return exeNetwork; + }); + } + // ----------------- 8. Setting optimal runtime parameters ----------------------------------------------------- + next_step(); + + // Number of requests + uint32_t nireq = 1; + if (nireq == 0) { + nireq = 1; + } + int niter = 1; + + if (niter > 0) { + num_batches = niter; + } + + // ----------------- 9. Creating infer requests and filling input blobs ---------------------------------------- + next_step(); + std::vector<dla_benchmark::InputsInfo> inputInfos; + // Data structure hierarchy + // Outermost vec: which model it corresponds to (multigraph) + // Map: input/output name and its corresponding TensorVector + // TensorVector: An alias for vector<ov::tensor> where each vector element correspond to the batch + std::vector<std::map<std::string, ov::TensorVector>> inputsData; + std::vector<std::map<std::string, ov::TensorVector>> outputTensors(exeNetworks.size()); + + std::vector<std::unique_ptr<InferRequestsQueue>> inferRequestsQueues; + const std::string resize_type = FLAGS_resize_type.empty() ? "resize" : FLAGS_resize_type; + for (size_t netIdx = 0; netIdx < exeNetworks.size(); netIdx++) { + // Handle the case that use same inputs for all networks + const auto& inputFiles = netIdx >= multiInputFiles.size() ? 
multiInputFiles.back() : multiInputFiles[netIdx]; + inputInfos.push_back(GetInputsInfo(batchSize, exeNetworks[netIdx]->inputs(), FLAGS_bin_data)); + inputsData.push_back(GetStaticTensors(inputFiles.empty() ? std::vector<std::string>{} : inputFiles, + batchSize, + inputInfos[netIdx], + num_batches, + resize_type, + FLAGS_bgr, + FLAGS_bin_data, + false /* verbose outputs not supported for aot splitter */)); + // Use unique_ptr to create InferRequestsQueue objects and avoid copying mutex and cv + inferRequestsQueues.push_back( + std::move(std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(*(exeNetworks[netIdx]), nireq)))); + } + + /** Start inference & calculate performance **/ + /** to align number if iterations to guarantee that last infer requests are executed in the same conditions **/ + std::vector<size_t> iterations(exeNetworks.size(), 0); + + try { + { + // set up all infer request and prep all i/o Blobs + for (size_t net_id = 0; net_id < exeNetworks.size(); net_id++) { + for (size_t iireq = 0; iireq < nireq; iireq++) { + auto inferRequest = inferRequestsQueues.at(net_id)->get_idle_request(); + if (!inferRequest) { + THROW_IE_EXCEPTION << "No idle Infer Requests!"; + } + + if (niter != 0LL) { + const auto& outputs = exeNetworks[net_id]->outputs(); + for (const auto& output : outputs) { + const std::string& name = output.get_any_name(); + outputTensors.at(net_id)[name].emplace_back(output.get_element_type(), output.get_shape()); + inferRequest->set_tensor(name, outputTensors.at(net_id).at(name).at(iterations.at(net_id))); + } + const auto& inputs = exeNetworks[net_id]->inputs(); + for (auto& input : inputs) { + const std::string& inputName = input.get_any_name(); + const auto& data = inputsData.at(net_id).at(inputName)[iterations.at(net_id)]; + inferRequest->set_tensor(inputName, data); + } + } + + { + std::cout << "Generating Artifacts" << std::endl; + inferRequest->infer(); + } + } + } + } + } catch (const std::exception& ex) { + std::cerr << ex.what() 
<< std::endl; + slog::err << "Generation failed" << slog::endl; + return_code = 1; + } + + if (return_code) return return_code; + } catch (const std::exception& ex) { + slog::err << ex.what() << slog::endl; + return 3; + } + + return 0; +} |
