summaryrefslogtreecommitdiff
path: root/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin
diff options
context:
space:
mode:
Diffstat (limited to 'python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin')
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/CMakeLists.txt113
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h38
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h49
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h79
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h81
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h80
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter.xml18
-rwxr-xr-xpython/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter_win.xml22
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg4
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp117
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp68
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp67
-rw-r--r--python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp89
13 files changed, 825 insertions, 0 deletions
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/CMakeLists.txt b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/CMakeLists.txt
new file mode 100644
index 0000000..6f5e916
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/CMakeLists.txt
@@ -0,0 +1,113 @@
+cmake_minimum_required(VERSION 3.10)
+
+# Shared library for the AOT splitter plugin. Its sources are attached further
+# down with target_sources().
+add_library(dla_aot_splitter_plugin SHARED)
+
+# Require at least C++11 for this target and (PUBLIC) for anything linking to it.
+target_compile_features(dla_aot_splitter_plugin PUBLIC cxx_std_11)
+
+# Adds the DISABLE_JIT preprocessor definition.
+# NOTE(review): this does not set the CMake variable DISABLE_JIT that the
+# `if (DISABLE_JIT)` block further down tests -- confirm that variable is
+# provided by the parent project or on the command line.
+target_compile_definitions(dla_aot_splitter_plugin PUBLIC DISABLE_JIT)
+
+set_target_properties(dla_aot_splitter_plugin PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+if (WIN32)
+ # Fix warning C4273: inconsistent dll linkage
+ # The openvino::runtime interface definitions are re-applied to this target.
+ target_compile_definitions(dla_aot_splitter_plugin PRIVATE XBYAK_NO_OP_NAMES
+ IMPLEMENT_INFERENCE_ENGINE_PLUGIN
+ $<TARGET_PROPERTY:openvino::runtime,INTERFACE_COMPILE_DEFINITIONS>)
+endif()
+
+# Private header search paths: CoreDLA plugin, utility and runtime headers are
+# located through the COREDLA_ROOT environment variable (read at configure
+# time); the last entry is this plugin's own inc/ directory.
+target_include_directories(dla_aot_splitter_plugin PRIVATE
+ $ENV{COREDLA_ROOT}/dla_plugin
+ $ENV{COREDLA_ROOT}/dla_plugin/inc
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia
+ $ENV{COREDLA_ROOT}/util/inc # dla_error.h
+ $ENV{COREDLA_ROOT}/inc # dla_dma_constants.svh
+ $ENV{COREDLA_ROOT}/runtime/coredla_device/inc # For abstract classes (BatchJob, Device etc.)
+ #
+ ${CMAKE_CURRENT_SOURCE_DIR}/inc
+)
+
+# The plugin is built from the dla_plugin sources under COREDLA_ROOT combined
+# with the Raw* device / graph job / batch job implementations of this directory.
+target_sources(dla_aot_splitter_plugin PRIVATE
+## dla_plugin headers
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_async_infer_request.h
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_config.hpp
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_compiled_model.h
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_runtime_log.h
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia_infer_request.h
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia_plugin.h
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia_utils.h
+ $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_plugin_config.hpp
+## dla_plugin and util sources
+ $ENV{COREDLA_ROOT}/dla_plugin/src/dla_async_infer_request.cpp
+ $ENV{COREDLA_ROOT}/dla_plugin/src/dla_config.cpp
+ $ENV{COREDLA_ROOT}/dla_plugin/src/dla_compiled_model.cpp
+ $ENV{COREDLA_ROOT}/dla_plugin/src/dlia_infer_request.cpp
+ $ENV{COREDLA_ROOT}/dla_plugin/src/dlia_plugin.cpp
+ $ENV{COREDLA_ROOT}/dla_plugin/src/dla_plugin_jit_functions.cpp
+ $ENV{COREDLA_ROOT}/dla_plugin/src/dlia_utils.cpp
+ $ENV{COREDLA_ROOT}/util/src/dla_numeric_utils.cpp
+## coredla_device interface headers (GraphJob, BatchJob, Device)
+ $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/graph_job.h
+ $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/batch_job.h
+ $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/device.h
+## sources and headers of this plugin
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/raw_graph_job.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/raw_device.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/raw_batch_job.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/dla_aot_utils.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/inc/raw_graph_job.h
+ ${CMAKE_CURRENT_SOURCE_DIR}/inc/raw_device.h
+ ${CMAKE_CURRENT_SOURCE_DIR}/inc/raw_batch_job.h
+ ${CMAKE_CURRENT_SOURCE_DIR}/inc/dla_aot_utils.h
+ ${CMAKE_CURRENT_SOURCE_DIR}/inc/dla_aot_structs.h
+)
+
+# Link dependencies. The two branches are identical except that the
+# non-Windows branch additionally links pthread (listed first).
+if (WIN32)
+ target_link_libraries(dla_aot_splitter_plugin
+ PRIVATE
+##
+ dla_op_transformation
+ dliaPluginIOTransformations
+ openvino::runtime
+ openvino_dev_api
+ ${TBB_IMPORTED_TARGETS}
+)
+else()
+ target_link_libraries(dla_aot_splitter_plugin
+ PRIVATE
+##
+ pthread
+ dla_op_transformation
+ dliaPluginIOTransformations
+ openvino::runtime
+ openvino_dev_api
+ ${TBB_IMPORTED_TARGETS}
+)
+endif()
+
+# DISABLE_JIT here is a CMake variable (not the compile definition added above).
+# When it is true, the compiled-result reader/writer is compiled directly into
+# the plugin; otherwise the plugin links dla_compiled_result and archparam.
+if (DISABLE_JIT)
+ target_include_directories(dla_aot_splitter_plugin PRIVATE
+ $ENV{COREDLA_ROOT}/util/inc
+ $ENV{COREDLA_XUTIL_DIR}/compiled_result/inc
+ )
+ target_sources(dla_aot_splitter_plugin PRIVATE $ENV{COREDLA_XUTIL_DIR}/compiled_result/src/compiled_result_reader_writer.cpp)
+
+ # Use $COREDLA_ROOT/inc when that directory exists, else fall back to the
+ # build-tree include directory.
+ if (EXISTS $ENV{COREDLA_ROOT}/inc)
+ target_include_directories(dla_aot_splitter_plugin PUBLIC $ENV{COREDLA_ROOT}/inc)
+ else()
+ target_include_directories(dla_aot_splitter_plugin PUBLIC $ENV{COREDLA_ROOT}/build/coredla/dla/inc)
+ endif()
+else()
+ target_link_libraries(dla_aot_splitter_plugin
+ PRIVATE
+ dla_compiled_result
+ archparam
+ )
+endif()
+
+# dliaPluginIOTransformations is linked into this shared library, so it is
+# switched to position-independent code here. Note that this changes a
+# property of a target defined elsewhere.
+set_target_properties(dliaPluginIOTransformations PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+# Copy the platform-specific plugin list into the build directory; in both
+# cases the resulting file is named plugins_aot_splitter.xml.
+if (WIN32)
+ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/plugins_aot_splitter_win.xml ${CMAKE_CURRENT_BINARY_DIR}/plugins_aot_splitter.xml COPYONLY)
+else()
+ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/plugins_aot_splitter.xml ${CMAKE_CURRENT_BINARY_DIR}/ COPYONLY)
+endif()
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h
new file mode 100644
index 0000000..697b5d2
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h
@@ -0,0 +1,38 @@
+// Copyright 2020 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+#ifndef _DLA_AOT_STRUCTS_H_
+#define _DLA_AOT_STRUCTS_H_
+
+#include "compiled_result.h"
+
+// Custom type
+// NOTE(review): <cstdint> already provides uint8_t (and the uint32_t used
+// below, which this header does not include itself). This typedef only
+// compiles where the standard uint8_t is also unsigned char -- consider
+// including <cstdint> instead.
+typedef unsigned char uint8_t;
+
+// Description of the buffers of one compiled graph, as consumed by
+// writeInputOutputToFiles().
+// All size and offset fields are in bytes.
+typedef struct {
+ // Compiled graph; the config and filter/bias/scale data are read from it.
+ const dla::CompiledResult* compiled_result;
+ // Size of the config part at the start of the config/filter array.
+ uint32_t config_buffer_size;
+ // Size of the filter/bias/scale part that follows the config part.
+ uint32_t filter_bias_scale_buffer_size;
+ // Caller-provided input data; may be null, in which case nothing is dumped.
+ uint8_t *input_feature_buffer;
+ uint32_t input_feature_buffer_size;
+ uint32_t output_feature_buffer_size;
+ uint32_t intermediate_feature_buffer_size;
+} DLAInput;
+
+// Holder for the output feature buffer of one batch job.
+typedef struct {
+ // Its size is output_feature_buffer_size in DLAInput.
+ uint8_t *output_feature_buffer;
+} DLAOutput;
+
+#endif // _DLA_AOT_STRUCTS_H_
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h
new file mode 100644
index 0000000..7fa23e8
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h
@@ -0,0 +1,49 @@
+// Copyright 2020-2023 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+#ifndef _DLA_AOT_UTILS_H_
+#define _DLA_AOT_UTILS_H_
+
+#include <fcntl.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+#include <sys/stat.h>
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "dla_aot_structs.h"
+
+using google::protobuf::io::FileInputStream;
+
+// Size of one fp16 feature element, in bytes.
+// As a namespace-scope const it has internal linkage, so every translation
+// unit that includes this header gets its own copy.
+// TODO: extract it from arch / compiled result
+const uint32_t feature_elem_size = 2;
+
+//////////////////////////////////////////////////////////////////////////////
+// Dump the DLA buffers to the following files in the current working
+// directory:
+// - arch_build.mem / arch_build.bin: arch hash, build version and arch name
+// - config.mem / config.bin: config buffer
+// - filter.mem / filter.bin: filter/bias/scale buffer
+// - input.mem / input.bin: input feature buffer
+// - inter_size.mem: intermediate feature buffer size
+// - output_size.mem: output feature buffer size
+// The config and filter files are not written when the compiled result has
+// the parameter ROM enabled.
+//
+// Each .mem file is a text file of comma-separated 32-bit hex words
+// (0x%08x), 32 words per line. config.bin, filter.bin and input.bin hold the
+// raw bytes of the same buffers.
+//
+// The `output` argument is currently not read.
+//////////////////////////////////////////////////////////////////////////////
+
+void writeInputOutputToFiles(const std::vector<int>& arch_hash,
+ const std::string& build_version,
+ const std::string& arch_name,
+ const DLAInput& input,
+ const DLAOutput& output);
+
+#endif // _DLA_AOT_UTILS_H_
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h
new file mode 100644
index 0000000..dd8e5fa
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h
@@ -0,0 +1,79 @@
+// Copyright 2020-2023 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+#ifndef RAW_BATCH_JOB_H
+#define RAW_BATCH_JOB_H
+
+#include <assert.h>
+#include <cstdio>
+#if defined(_WIN32) || defined(_WIN64)
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#else
+#include <dlfcn.h>
+#endif
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <thread>
+#include <memory>
+
+#include "batch_job.h"
+#include "dla_aot_structs.h"
+#include "raw_device.h"
+
+// RawBatchJob represents one batch execution.
+// No hardware is started: StartDla() dumps the graph buffers and the input
+// to files through writeInputOutputToFiles().
+class RawBatchJob : public BatchJob {
+ private:
+  const CompiledResult* compiledResult;
+  // Buffer description shared with the graph job that created this batch job; not owned.
+  DLAInput* dlaBuffers_;
+  // Owns output_.output_feature_buffer (allocated in the constructor, released in the destructor).
+  DLAOutput output_;
+  int instance_;
+  uint32_t debugLevel_;
+  std::string AES_key_;
+  std::string IV_key_;
+  bool encryption_enabled_;
+  RawBatchJob(const CompiledResult* compiledResult,
+              DLAInput* dlaBuffers,
+              int instance,
+              uint32_t debugLevel,
+              std::string AES_key,
+              std::string IV_key,
+              bool encryption_enabled);
+
+ public:
+  // Releases the output buffer allocated by the constructor; without this the
+  // buffer leaked for every batch job. Copying is deleted below, so the buffer
+  // has exactly one owner.
+  ~RawBatchJob() { delete[] output_.output_feature_buffer; }
+  RawBatchJob(const RawBatchJob&) = delete;
+  RawBatchJob(RawBatchJob&) = delete;
+  RawBatchJob& operator=(const RawBatchJob&) = delete;
+  // Factory; the constructor is private.
+  static unique_ptr<BatchJob> MakeUnique(const CompiledResult* compiledResult,
+                                         DLAInput* dlaBuffers,
+                                         int instance,
+                                         uint32_t debugLevel,
+                                         std::string AES_key,
+                                         std::string IV_key,
+                                         bool encryption_enabled);
+  // @param inputArray - ptr to CPU array containing the input data. Only the
+  //        pointer is stored (no copy), then StartDla() is called; the array
+  //        must stay valid until this call returns.
+  // blocking function
+  void LoadInputFeatureToDDR(void* inputArray);
+  // Writes the graph buffers and the current input buffer to files.
+  void StartDla() override;
+  // @param outputArray - ptr to CPU array that receives the output buffer contents
+  //        outputArray must be allocated by the caller
+  //        (size >= output_feature_buffer_size of the shared DLAInput)
+  // blocking function
+  void ReadOutputFeatureFromDDR(void* outputArray) const;
+  // Nothing to schedule for the raw device.
+  void ScheduleInputFeature() const {}
+};
+
+#endif
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h
new file mode 100644
index 0000000..168707e
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h
@@ -0,0 +1,81 @@
+// Copyright 2020-2023 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+#ifndef RAW_DEVICE_H
+#define RAW_DEVICE_H
+
+#include <assert.h>
+#include <chrono>
+#include <cstdio>
+#include <cstring>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <thread>
+#include <vector>
+#include <map>
+#include "arch_params.h"
+#include "compiled_result.h"
+#include "device.h"
+using namespace std;
+using namespace dla;
+class GraphJob;
+
+// Device implementation that touches no hardware. It creates RawGraphJob
+// objects and keeps ownership of them; the statistics getters return
+// placeholder values.
+class RawDevice : public Device {
+ public:
+  RawDevice(const arch_params* archParams);
+
+  // Creates a graph job for `compiledResult` and returns a non-owning pointer
+  // to it; the job itself stays owned by this device.
+  GraphJob* CreateGraphJob(const CompiledResult* compiledResult,
+                           size_t numPipelines,
+                           int instance,
+                           std::string AES_key,
+                           std::string IV_key,
+                           bool encryption_enabled,
+                           const std::string export_dir,
+                           const std::string parameter_rom_export_dir);
+
+  // Returns immediately; there is nothing to wait for.
+  // threadId is for debugging purpose only
+  void WaitForDla(int instance,
+                  size_t threadId = 0,
+                  std::function<bool()> isCancelled = nullptr) override;
+
+  // The three getters below are not implemented for the raw device: each
+  // prints a notice and returns 0.
+  int GetNumInferencesCompleted(int instance) const override;
+  double GetActiveHWTimeMs(int instance) const override;
+  double GetAvgHWTimePerJobMs(size_t num_jobs, int instance) const override;
+
+  // No scheduler exists here; report an empty status and successful setup.
+  std::string SchedulerGetStatus() const override { return ""; }
+  bool InitializeScheduler(uint32_t sourceBufferSize,
+                           uint32_t dropSourceBuffers,
+                           uint32_t numInferenceRequests,
+                           const std::string source_fifo_file = "") override {
+    return true;
+  }
+
+  int GetNumInstances() const override { return numInstances_; }
+
+  // Meaningless for the raw device, hence the -1 placeholders.
+  int GetSizeCsrDescriptorQueue() const override { return -1; }
+  double GetCoreDlaClockFreq() const override { return -1.0; }
+
+  // No debug network and no memory traffic counters: empty map / zero.
+  std::map<std::string, uint64_t> ReadDebugNetwork(int instance) const override {
+    return {};
+  }
+  uint64_t GetNumInputFeatureMemoryReads(int instance) const override { return 0; }
+  uint64_t GetNumFilterMemoryReads(int instance) const override { return 0; }
+  uint64_t GetNumOutputFeatureMemoryWrites(int instance) const override { return 0; }
+
+ private:
+  RawDevice() = delete;
+  std::vector<std::unique_ptr<GraphJob>> allGraphJobs_;
+  int numInstances_;
+  const arch_params* archParams_;
+};
+
+#endif // RAW_DEVICE_H
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h
new file mode 100644
index 0000000..38ad075
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h
@@ -0,0 +1,80 @@
+// Copyright 2020-2023 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+#ifndef RAW_GRAPH_JOB_H
+#define RAW_GRAPH_JOB_H
+
+#include <assert.h>
+#include <cstdio>
+#include <memory>
+#include <mutex>
+#include <vector>
+#if defined(_WIN32) || defined(_WIN64)
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#else
+#include <dlfcn.h>
+#endif
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <thread>
+#include "compiled_result.h"
+
+#include "dla_aot_structs.h"
+#include "graph_job.h"
+#include "raw_batch_job.h"
+#include "raw_device.h"
+using namespace dla;
+/*! RawGraphJob describes one compiled graph for the raw (file dumping) device.
+ * It stores the buffer sizes and a pointer to the CompiledResult in a DLAInput
+ * and creates one RawBatchJob per pipeline.
+ * It provides handles to those "batch job" objects, which take the input and
+ * write the buffers to files for one batch.
+ */
+class RawGraphJob : public GraphJob {
+ public:
+  // Factory; the constructor is private.
+  static unique_ptr<GraphJob> MakeUnique(const arch_params* archParams,
+                                         const CompiledResult* compiled_result,
+                                         size_t numPipelines,
+                                         int instance,
+                                         uint32_t debugLevel,
+                                         std::string AES_key,
+                                         std::string IV_key,
+                                         bool encryption_enabled);
+  // Returns an unused batch job object
+  // If all batch jobs are used, returns null
+  // Increments batchJobsRequested_
+  // Thread safe
+  BatchJob* GetBatchJob();
+  // Not copyable: the batch jobs keep a pointer to dlaBuffers_ of this object.
+  // (The first overload used to take `const GraphJob&`, which is not the copy
+  // constructor; it now matches the pattern used by RawBatchJob.)
+  RawGraphJob(const RawGraphJob&) = delete;
+  RawGraphJob(RawGraphJob&) = delete;
+  RawGraphJob& operator=(const RawGraphJob&) = delete;
+
+ private:
+  // Buffer description shared (by pointer) with every batch job in batchJobs_.
+  DLAInput dlaBuffers_;
+  vector<unique_ptr<BatchJob>> batchJobs_;
+  int instance_;
+  uint32_t debugLevel_;
+  // Number of batch jobs handed out so far by GetBatchJob().
+  unsigned int batchJobsRequested_;
+  // Guards batchJobsRequested_.
+  std::mutex graphJobMutex;
+  RawGraphJob(const arch_params* archParams,
+              const CompiledResult* compiledResult,
+              size_t numPipelines,
+              int instance,
+              uint32_t debugLevel,
+              std::string AES_key,
+              std::string IV_key,
+              bool encryption_enabled);
+};
+
+#endif
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter.xml b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter.xml
new file mode 100644
index 0000000..2f2d24e
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter.xml
@@ -0,0 +1,18 @@
+<ie>
+ <plugins>
+ <plugin name="GNA" location="libopenvino_intel_gna_plugin.so">
+ </plugin>
+ <plugin name="HETERO" location="libcoreDLAHeteroPlugin.so">
+ </plugin>
+ <plugin name="CPU" location="libopenvino_intel_cpu_plugin.so">
+ </plugin>
+ <plugin name="MULTI" location="libopenvino_auto_plugin.so">
+ </plugin>
+ <plugin name="GPU" location="libopenvino_intel_gpu_plugin.so">
+ </plugin>
+ <plugin name="MYRIAD" location="libopenvino_intel_myriad_plugin.so">
+ </plugin>
+ <plugin name="FPGA" location="libdla_aot_splitter_plugin.so">
+ </plugin>
+ </plugins>
+</ie>
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter_win.xml b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter_win.xml
new file mode 100755
index 0000000..aeeedde
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter_win.xml
@@ -0,0 +1,22 @@
+<ie>
+ <plugins>
+ <plugin name="AUTO" location="openvino_auto_plugin.dll">
+ </plugin>
+ <plugin name="BATCH" location="openvino_auto_batch_plugin.dll">
+ </plugin>
+ <plugin name="CPU" location="openvino_intel_cpu_plugin.dll">
+ </plugin>
+ <plugin name="GNA" location="openvino_intel_gna_plugin.dll">
+ </plugin>
+ <plugin name="GPU" location="openvino_intel_gpu_plugin.dll">
+ </plugin>
+ <plugin name="HETERO" location="coreDLAHeteroPlugin.dll">
+ </plugin>
+ <plugin name="MULTI" location="openvino_auto_plugin.dll">
+ </plugin>
+ <plugin name="MYRIAD" location="openvino_intel_myriad_plugin.dll">
+ </plugin>
+ <plugin name="FPGA" location="dla_aot_splitter_plugin.dll">
+ </plugin>
+ </plugins>
+</ie>
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg
new file mode 100644
index 0000000..3288819
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg
@@ -0,0 +1,4 @@
+filter=-build/header_guard,-runtime/explicit,-build/include_subdir,-runtime/references,-build/c++11,-runtime/int
+exclude_files=^(?!pe_array_sim.cpp).*\.cpp
+linelength=160
+headers=h,hpp
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp
new file mode 100644
index 0000000..4317201
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp
@@ -0,0 +1,117 @@
+// Copyright 2020 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/*
+ This file contains some helper utilities to output coredla data blobs to files
+ in the current working directory
+*/
+
+#include "dla_aot_utils.h"
+
+// Writes `buffer_size` raw bytes from `buffer` to `file_path` (binary mode).
+// The resulting file is expected to be consumed by RTL testbench or hardware.
+// Open, write and close failures are reported on stdout; the function then
+// returns without aborting.
+static void writeBufferToBinFile(const uint8_t *buffer, uint32_t buffer_size,
+                                 const char *file_path) {
+  FILE *fp = fopen(file_path, "wb");
+  // assert() is compiled out under NDEBUG, so the open failure is checked
+  // explicitly instead of passing a null FILE* on to fwrite()/fclose().
+  if (nullptr == fp) {
+    std::cout << "ERROR opening output file " << file_path << std::endl;
+    return;
+  }
+
+  if (buffer_size && !fwrite(buffer, buffer_size, 1, fp)) {
+    std::cout << "ERROR writing to output file " << file_path << std::endl;
+  }
+
+  // Buffered data is flushed on close, so a failure here also means data loss.
+  if (fclose(fp) != 0) {
+    std::cout << "ERROR closing output file " << file_path << std::endl;
+  }
+}
+
+// Writes `buffer` to `file_path` as text: comma-separated 32-bit words printed
+// as 0x%08x, 32 words (128 bytes) per line, with no trailing comma.
+// The resulting file is expected to be consumed by RTL testbench or hardware.
+// If `buffer_size` is not a multiple of 4, the last word is zero padded.
+// An open failure is reported on stdout and nothing is written.
+static void writeBufferToFile(const uint8_t *buffer, uint32_t buffer_size,
+                              const char *file_path) {
+  FILE *fp = fopen(file_path, "w");
+  // assert() is compiled out under NDEBUG, so check explicitly.
+  if (nullptr == fp) {
+    std::cout << "ERROR opening output file " << file_path << std::endl;
+    return;
+  }
+
+  const size_t total = buffer_size;
+  for (size_t offset = 0; offset < total; offset += sizeof(uint32_t)) {
+    if (offset && ((offset % 128) == 0)) {
+      fprintf(fp, "\n");
+    }
+    // Copy byte-wise instead of dereferencing a casted uint32_t*: the buffer
+    // is not guaranteed to be 4-byte aligned, and the tail may be shorter than
+    // one word.
+    const size_t remaining = total - offset;
+    uint32_t word = 0;
+    memcpy(&word, buffer + offset, remaining < sizeof(word) ? remaining : sizeof(word));
+    fprintf(fp, "0x%08x", word);
+    if (remaining > sizeof(word)) {
+      fprintf(fp, ",");
+    }
+  }
+
+  fclose(fp);
+}
+
+// Create all files that the splitter is responsible for, in the current
+// working directory.
+//
+// @param arch_hash      architecture hash; at most ARCH_HASH_SIZE bytes are stored
+// @param build_version  build version string; at most BUILD_VERSION_SIZE bytes are stored
+// @param arch_name      architecture name; at most ARCH_NAME_SIZE bytes are stored
+// @param input          compiled result, buffer sizes and input feature pointer to dump
+// @param output         not read by this function
+void writeInputOutputToFiles (
+    const std::vector<int>& arch_hash,
+    const std::string& build_version,
+    const std::string& arch_name,
+    const DLAInput &input,
+    const DLAOutput &output
+) {
+  (void)output;
+
+  // Record layout: [arch hash | build version | arch name].
+  uint8_t arch_build[ARCH_HASH_SIZE + BUILD_VERSION_SIZE + ARCH_NAME_SIZE];
+
+  // Zero the whole record. Previously the arch name region was left
+  // uninitialized, so stack garbage followed a short arch name in the file.
+  memset(arch_build, 0, sizeof(arch_build));
+  // Never read more bytes than the hash vector actually holds.
+  const size_t hash_bytes = std::min(arch_hash.size() * sizeof(int), static_cast<size_t>(ARCH_HASH_SIZE));
+  if (hash_bytes) {
+    memcpy(&arch_build[0], arch_hash.data(), hash_bytes);
+  }
+  memcpy(&arch_build[ARCH_HASH_SIZE], build_version.c_str(), std::min(build_version.length(), static_cast<size_t>(BUILD_VERSION_SIZE)));
+  memcpy(&arch_build[ARCH_HASH_SIZE + BUILD_VERSION_SIZE], arch_name.c_str(), std::min(arch_name.length(), static_cast<size_t>(ARCH_NAME_SIZE)));
+  writeBufferToFile(arch_build,
+                    sizeof(arch_build),
+                    "arch_build.mem");
+  // Raw bytes, like config.bin / filter.bin / input.bin below (this file used
+  // to be written with the text writer).
+  writeBufferToBinFile(arch_build,
+                       sizeof(arch_build),
+                       "arch_build.bin");
+
+  const auto &config_fbs_buffer =
+      input.compiled_result->get_config_filter_bias_scale_array();
+
+  // Only dump filters and config memory file when they are saved in DDR
+  if (!input.compiled_result->get_ddrfree_header().enable_parameter_rom) {
+    // The config words come first, immediately followed by filter/bias/scale.
+    const uint8_t *config_begin = &(config_fbs_buffer[0][0]);
+    const uint8_t *filter_begin = config_begin + input.config_buffer_size;
+    writeBufferToFile(config_begin,
+                      input.config_buffer_size,
+                      "config.mem");
+    writeBufferToBinFile(config_begin,
+                         input.config_buffer_size,
+                         "config.bin");
+    writeBufferToFile(filter_begin,
+                      input.filter_bias_scale_buffer_size,
+                      "filter.mem");
+    writeBufferToBinFile(filter_begin,
+                         input.filter_bias_scale_buffer_size,
+                         "filter.bin");
+  } else {
+    std::cout << "Graph filters and DLA configs are not dumped because parameter ROM is enabled in the AOT file." << std::endl;
+  }
+
+  // Without an input buffer, empty input files are still created.
+  const uint8_t *input_buffer = nullptr;
+  uint32_t input_size = 0;
+  if (input.input_feature_buffer) {
+    input_buffer = input.input_feature_buffer;
+    input_size = input.input_feature_buffer_size;
+  }
+  writeBufferToFile(input_buffer,
+                    input_size,
+                    "input.mem");
+  writeBufferToBinFile(input_buffer,
+                       input_size,
+                       "input.bin");
+
+  // The two sizes are dumped as a single 32-bit word each.
+  const uint32_t inter_size = input.intermediate_feature_buffer_size;
+  writeBufferToFile(reinterpret_cast<const uint8_t*>(&inter_size),
+                    sizeof(inter_size),
+                    "inter_size.mem");
+  const uint32_t output_size = input.output_feature_buffer_size;
+  writeBufferToFile(reinterpret_cast<const uint8_t*>(&output_size),
+                    sizeof(output_size),
+                    "output_size.mem");
+}
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp
new file mode 100644
index 0000000..23247d5
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp
@@ -0,0 +1,68 @@
+// Copyright 2022 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/*
+ The raw_batch_job, raw_graph_job, and raw_device implement the interfaces
+ used by dliaPlugin to mimic an inference flow without actually running
+ inference. They are used to capture the transformed input produced by the
+ dliaPlugin upper layers.
+*/
+
+#include "raw_batch_job.h"
+#include "dla_aot_utils.h"
+
+// Factory for RawBatchJob: forwards every argument to the constructor and
+// returns the new object as an owning pointer to the BatchJob base.
+unique_ptr<BatchJob> RawBatchJob::MakeUnique(const CompiledResult * compiledResult,
+                                             DLAInput* dlaBuffers,
+                                             int instance,
+                                             uint32_t debugLevel,
+                                             std::string AES_key,
+                                             std::string IV_key,
+                                             bool encryption_enabled) {
+  unique_ptr<BatchJob> job(
+      new RawBatchJob(compiledResult, dlaBuffers, instance, debugLevel, AES_key, IV_key, encryption_enabled));
+  return job;
+}
+
+// Stores the job parameters and allocates a zero-filled output buffer of
+// dlaBuffers->output_feature_buffer_size bytes.
+//
+// @param compiledResult      compiled graph whose arch/build info is dumped by StartDla()
+// @param dlaBuffers          buffer description shared with the graph job; must not be null
+// @param instance            device instance index (stored only)
+// @param debugLevel          debug level (stored only)
+// @param AES_key, IV_key     encryption parameters (stored only)
+// @param encryption_enabled  encryption flag (stored only)
+RawBatchJob::RawBatchJob(const CompiledResult * compiledResult,
+                         DLAInput* dlaBuffers,
+                         int instance,
+                         uint32_t debugLevel,
+                         std::string AES_key,
+                         std::string IV_key,
+                         bool encryption_enabled)
+    : compiledResult(compiledResult),
+      dlaBuffers_(dlaBuffers),
+      output_(),
+      instance_(instance),
+      debugLevel_(debugLevel),
+      AES_key_(AES_key),
+      IV_key_(IV_key),
+      encryption_enabled_(encryption_enabled) {
+  // Check the precondition before the pointer is dereferenced below.
+  assert(nullptr != dlaBuffers_);
+  // new[] reports failure by throwing, so no null check is needed afterwards;
+  // the trailing () zero-fills the buffer.
+  output_.output_feature_buffer = new uint8_t[dlaBuffers_->output_feature_buffer_size]();
+}
+
+// There is no DDR to copy into: only the pointer to the caller's array is
+// recorded in the shared buffer description, then StartDla() runs right away.
+// Note: inputArray must stay alive until StartDla() has completed.
+void RawBatchJob::LoadInputFeatureToDDR(void* inputArray) {
+  uint8_t* const inputBytes = static_cast<uint8_t*>(inputArray);
+  dlaBuffers_->input_feature_buffer = inputBytes;
+  StartDla();
+}
+
+// "Starting" the raw device means dumping the arch/build info together with
+// the input / output buffers to files.
+void RawBatchJob::StartDla() {
+  writeInputOutputToFiles(compiledResult->get_arch_hash(),
+                          compiledResult->get_build_version_string(),
+                          compiledResult->get_arch_name(),
+                          *dlaBuffers_,
+                          output_);
+}
+
+// There is no DDR to read from: the locally held output buffer is copied into
+// outputArray (output_feature_buffer_size bytes).
+void RawBatchJob::ReadOutputFeatureFromDDR(void* outputArray) const {
+  const uint32_t numBytes = dlaBuffers_->output_feature_buffer_size;
+  memcpy(outputArray, output_.output_feature_buffer, numBytes);
+}
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp
new file mode 100644
index 0000000..0b8e838
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp
@@ -0,0 +1,67 @@
+// Copyright 2022 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/*
+ The raw_batch_job, raw_graph_job, and raw_device implement the interfaces
+ used by dliaPlugin to mimic an inference flow without actually running
+ inference. They are used to capture the transformed input produced by the
+ dliaPlugin upper layers.
+*/
+
+#include "raw_device.h"
+#include "raw_graph_job.h"
+// Device factory for this plugin: always yields a RawDevice. The timeout
+// argument is not used.
+unique_ptr<Device> Device::MakeUnique(const arch_params* archParams,
+                                      uint32_t waitForDlaTimeoutSeconds) {
+  unique_ptr<Device> device(new RawDevice(archParams));
+  return device;
+}
+
+// The raw device always exposes exactly one instance.
+RawDevice::RawDevice(const arch_params* archParams)
+    : numInstances_(1), archParams_(archParams) {}
+
+// Builds a RawGraphJob (debug level 0) for the given compiled result, stores
+// it in allGraphJobs_ and returns a non-owning pointer to it.
+// export_dir and parameter_rom_export_dir are part of the interface but are
+// not used here.
+GraphJob* RawDevice::CreateGraphJob(const CompiledResult * compiledResult,
+                                    size_t numPipelines,
+                                    int instance,
+                                    std::string AES_key,
+                                    std::string IV_key,
+                                    bool encryption_enabled,
+                                    const std::string export_dir,
+                                    const std::string parameter_rom_export_dir)
+{
+  (void) export_dir;
+  (void) parameter_rom_export_dir;
+  assert(instance < numInstances_);
+
+  unique_ptr<GraphJob> graphJob = RawGraphJob::MakeUnique(
+      archParams_, compiledResult, numPipelines, instance, 0, AES_key, IV_key, encryption_enabled);
+  GraphJob* const handle = graphJob.get();
+  allGraphJobs_.push_back(std::move(graphJob));
+  return handle;
+}
+
+// Nothing runs asynchronously on the raw device, so there is nothing to wait
+// for; all arguments are ignored.
+void RawDevice::WaitForDla(int instance, size_t threadId/* = 0 */, std::function<bool()> isCancelled) {
+  (void) instance;
+  (void) threadId;
+  (void) isCancelled;
+}
+
+// Not implemented for the raw device: prints a notice and reports 0.
+int RawDevice::GetNumInferencesCompleted(int instance) const {
+  const int numCompleted = 0;
+  std::cout << "This function, GetNumInferencesCompleted, is not implemented for raw device"
+            << std::endl;
+  return numCompleted;
+}
+
+// Not implemented for the raw device: prints a notice and reports 0.
+double RawDevice::GetActiveHWTimeMs(int instance) const {
+  const double activeTimeMs = 0;
+  std::cout << "This function, GetActiveHWTimeMs, is not implemented for raw device"
+            << std::endl;
+  return activeTimeMs;
+}
+
+// Not implemented for the raw device: prints a notice and reports 0.
+double RawDevice::GetAvgHWTimePerJobMs(size_t num_jobs, int instance) const {
+  const double avgTimeMs = 0;
+  std::cout << "This function, GetAvgHWTimePerJobMs, is not implemented for raw device"
+            << std::endl;
+  return avgTimeMs;
+}
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp
new file mode 100644
index 0000000..c698110
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp
@@ -0,0 +1,89 @@
+// Copyright 2022 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/*
+ The raw_batch_job, raw_graph_job, and raw_device implement the interfaces
+ used by dliaPlugin to mimic an inference flow without actually running
+ inference. They are used to capture the transformed input produced by the
+ dliaPlugin upper layers.
+*/
+
+#include "raw_graph_job.h"
+#include "dla_aot_utils.h"
+#include <fstream>
+#include "dla_defines.h"
+
+// Factory for RawGraphJob: forwards every argument to the constructor and
+// returns the new object as an owning pointer to the GraphJob base.
+unique_ptr<GraphJob> RawGraphJob::MakeUnique(const arch_params* archParams,
+                                             const CompiledResult * compiledResult,
+                                             size_t numPipelines,
+                                             int instance,
+                                             uint32_t debugLevel = 0,
+                                             std::string AES_key = "",
+                                             std::string IV_key = "",
+                                             bool encryption_enabled = false)
+{
+  unique_ptr<GraphJob> graphJob(
+      new RawGraphJob(archParams, compiledResult, numPipelines, instance, debugLevel, AES_key, IV_key, encryption_enabled));
+  return graphJob;
+}
+
+// Fills dlaBuffers_ with the buffer sizes reported by the compiled result and
+// creates one RawBatchJob per pipeline; every batch job receives a pointer to
+// this object's dlaBuffers_.
+RawGraphJob::RawGraphJob(const arch_params* archParams,
+                         const CompiledResult * compiledResult,
+                         size_t numPipelines,
+                         int instance,
+                         uint32_t debugLevel,
+                         std::string AES_key,
+                         std::string IV_key,
+                         bool encryption_enabled)
+{
+  assert(numPipelines);
+
+  batchJobsRequested_ = 0;
+  debugLevel_ = debugLevel;
+  instance_ = instance;
+
+  // The CompiledResult is referenced, not copied, so the config and filter
+  // data can be used directly later on.
+  dlaBuffers_.compiled_result = compiledResult;
+
+  // Config and filter/bias/scale sizes.
+  const size_t configWords = compiledResult->get_num_config_words();
+  dlaBuffers_.config_buffer_size = configWords * CONFIG_WORD_SIZE;
+  dlaBuffers_.filter_bias_scale_buffer_size =
+      compiledResult->get_total_filter_bias_scale_buffer_size();
+
+  // Feature buffer sizes: input, output, intermediate.
+  // TODO: support multi-input graph
+  dlaBuffers_.input_feature_buffer_size =
+      compiledResult->get_conv_input_size_in_bytes();
+  dlaBuffers_.output_feature_buffer_size =
+      compiledResult->get_conv_output_size_in_bytes();
+  dlaBuffers_.intermediate_feature_buffer_size =
+      compiledResult->get_conv_intermediate_size_in_bytes();
+
+  // The input feature buffer itself is provided later, outside this routine.
+  dlaBuffers_.input_feature_buffer = nullptr;
+
+  // The sizes above must be set before this point: each batch job reads the
+  // output size while it is being constructed.
+  size_t remaining = numPipelines;
+  while (remaining-- > 0) {
+    batchJobs_.push_back(
+        RawBatchJob::MakeUnique(compiledResult, &dlaBuffers_, instance_, debugLevel_, AES_key, IV_key, encryption_enabled));
+  }
+}
+
+// Hands out the next unused batch job, or nullptr once all of them have been
+// requested. The returned pointer is non-owning; the jobs stay owned by
+// batchJobs_. Access to batchJobsRequested_ is serialized by graphJobMutex.
+BatchJob* RawGraphJob::GetBatchJob() {
+  // The guard releases the mutex on every exit path, replacing the manual
+  // lock()/unlock() pairs.
+  std::lock_guard<std::mutex> guard(graphJobMutex);
+  if (batchJobsRequested_ >= batchJobs_.size()) {
+    return nullptr;
+  }
+  BatchJob* const batchJob = batchJobs_[batchJobsRequested_].get();
+  batchJobsRequested_++;
+  return batchJob;
+}