diff options
Diffstat (limited to 'python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin')
13 files changed, 825 insertions, 0 deletions
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/CMakeLists.txt b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/CMakeLists.txt new file mode 100644 index 0000000..6f5e916 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/CMakeLists.txt @@ -0,0 +1,113 @@ +cmake_minimum_required(VERSION 3.10) + +add_library(dla_aot_splitter_plugin SHARED) + +target_compile_features(dla_aot_splitter_plugin PUBLIC cxx_std_11) + +target_compile_definitions(dla_aot_splitter_plugin PUBLIC DISABLE_JIT) + +set_target_properties(dla_aot_splitter_plugin PROPERTIES POSITION_INDEPENDENT_CODE ON) + +if (WIN32) + # Fix warning C4273: inconsistent dll linkage + target_compile_definitions(dla_aot_splitter_plugin PRIVATE XBYAK_NO_OP_NAMES + IMPLEMENT_INFERENCE_ENGINE_PLUGIN + $<TARGET_PROPERTY:openvino::runtime,INTERFACE_COMPILE_DEFINITIONS>) +endif() + +target_include_directories(dla_aot_splitter_plugin PRIVATE + $ENV{COREDLA_ROOT}/dla_plugin + $ENV{COREDLA_ROOT}/dla_plugin/inc + $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia + $ENV{COREDLA_ROOT}/util/inc # dla_error.h + $ENV{COREDLA_ROOT}/inc # dla_dma_constants.svh + $ENV{COREDLA_ROOT}/runtime/coredla_device/inc # For abstract classes (BatchJob, Device etc.) 
+ # + ${CMAKE_CURRENT_SOURCE_DIR}/inc +) + +target_sources(dla_aot_splitter_plugin PRIVATE +## + $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_async_infer_request.h + $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_config.hpp + $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_compiled_model.h + $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_runtime_log.h + $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia_infer_request.h + $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia_plugin.h + $ENV{COREDLA_ROOT}/dla_plugin/inc/dlia_utils.h + $ENV{COREDLA_ROOT}/dla_plugin/inc/dla_plugin_config.hpp +## + $ENV{COREDLA_ROOT}/dla_plugin/src/dla_async_infer_request.cpp + $ENV{COREDLA_ROOT}/dla_plugin/src/dla_config.cpp + $ENV{COREDLA_ROOT}/dla_plugin/src/dla_compiled_model.cpp + $ENV{COREDLA_ROOT}/dla_plugin/src/dlia_infer_request.cpp + $ENV{COREDLA_ROOT}/dla_plugin/src/dlia_plugin.cpp + $ENV{COREDLA_ROOT}/dla_plugin/src/dla_plugin_jit_functions.cpp + $ENV{COREDLA_ROOT}/dla_plugin/src/dlia_utils.cpp + $ENV{COREDLA_ROOT}/util/src/dla_numeric_utils.cpp +## + $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/graph_job.h + $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/batch_job.h + $ENV{COREDLA_ROOT}/runtime/coredla_device/inc/device.h +## + ${CMAKE_CURRENT_SOURCE_DIR}/src/raw_graph_job.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/raw_device.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/raw_batch_job.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/dla_aot_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/inc/raw_graph_job.h + ${CMAKE_CURRENT_SOURCE_DIR}/inc/raw_device.h + ${CMAKE_CURRENT_SOURCE_DIR}/inc/raw_batch_job.h + ${CMAKE_CURRENT_SOURCE_DIR}/inc/dla_aot_utils.h + ${CMAKE_CURRENT_SOURCE_DIR}/inc/dla_aot_structs.h +) + +if (WIN32) + target_link_libraries(dla_aot_splitter_plugin + PRIVATE +## + dla_op_transformation + dliaPluginIOTransformations + openvino::runtime + openvino_dev_api + ${TBB_IMPORTED_TARGETS} +) +else() + target_link_libraries(dla_aot_splitter_plugin + PRIVATE +## + pthread + dla_op_transformation + dliaPluginIOTransformations + openvino::runtime + 
openvino_dev_api + ${TBB_IMPORTED_TARGETS} +) +endif() + +if (DISABLE_JIT) + target_include_directories(dla_aot_splitter_plugin PRIVATE + $ENV{COREDLA_ROOT}/util/inc + $ENV{COREDLA_XUTIL_DIR}/compiled_result/inc + ) + target_sources(dla_aot_splitter_plugin PRIVATE $ENV{COREDLA_XUTIL_DIR}/compiled_result/src/compiled_result_reader_writer.cpp) + + if (EXISTS $ENV{COREDLA_ROOT}/inc) + target_include_directories(dla_aot_splitter_plugin PUBLIC $ENV{COREDLA_ROOT}/inc) + else() + target_include_directories(dla_aot_splitter_plugin PUBLIC $ENV{COREDLA_ROOT}/build/coredla/dla/inc) + endif() +else() + target_link_libraries(dla_aot_splitter_plugin + PRIVATE + dla_compiled_result + archparam + ) +endif() + +set_target_properties(dliaPluginIOTransformations PROPERTIES POSITION_INDEPENDENT_CODE ON) + +if (WIN32) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/plugins_aot_splitter_win.xml ${CMAKE_CURRENT_BINARY_DIR}/plugins_aot_splitter.xml COPYONLY) +else() + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/plugins_aot_splitter.xml ${CMAKE_CURRENT_BINARY_DIR}/ COPYONLY) +endif() diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h new file mode 100644 index 0000000..697b5d2 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h @@ -0,0 +1,38 @@ +// Copyright 2020 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. 
+// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +#ifndef _DLA_AOT_STRUCTS_H_ +#define _DLA_AOT_STRUCTS_H_ + +#include "compiled_result.h" + +// Custom type +typedef unsigned char uint8_t; + +// All size and offset fields are in bytes. +typedef struct { + const dla::CompiledResult* compiled_result; + uint32_t config_buffer_size; + uint32_t filter_bias_scale_buffer_size; + uint8_t *input_feature_buffer; + uint32_t input_feature_buffer_size; + uint32_t output_feature_buffer_size; + uint32_t intermediate_feature_buffer_size; +} DLAInput; + +typedef struct { + // Its size is output_feature_buffer_size in DLAInput. + uint8_t *output_feature_buffer; +} DLAOutput; + +#endif // _DLA_REF_H_ diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h new file mode 100644 index 0000000..7fa23e8 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h @@ -0,0 +1,49 @@ +// Copyright 2020-2023 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. 
+
+#ifndef _DLA_AOT_UTILS_H_
+#define _DLA_AOT_UTILS_H_
+
+#include <fcntl.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+#include <sys/stat.h>
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "dla_aot_structs.h"
+
+using google::protobuf::io::FileInputStream;
+
+// fp16 feature element (in bytes)
+// TODO: extract it from arch / compiled result
+const uint32_t feature_elem_size = 2;
+
+//////////////////////////////////////////////////////////////////////////////
+// Dump the splitter's view of one DLA job to files in the current working
+// directory (see src/dla_aot_utils.cpp):
+//   - arch_build.mem / arch_build.bin: arch hash, build version and arch name
+//     packed into one fixed-size record
+//   - config.mem / config.bin:         config buffer
+//   - filter.mem / filter.bin:         filter / bias / scale buffer
+//     (config and filter files are skipped, with a console message, when the
+//     compiled result has parameter ROM enabled)
+//   - input.mem / input.bin:           input feature buffer
+//   - inter_size.mem:                  intermediate feature buffer size
+//   - output_size.mem:                 output feature buffer size
+//
+// A .mem file is text: the buffer is printed as comma-separated 32-bit words
+// ("0x%08x"), with a line break after every 128 bytes. The config / filter /
+// input .bin files are the raw bytes.
+// NOTE(review): arch_build.bin is currently produced by the text writer as
+// well - confirm whether that is intended.
+//
+// The `output` argument is accepted but its buffer is not written to any file.
+//////////////////////////////////////////////////////////////////////////////
+
+void writeInputOutputToFiles(const std::vector<int>& arch_hash,
+                             const std::string& build_version,
+                             const std::string& arch_name,
+                             const DLAInput& input,
+                             const DLAOutput& output);
+
+#endif  // _DLA_AOT_UTILS_H_
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h
new file mode 100644
index 0000000..dd8e5fa
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h
@@ -0,0 +1,79 @@
+// Copyright 2020-2023 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. +#ifndef RAW_BATCH_JOB_H +#define RAW_BATCH_JOB_H + +#include <assert.h> +#include <cstdio> +#if defined(_WIN32) || defined(_WIN64) +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include <windows.h> +#else +#include <dlfcn.h> +#endif +#include <cstring> +#include <iostream> +#include <string> +#include <thread> +#include <memory> + +#include "batch_job.h" +#include "dla_aot_structs.h" +#include "raw_device.h" + +// RawBatchJob represents one batch execution +// Contains functions to start DLA +class RawBatchJob : public BatchJob { + private: + const CompiledResult* compiledResult; + DLAInput* dlaBuffers_; + DLAOutput output_; + int instance_; + uint32_t debugLevel_; + std::string AES_key_; + std::string IV_key_; + bool encryption_enabled_; + RawBatchJob(const CompiledResult* compiledResult, + DLAInput* dlaBuffers, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled); + + public: + RawBatchJob(const RawBatchJob&) = delete; + RawBatchJob(RawBatchJob&) = delete; + RawBatchJob& operator=(const RawBatchJob&) = delete; + static unique_ptr<BatchJob> MakeUnique(const CompiledResult* compiledResult, + DLAInput* dlaBuffers, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled); + // @param inputArray - ptr to CPU array containing input data tp be copied to DDR + // blocking function + void LoadInputFeatureToDDR(void* inputArray); + // Starts DLA by writing to CSR in DLA DMA; the DDR addresses of graph config and input data + void StartDla() override; + // @param outputArray - ptr to CPU array where the output data in DDR is copied into + // outputArray must be allocated by the caller (size >= output_size_ddr) + // blocking function + void ReadOutputFeatureFromDDR(void* outputArray) const; + 
void ScheduleInputFeature() const {} +}; + +#endif diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h new file mode 100644 index 0000000..168707e --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h @@ -0,0 +1,81 @@ +// Copyright 2020-2023 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. 
+#ifndef RAW_DEVICE_H +#define RAW_DEVICE_H + +#include <assert.h> +#include <chrono> +#include <cstdio> +#include <cstring> +#include <iostream> +#include <memory> +#include <string> +#include <thread> +#include <vector> +#include <map> +#include "arch_params.h" +#include "compiled_result.h" +#include "device.h" +using namespace std; +using namespace dla; +class GraphJob; + +class RawDevice : public Device { + public: + GraphJob* CreateGraphJob(const CompiledResult* compiledResult, + size_t numPipelines, + int instance, + std::string AES_key, + std::string IV_key, + bool encryption_enabled, + const std::string export_dir, + const std::string parameter_rom_export_dir); + // Return number of DLA jobs completed till now + // Used for debugging + int GetNumInferencesCompleted(int instance) const override; + // Must be called when there are no active jobs on DLA + // Returns the total time taken by DLA jobs on hardware (in milliseconds) + double GetActiveHWTimeMs(int instance) const override; + // Must be called when there are no active jobs on DLA + // Returns the average of time taken per job (in milliseconds) + // Avg Time per job < Active Time + double GetAvgHWTimePerJobMs(size_t num_jobs, int instance) const override; + RawDevice(const arch_params* archParams); + void WaitForDla(int instance, + size_t threadId = 0, + std::function<bool()> isCancelled = nullptr) override; // threadId is for debugging purpose only + std::string SchedulerGetStatus() const override { return ""; } + bool InitializeScheduler(uint32_t sourceBufferSize, + uint32_t dropSourceBuffers, + uint32_t numInferenceRequests, + const std::string source_fifo_file = "") override { + return true; + } + int GetNumInstances() const override { return numInstances_; } + int GetSizeCsrDescriptorQueue() const override { return -1; } // meaningless here + double GetCoreDlaClockFreq() const override { return -1.0; } // meaningless here + std::map<std::string, uint64_t> ReadDebugNetwork(int instance) const 
override { + return std::map<std::string, uint64_t>(); + }; + uint64_t GetNumInputFeatureMemoryReads(int instance) const override { return 0; }; + uint64_t GetNumFilterMemoryReads(int instance) const override {return 0; }; + uint64_t GetNumOutputFeatureMemoryWrites(int instance) const override {return 0; }; + + private: + RawDevice() = delete; + vector<unique_ptr<GraphJob>> allGraphJobs_; + int numInstances_; + const arch_params* archParams_; +}; + +#endif // REF_DEVCE_H diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h new file mode 100644 index 0000000..38ad075 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h @@ -0,0 +1,80 @@ +// Copyright 2020-2023 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. +#ifndef RAW_GRAPH_JOB_H +#define RAW_GRAPH_JOB_H + +#include <assert.h> +#include <cstdio> +#include <memory> +#include <vector> +#if defined(_WIN32) || defined(_WIN64) +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include <windows.h> +#else +#include <dlfcn.h> +#endif +#include <cstring> +#include <iostream> +#include <string> +#include <thread> +#include "compiled_result.h" + +#include "dla_aot_structs.h" +#include "graph_job.h" +#include "raw_batch_job.h" +#include "raw_device.h" +using namespace dla; +/*! 
RawGraphJob is a DLA compiled graph loaded onto a emulation device + * Initialized with Emulator Device object + * RawGraphJob stores arrays filter, bias, config, inputs and outputs + * It provides handle to "batch job" objects that are used to load input and start DLA for one batch + */ +class RawGraphJob : public GraphJob { + public: + static unique_ptr<GraphJob> MakeUnique(const arch_params* archParams, + const CompiledResult* compiled_result, + size_t numPipelines, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled); + // Returns an unused batch job object + // If all batch jobs are used, returns null + // Increments batchJobsRequested_ + // Thread safe + BatchJob* GetBatchJob(); + RawGraphJob(const GraphJob&) = delete; + RawGraphJob(RawGraphJob&) = delete; + RawGraphJob& operator=(const RawGraphJob&) = delete; + + private: + DLAInput dlaBuffers_; + vector<unique_ptr<BatchJob>> batchJobs_; + int instance_; + uint32_t debugLevel_; + unsigned int batchJobsRequested_; + std::mutex graphJobMutex; + RawGraphJob(const arch_params* archParams, + const CompiledResult* compiledResult, + size_t numPipelines, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled); +}; + +#endif diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter.xml b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter.xml new file mode 100644 index 0000000..2f2d24e --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter.xml @@ -0,0 +1,18 @@ +<ie> + <plugins> + <plugin name="GNA" location="libopenvino_intel_gna_plugin.so"> + </plugin> + <plugin name="HETERO" location="libcoreDLAHeteroPlugin.so"> + </plugin> + <plugin name="CPU" location="libopenvino_intel_cpu_plugin.so"> + </plugin> + <plugin name="MULTI" location="libopenvino_auto_plugin.so"> + </plugin> + 
<plugin name="GPU" location="libopenvino_intel_gpu_plugin.so"> + </plugin> + <plugin name="MYRIAD" location="libopenvino_intel_myriad_plugin.so"> + </plugin> + <plugin name="FPGA" location="libdla_aot_splitter_plugin.so"> + </plugin> + </plugins> +</ie> diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter_win.xml b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter_win.xml new file mode 100755 index 0000000..aeeedde --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/plugins_aot_splitter_win.xml @@ -0,0 +1,22 @@ +<ie>
+ <plugins>
+ <plugin name="AUTO" location="openvino_auto_plugin.dll">
+ </plugin>
+ <plugin name="BATCH" location="openvino_auto_batch_plugin.dll">
+ </plugin>
+ <plugin name="CPU" location="openvino_intel_cpu_plugin.dll">
+ </plugin>
+ <plugin name="GNA" location="openvino_intel_gna_plugin.dll">
+ </plugin>
+ <plugin name="GPU" location="openvino_intel_gpu_plugin.dll">
+ </plugin>
+ <plugin name="HETERO" location="coreDLAHeteroPlugin.dll">
+ </plugin>
+ <plugin name="MULTI" location="openvino_auto_plugin.dll">
+ </plugin>
+ <plugin name="MYRIAD" location="openvino_intel_myriad_plugin.dll">
+ </plugin>
+ <plugin name="FPGA" location="dla_aot_splitter_plugin.dll">
+ </plugin>
+ </plugins>
+</ie>
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg new file mode 100644 index 0000000..3288819 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg @@ -0,0 +1,4 @@ +filter=-build/header_guard,-runtime/explicit,-build/include_subdir,-runtime/references,-build/c++11,-runtime/int +exclude_files=^(?!pe_array_sim.cpp).*\.cpp +linelength=160 +headers=h,hpp diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp new file mode 100644 index 0000000..4317201 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp @@ -0,0 +1,117 @@ +// Copyright 2020 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +/* + This file contains some helper utilities to output coredla data blobs to files + in the current working directory +*/ + +#include "dla_aot_utils.h" + +// The resulting file is expected to be consumed by RTL testbench or hardware. 
+static void writeBufferToBinFile(const uint8_t *buffer, uint32_t buffer_size, + const char *file_path) { + FILE *fp = fopen(file_path, "wb"); + assert(nullptr != fp); + + if (buffer_size && !fwrite(buffer, buffer_size, 1, fp)) + { + std::cout << "ERROR writing to output file " << file_path << std::endl; + } + + fclose(fp); +} + +// The resulting file is expected to be consumed by RTL testbench or hardware. +static void writeBufferToFile(const uint8_t *buffer, uint32_t buffer_size, + const char *file_path) { + FILE *fp = fopen(file_path, "w"); + assert(nullptr != fp); + + // Write buffer size (in bytes) to the first line + for (uint32_t b = 0; b < buffer_size; b+=4) { + if (b && ((b % 128) == 0)) + { + fprintf(fp, "\n"); + } + fprintf(fp, "0x%08x", *((uint32_t*)&buffer[b])); + if(b + 4 < buffer_size) + { + fprintf(fp, ","); + } + } + + fclose(fp); +} + +// Create all files that the splitter is responsible for +void writeInputOutputToFiles ( + const std::vector<int>& arch_hash, + const std::string& build_version, + const std::string& arch_name, + const DLAInput &input, + const DLAOutput &output +) { + uint8_t arch_build[ARCH_HASH_SIZE + BUILD_VERSION_SIZE + ARCH_NAME_SIZE]; + + memset(&arch_build[0], 0, ARCH_HASH_SIZE + BUILD_VERSION_SIZE); + memcpy(&arch_build[0], arch_hash.data(), ARCH_HASH_SIZE); + memcpy(&arch_build[ARCH_HASH_SIZE], build_version.c_str(), std::min(build_version.length(),static_cast<size_t>(BUILD_VERSION_SIZE))); + memcpy(&arch_build[ARCH_HASH_SIZE + BUILD_VERSION_SIZE], arch_name.c_str(), std::min(arch_name.length(),static_cast<size_t>(ARCH_NAME_SIZE))); + writeBufferToFile(arch_build, + sizeof(arch_build), + "arch_build.mem"); + writeBufferToFile(arch_build, + sizeof(arch_build), + "arch_build.bin"); + const auto &config_fbs_buffer = + input.compiled_result->get_config_filter_bias_scale_array(); + + // Only dump filters and config memory file when they are saved in DDR + if (!input.compiled_result->get_ddrfree_header().enable_parameter_rom) { + 
writeBufferToFile(&(config_fbs_buffer[0][0]), + input.config_buffer_size, + "config.mem"); + writeBufferToBinFile(&(config_fbs_buffer[0][0]), + input.config_buffer_size, + "config.bin"); + writeBufferToFile(&(config_fbs_buffer[0][0]) + input.config_buffer_size, + input.filter_bias_scale_buffer_size, + "filter.mem"); + writeBufferToBinFile(&(config_fbs_buffer[0][0]) + input.config_buffer_size, + input.filter_bias_scale_buffer_size, + "filter.bin"); + } else { + std::cout << "Graph filters and DLA configs are not dumped because parameter ROM is enabled in the AOT file." << std::endl; + } + uint8_t* input_buffer = nullptr; + size_t input_size = 0; + if (input.input_feature_buffer) { + input_buffer = input.input_feature_buffer; + input_size = input.input_feature_buffer_size; + } + writeBufferToFile(input_buffer, + input_size, + "input.mem"); + writeBufferToBinFile(input_buffer, + input_size, + "input.bin"); + uint32_t inter_size = input.intermediate_feature_buffer_size; + writeBufferToFile((const uint8_t*)&inter_size, + sizeof(inter_size), + "inter_size.mem"); + uint32_t output_size = input.output_feature_buffer_size; + writeBufferToFile((const uint8_t*)&output_size, + sizeof(output_size), + "output_size.mem"); +} diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp new file mode 100644 index 0000000..23247d5 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp @@ -0,0 +1,68 @@ +// Copyright 2022 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). 
Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +/* + The raw_batch_job, raw_graph_job, and raw_device implement the interfaces + used by dliaPlugin to mimic a inference flow without actually providing a + inference. It is used to get the transformed input performed by the dliaPlugin + upper layers +*/ + +#include "raw_batch_job.h" +#include "dla_aot_utils.h" + +unique_ptr<BatchJob> RawBatchJob::MakeUnique(const CompiledResult * compiledResult, + DLAInput* dlaBuffers, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled) { + return unique_ptr<BatchJob>(new RawBatchJob(compiledResult, dlaBuffers, instance, debugLevel, AES_key, IV_key, encryption_enabled)); +} + +RawBatchJob::RawBatchJob(const CompiledResult * compiledResult, + DLAInput* dlaBuffers, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled) : compiledResult(compiledResult) { + dlaBuffers_ = dlaBuffers; + instance_ = instance; + debugLevel_= debugLevel; + AES_key_ = AES_key; + IV_key_ = IV_key; + encryption_enabled_ = encryption_enabled; + output_.output_feature_buffer = new uint8_t[dlaBuffers_->output_feature_buffer_size]; + memset(output_.output_feature_buffer, 0, dlaBuffers_->output_feature_buffer_size); + assert(nullptr != output_.output_feature_buffer); +} + +// Emulation device has no DDR. This function is just storing a pointer to the array +// Note: inputAray should not be deleted until the end of the Emulation runs +// i.e. 
StartDla completes +void RawBatchJob::LoadInputFeatureToDDR(void* inputArray) { + dlaBuffers_->input_feature_buffer = (uint8_t*) inputArray; + StartDla(); +} + +void RawBatchJob::StartDla() { + // Write input / output buffers to files + writeInputOutputToFiles(compiledResult->get_arch_hash(), compiledResult->get_build_version_string(), compiledResult->get_arch_name(), *dlaBuffers_, output_); +} + +// Emulation device has no DDR. Output is copied into the outputArray. +void RawBatchJob::ReadOutputFeatureFromDDR(void* outputArray) const { + memcpy(outputArray, output_.output_feature_buffer, dlaBuffers_->output_feature_buffer_size); +} diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp new file mode 100644 index 0000000..0b8e838 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp @@ -0,0 +1,67 @@ +// Copyright 2022 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +/* + The raw_batch_job, raw_graph_job, and raw_device implement the interfaces + used by dliaPlugin to mimic a inference flow without actually providing a + inference. 
It is used to get the transformed input performed by the dliaPlugin + upper layers +*/ + +#include "raw_device.h" +#include "raw_graph_job.h" +unique_ptr<Device> Device::MakeUnique(const arch_params* archParams, + uint32_t waitForDlaTimeoutSeconds) { + return unique_ptr<Device>(new RawDevice(archParams)); +} + +RawDevice::RawDevice(const arch_params* archParams) { + numInstances_ = 1; + archParams_ = archParams; +} + +GraphJob* RawDevice::CreateGraphJob(const CompiledResult * compiledResult, + size_t numPipelines, + int instance, + std::string AES_key, + std::string IV_key, + bool encryption_enabled, + const std::string export_dir, + const std::string parameter_rom_export_dir) +{ + (void) export_dir; // unused in HW runtime. CoreDLA utilizes base pointers, which the SW reference utilizes this variable. We void it here. + (void) parameter_rom_export_dir; + assert(instance < numInstances_); + allGraphJobs_.push_back(move(RawGraphJob::MakeUnique(archParams_, compiledResult, numPipelines, instance, 0, + AES_key, IV_key, encryption_enabled))); + return (allGraphJobs_.back()).get(); +} + +void RawDevice::WaitForDla(int instance, size_t threadId/* = 0 */, std::function<bool()> isCancelled) { + //RawDevice does not do any real work. 
No need to wait +} + +int RawDevice::GetNumInferencesCompleted(int instance) const { + std::cout << "This function, GetNumInferencesCompleted, is not implemented for raw device" << std::endl; + return 0; +} + +double RawDevice::GetActiveHWTimeMs(int instance) const { + std::cout << "This function, GetActiveHWTimeMs, is not implemented for raw device" << std::endl; + return 0; +} + +double RawDevice::GetAvgHWTimePerJobMs(size_t num_jobs, int instance) const { + std::cout << "This function, GetAvgHWTimePerJobMs, is not implemented for raw device" << std::endl; + return 0; +} diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp new file mode 100644 index 0000000..c698110 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp @@ -0,0 +1,89 @@ +// Copyright 2022 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +/* + The raw_batch_job, raw_graph_job, and raw_device implement the interfaces + used by dliaPlugin to mimic a inference flow without actually providing a + inference. 
It is used to get the transformed input performed by the dliaPlugin + upper layers +*/ + +#include "raw_graph_job.h" +#include "dla_aot_utils.h" +#include <fstream> +#include "dla_defines.h" + +unique_ptr<GraphJob> RawGraphJob::MakeUnique(const arch_params* archParams, + const CompiledResult * compiledResult, + size_t numPipelines, + int instance, + uint32_t debugLevel = 0, + std::string AES_key = "", + std::string IV_key = "", + bool encryption_enabled = false) +{ + return unique_ptr<GraphJob>(new RawGraphJob(archParams, compiledResult, numPipelines, instance, debugLevel, AES_key, IV_key, encryption_enabled)); +} + +RawGraphJob::RawGraphJob(const arch_params* archParams, + const CompiledResult * compiledResult, + size_t numPipelines, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled) +{ + assert(numPipelines); + instance_ = instance; + debugLevel_ = debugLevel; + batchJobsRequested_ = 0; + // input feature buffer size + // TODO: support multi-input graph + dlaBuffers_.input_feature_buffer_size = + compiledResult->get_conv_input_size_in_bytes(); + // input feature buffer to be allocated outside this routine + + // output buffer size + dlaBuffers_.output_feature_buffer_size = + compiledResult->get_conv_output_size_in_bytes(); + + // intermediate buffer size + dlaBuffers_.intermediate_feature_buffer_size = + compiledResult->get_conv_intermediate_size_in_bytes(); + + // config and filter buffer size + size_t num_config_words = compiledResult->get_num_config_words(); + dlaBuffers_.config_buffer_size = num_config_words * CONFIG_WORD_SIZE; + dlaBuffers_.filter_bias_scale_buffer_size = + compiledResult->get_total_filter_bias_scale_buffer_size(); + // store a pointer to CompiledResult to use config and filter buffer directly without copying + dlaBuffers_.compiled_result = compiledResult; + for(size_t i = 0; i < numPipelines; i++) { + batchJobs_.push_back(move(RawBatchJob::MakeUnique(compiledResult, &dlaBuffers_, 
instance_, debugLevel_, AES_key, IV_key, encryption_enabled))); + } + + dlaBuffers_.input_feature_buffer = NULL; +} + +BatchJob* RawGraphJob::GetBatchJob() { + graphJobMutex.lock(); + if(batchJobsRequested_ >= batchJobs_.size()) { + graphJobMutex.unlock(); + return nullptr; + } + auto * batchJob = batchJobs_[batchJobsRequested_].get(); + batchJobsRequested_++; + graphJobMutex.unlock(); + return batchJob; +} |
