diff options
Diffstat (limited to 'python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src')
5 files changed, 345 insertions, 0 deletions
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg new file mode 100644 index 0000000..3288819 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg @@ -0,0 +1,4 @@ +filter=-build/header_guard,-runtime/explicit,-build/include_subdir,-runtime/references,-build/c++11,-runtime/int +exclude_files=^(?!pe_array_sim.cpp).*\.cpp +linelength=160 +headers=h,hpp diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp new file mode 100644 index 0000000..4317201 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp @@ -0,0 +1,117 @@ +// Copyright 2020 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +/* + This file contains some helper utilities to output coredla data blobs to files + in the current working directory +*/ + +#include "dla_aot_utils.h" + +// The resulting file is expected to be consumed by RTL testbench or hardware. +static void writeBufferToBinFile(const uint8_t *buffer, uint32_t buffer_size, + const char *file_path) { + FILE *fp = fopen(file_path, "wb"); + assert(nullptr != fp); + + if (buffer_size && !fwrite(buffer, buffer_size, 1, fp)) + { + std::cout << "ERROR writing to output file " << file_path << std::endl; + } + + fclose(fp); +} + +// The resulting file is expected to be consumed by RTL testbench or hardware. +static void writeBufferToFile(const uint8_t *buffer, uint32_t buffer_size, + const char *file_path) { + FILE *fp = fopen(file_path, "w"); + assert(nullptr != fp); + + // Write buffer size (in bytes) to the first line + for (uint32_t b = 0; b < buffer_size; b+=4) { + if (b && ((b % 128) == 0)) + { + fprintf(fp, "\n"); + } + fprintf(fp, "0x%08x", *((uint32_t*)&buffer[b])); + if(b + 4 < buffer_size) + { + fprintf(fp, ","); + } + } + + fclose(fp); +} + +// Create all files that the splitter is responsible for +void writeInputOutputToFiles ( + const std::vector<int>& arch_hash, + const std::string& build_version, + const std::string& arch_name, + const DLAInput &input, + const DLAOutput &output +) { + uint8_t arch_build[ARCH_HASH_SIZE + BUILD_VERSION_SIZE + ARCH_NAME_SIZE]; + + memset(&arch_build[0], 0, ARCH_HASH_SIZE + BUILD_VERSION_SIZE); + memcpy(&arch_build[0], arch_hash.data(), ARCH_HASH_SIZE); + memcpy(&arch_build[ARCH_HASH_SIZE], build_version.c_str(), std::min(build_version.length(),static_cast<size_t>(BUILD_VERSION_SIZE))); + memcpy(&arch_build[ARCH_HASH_SIZE + BUILD_VERSION_SIZE], arch_name.c_str(), std::min(arch_name.length(),static_cast<size_t>(ARCH_NAME_SIZE))); + writeBufferToFile(arch_build, + sizeof(arch_build), + "arch_build.mem"); + writeBufferToFile(arch_build, + sizeof(arch_build), + "arch_build.bin"); + const auto &config_fbs_buffer = + input.compiled_result->get_config_filter_bias_scale_array(); + + // Only dump filters and config memory file when they are saved in DDR + if (!input.compiled_result->get_ddrfree_header().enable_parameter_rom) { + writeBufferToFile(&(config_fbs_buffer[0][0]), + input.config_buffer_size, + "config.mem"); + writeBufferToBinFile(&(config_fbs_buffer[0][0]), + input.config_buffer_size, + "config.bin"); + writeBufferToFile(&(config_fbs_buffer[0][0]) + input.config_buffer_size, + input.filter_bias_scale_buffer_size, + "filter.mem"); + writeBufferToBinFile(&(config_fbs_buffer[0][0]) + input.config_buffer_size, + input.filter_bias_scale_buffer_size, + "filter.bin"); + } else { + std::cout << "Graph filters and DLA configs are not dumped because parameter ROM is enabled in the AOT file." << std::endl; + } + uint8_t* input_buffer = nullptr; + size_t input_size = 0; + if (input.input_feature_buffer) { + input_buffer = input.input_feature_buffer; + input_size = input.input_feature_buffer_size; + } + writeBufferToFile(input_buffer, + input_size, + "input.mem"); + writeBufferToBinFile(input_buffer, + input_size, + "input.bin"); + uint32_t inter_size = input.intermediate_feature_buffer_size; + writeBufferToFile((const uint8_t*)&inter_size, + sizeof(inter_size), + "inter_size.mem"); + uint32_t output_size = input.output_feature_buffer_size; + writeBufferToFile((const uint8_t*)&output_size, + sizeof(output_size), + "output_size.mem"); +} diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp new file mode 100644 index 0000000..23247d5 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp @@ -0,0 +1,68 @@ +// Copyright 2022 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +/* + The raw_batch_job, raw_graph_job, and raw_device implement the interfaces + used by dliaPlugin to mimic a inference flow without actually providing a + inference. It is used to get the transformed input performed by the dliaPlugin + upper layers +*/ + +#include "raw_batch_job.h" +#include "dla_aot_utils.h" + +unique_ptr<BatchJob> RawBatchJob::MakeUnique(const CompiledResult * compiledResult, + DLAInput* dlaBuffers, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled) { + return unique_ptr<BatchJob>(new RawBatchJob(compiledResult, dlaBuffers, instance, debugLevel, AES_key, IV_key, encryption_enabled)); +} + +RawBatchJob::RawBatchJob(const CompiledResult * compiledResult, + DLAInput* dlaBuffers, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled) : compiledResult(compiledResult) { + dlaBuffers_ = dlaBuffers; + instance_ = instance; + debugLevel_= debugLevel; + AES_key_ = AES_key; + IV_key_ = IV_key; + encryption_enabled_ = encryption_enabled; + output_.output_feature_buffer = new uint8_t[dlaBuffers_->output_feature_buffer_size]; + memset(output_.output_feature_buffer, 0, dlaBuffers_->output_feature_buffer_size); + assert(nullptr != output_.output_feature_buffer); +} + +// Emulation device has no DDR. This function is just storing a pointer to the array +// Note: inputAray should not be deleted until the end of the Emulation runs +// i.e. StartDla completes +void RawBatchJob::LoadInputFeatureToDDR(void* inputArray) { + dlaBuffers_->input_feature_buffer = (uint8_t*) inputArray; + StartDla(); +} + +void RawBatchJob::StartDla() { + // Write input / output buffers to files + writeInputOutputToFiles(compiledResult->get_arch_hash(), compiledResult->get_build_version_string(), compiledResult->get_arch_name(), *dlaBuffers_, output_); +} + +// Emulation device has no DDR. Output is copied into the outputArray. +void RawBatchJob::ReadOutputFeatureFromDDR(void* outputArray) const { + memcpy(outputArray, output_.output_feature_buffer, dlaBuffers_->output_feature_buffer_size); +} diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp new file mode 100644 index 0000000..0b8e838 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp @@ -0,0 +1,67 @@ +// Copyright 2022 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +/* + The raw_batch_job, raw_graph_job, and raw_device implement the interfaces + used by dliaPlugin to mimic a inference flow without actually providing a + inference. It is used to get the transformed input performed by the dliaPlugin + upper layers +*/ + +#include "raw_device.h" +#include "raw_graph_job.h" +unique_ptr<Device> Device::MakeUnique(const arch_params* archParams, + uint32_t waitForDlaTimeoutSeconds) { + return unique_ptr<Device>(new RawDevice(archParams)); +} + +RawDevice::RawDevice(const arch_params* archParams) { + numInstances_ = 1; + archParams_ = archParams; +} + +GraphJob* RawDevice::CreateGraphJob(const CompiledResult * compiledResult, + size_t numPipelines, + int instance, + std::string AES_key, + std::string IV_key, + bool encryption_enabled, + const std::string export_dir, + const std::string parameter_rom_export_dir) +{ + (void) export_dir; // unused in HW runtime. CoreDLA utilizes base pointers, which the SW reference utilizes this variable. We void it here. + (void) parameter_rom_export_dir; + assert(instance < numInstances_); + allGraphJobs_.push_back(move(RawGraphJob::MakeUnique(archParams_, compiledResult, numPipelines, instance, 0, + AES_key, IV_key, encryption_enabled))); + return (allGraphJobs_.back()).get(); +} + +void RawDevice::WaitForDla(int instance, size_t threadId/* = 0 */, std::function<bool()> isCancelled) { + //RawDevice does not do any real work. No need to wait +} + +int RawDevice::GetNumInferencesCompleted(int instance) const { + std::cout << "This function, GetNumInferencesCompleted, is not implemented for raw device" << std::endl; + return 0; +} + +double RawDevice::GetActiveHWTimeMs(int instance) const { + std::cout << "This function, GetActiveHWTimeMs, is not implemented for raw device" << std::endl; + return 0; +} + +double RawDevice::GetAvgHWTimePerJobMs(size_t num_jobs, int instance) const { + std::cout << "This function, GetAvgHWTimePerJobMs, is not implemented for raw device" << std::endl; + return 0; +} diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp new file mode 100644 index 0000000..c698110 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp @@ -0,0 +1,89 @@ +// Copyright 2022 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +/* + The raw_batch_job, raw_graph_job, and raw_device implement the interfaces + used by dliaPlugin to mimic a inference flow without actually providing a + inference. It is used to get the transformed input performed by the dliaPlugin + upper layers +*/ + +#include "raw_graph_job.h" +#include "dla_aot_utils.h" +#include <fstream> +#include "dla_defines.h" + +unique_ptr<GraphJob> RawGraphJob::MakeUnique(const arch_params* archParams, + const CompiledResult * compiledResult, + size_t numPipelines, + int instance, + uint32_t debugLevel = 0, + std::string AES_key = "", + std::string IV_key = "", + bool encryption_enabled = false) +{ + return unique_ptr<GraphJob>(new RawGraphJob(archParams, compiledResult, numPipelines, instance, debugLevel, AES_key, IV_key, encryption_enabled)); +} + +RawGraphJob::RawGraphJob(const arch_params* archParams, + const CompiledResult * compiledResult, + size_t numPipelines, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled) +{ + assert(numPipelines); + instance_ = instance; + debugLevel_ = debugLevel; + batchJobsRequested_ = 0; + // input feature buffer size + // TODO: support multi-input graph + dlaBuffers_.input_feature_buffer_size = + compiledResult->get_conv_input_size_in_bytes(); + // input feature buffer to be allocated outside this routine + + // output buffer size + dlaBuffers_.output_feature_buffer_size = + compiledResult->get_conv_output_size_in_bytes(); + + // intermediate buffer size + dlaBuffers_.intermediate_feature_buffer_size = + compiledResult->get_conv_intermediate_size_in_bytes(); + + // config and filter buffer size + size_t num_config_words = compiledResult->get_num_config_words(); + dlaBuffers_.config_buffer_size = num_config_words * CONFIG_WORD_SIZE; + dlaBuffers_.filter_bias_scale_buffer_size = + compiledResult->get_total_filter_bias_scale_buffer_size(); + // store a pointer to CompiledResult to use config and filter buffer directly without copying + dlaBuffers_.compiled_result = compiledResult; + for(size_t i = 0; i < numPipelines; i++) { + batchJobs_.push_back(move(RawBatchJob::MakeUnique(compiledResult, &dlaBuffers_, instance_, debugLevel_, AES_key, IV_key, encryption_enabled))); + } + + dlaBuffers_.input_feature_buffer = NULL; +} + +BatchJob* RawGraphJob::GetBatchJob() { + graphJobMutex.lock(); + if(batchJobsRequested_ >= batchJobs_.size()) { + graphJobMutex.unlock(); + return nullptr; + } + auto * batchJob = batchJobs_[batchJobsRequested_].get(); + batchJobsRequested_++; + graphJobMutex.unlock(); + return batchJob; +} |
