diff options
Diffstat (limited to 'python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc')
5 files changed, 327 insertions, 0 deletions
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h new file mode 100644 index 0000000..697b5d2 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_structs.h @@ -0,0 +1,38 @@ +// Copyright 2020 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +#ifndef _DLA_AOT_STRUCTS_H_ +#define _DLA_AOT_STRUCTS_H_ + +#include "compiled_result.h" + +// Custom type +typedef unsigned char uint8_t; + +// All size and offset fields are in bytes. +typedef struct { + const dla::CompiledResult* compiled_result; + uint32_t config_buffer_size; + uint32_t filter_bias_scale_buffer_size; + uint8_t *input_feature_buffer; + uint32_t input_feature_buffer_size; + uint32_t output_feature_buffer_size; + uint32_t intermediate_feature_buffer_size; +} DLAInput; + +typedef struct { + // Its size is output_feature_buffer_size in DLAInput. + uint8_t *output_feature_buffer; +} DLAOutput; + +#endif // _DLA_REF_H_ diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h new file mode 100644 index 0000000..7fa23e8 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/dla_aot_utils.h @@ -0,0 +1,49 @@ +// Copyright 2020-2023 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +#ifndef _DLA_AOT_UTILS_H_ +#define _DLA_AOT_UTILS_H_ + +#include <fcntl.h> +#include <google/protobuf/io/zero_copy_stream_impl.h> +#include <google/protobuf/text_format.h> +#include <sys/stat.h> + +#include <iostream> +#include <string> +#include <vector> + +#include "dla_aot_structs.h" + +using google::protobuf::io::FileInputStream; + +// fp16 feature element (in bytes) +// TODO: extract it from arch / compiled result +const uint32_t feature_elem_size = 2; + +////////////////////////////////////////////////////////////////////////////// +// Dump DLA input and output to the following files: +// - config_filter.mem: config + filter buffer +// - input_feature.mem: input feature buffer +// - output_feature.mem: output feature buffer (emulation results) +// +// Each .mem file is a text file, with one byte (in hex) per line. +////////////////////////////////////////////////////////////////////////////// + +void writeInputOutputToFiles(const std::vector<int>& arch_hash, + const std::string& build_version, + const std::string& arch_name, + const DLAInput& input, + const DLAOutput& output); + +#endif // _DLA_AOT_UTILS_H_ diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h new file mode 100644 index 0000000..dd8e5fa --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_batch_job.h @@ -0,0 +1,79 @@ +// Copyright 2020-2023 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. +#ifndef RAW_BATCH_JOB_H +#define RAW_BATCH_JOB_H + +#include <assert.h> +#include <cstdio> +#if defined(_WIN32) || defined(_WIN64) +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include <windows.h> +#else +#include <dlfcn.h> +#endif +#include <cstring> +#include <iostream> +#include <string> +#include <thread> +#include <memory> + +#include "batch_job.h" +#include "dla_aot_structs.h" +#include "raw_device.h" + +// RawBatchJob represents one batch execution +// Contains functions to start DLA +class RawBatchJob : public BatchJob { + private: + const CompiledResult* compiledResult; + DLAInput* dlaBuffers_; + DLAOutput output_; + int instance_; + uint32_t debugLevel_; + std::string AES_key_; + std::string IV_key_; + bool encryption_enabled_; + RawBatchJob(const CompiledResult* compiledResult, + DLAInput* dlaBuffers, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled); + + public: + RawBatchJob(const RawBatchJob&) = delete; + RawBatchJob(RawBatchJob&) = delete; + RawBatchJob& operator=(const RawBatchJob&) = delete; + static unique_ptr<BatchJob> MakeUnique(const CompiledResult* compiledResult, + DLAInput* dlaBuffers, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled); + // @param inputArray - ptr to CPU array containing input data tp be copied to DDR + // blocking function + void LoadInputFeatureToDDR(void* inputArray); + // Starts DLA by writing to CSR in DLA DMA; the DDR addresses of graph config and input data + void StartDla() override; + // @param outputArray - ptr to CPU array where the output data in DDR is copied into + // outputArray must be allocated by the caller (size >= output_size_ddr) + // blocking function + void ReadOutputFeatureFromDDR(void* outputArray) const; + void ScheduleInputFeature() const {} +}; + +#endif diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h new file mode 100644 index 0000000..168707e --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_device.h @@ -0,0 +1,81 @@ +// Copyright 2020-2023 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. +#ifndef RAW_DEVICE_H +#define RAW_DEVICE_H + +#include <assert.h> +#include <chrono> +#include <cstdio> +#include <cstring> +#include <iostream> +#include <memory> +#include <string> +#include <thread> +#include <vector> +#include <map> +#include "arch_params.h" +#include "compiled_result.h" +#include "device.h" +using namespace std; +using namespace dla; +class GraphJob; + +class RawDevice : public Device { + public: + GraphJob* CreateGraphJob(const CompiledResult* compiledResult, + size_t numPipelines, + int instance, + std::string AES_key, + std::string IV_key, + bool encryption_enabled, + const std::string export_dir, + const std::string parameter_rom_export_dir); + // Return number of DLA jobs completed till now + // Used for debugging + int GetNumInferencesCompleted(int instance) const override; + // Must be called when there are no active jobs on DLA + // Returns the total time taken by DLA jobs on hardware (in milliseconds) + double GetActiveHWTimeMs(int instance) const override; + // Must be called when there are no active jobs on DLA + // Returns the average of time taken per job (in milliseconds) + // Avg Time per job < Active Time + double GetAvgHWTimePerJobMs(size_t num_jobs, int instance) const override; + RawDevice(const arch_params* archParams); + void WaitForDla(int instance, + size_t threadId = 0, + std::function<bool()> isCancelled = nullptr) override; // threadId is for debugging purpose only + std::string SchedulerGetStatus() const override { return ""; } + bool InitializeScheduler(uint32_t sourceBufferSize, + uint32_t dropSourceBuffers, + uint32_t numInferenceRequests, + const std::string source_fifo_file = "") override { + return true; + } + int GetNumInstances() const override { return numInstances_; } + int GetSizeCsrDescriptorQueue() const override { return -1; } // meaningless here + double GetCoreDlaClockFreq() const override { return -1.0; } // meaningless here + std::map<std::string, uint64_t> ReadDebugNetwork(int instance) const override { + return std::map<std::string, uint64_t>(); + }; + uint64_t GetNumInputFeatureMemoryReads(int instance) const override { return 0; }; + uint64_t GetNumFilterMemoryReads(int instance) const override {return 0; }; + uint64_t GetNumOutputFeatureMemoryWrites(int instance) const override {return 0; }; + + private: + RawDevice() = delete; + vector<unique_ptr<GraphJob>> allGraphJobs_; + int numInstances_; + const arch_params* archParams_; +}; + +#endif // REF_DEVCE_H diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h new file mode 100644 index 0000000..38ad075 --- /dev/null +++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/inc/raw_graph_job.h @@ -0,0 +1,80 @@ +// Copyright 2020-2023 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. +#ifndef RAW_GRAPH_JOB_H +#define RAW_GRAPH_JOB_H + +#include <assert.h> +#include <cstdio> +#include <memory> +#include <vector> +#if defined(_WIN32) || defined(_WIN64) +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include <windows.h> +#else +#include <dlfcn.h> +#endif +#include <cstring> +#include <iostream> +#include <string> +#include <thread> +#include "compiled_result.h" + +#include "dla_aot_structs.h" +#include "graph_job.h" +#include "raw_batch_job.h" +#include "raw_device.h" +using namespace dla; +/*! RawGraphJob is a DLA compiled graph loaded onto a emulation device + * Initialized with Emulator Device object + * RawGraphJob stores arrays filter, bias, config, inputs and outputs + * It provides handle to "batch job" objects that are used to load input and start DLA for one batch + */ +class RawGraphJob : public GraphJob { + public: + static unique_ptr<GraphJob> MakeUnique(const arch_params* archParams, + const CompiledResult* compiled_result, + size_t numPipelines, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled); + // Returns an unused batch job object + // If all batch jobs are used, returns null + // Increments batchJobsRequested_ + // Thread safe + BatchJob* GetBatchJob(); + RawGraphJob(const GraphJob&) = delete; + RawGraphJob(RawGraphJob&) = delete; + RawGraphJob& operator=(const RawGraphJob&) = delete; + + private: + DLAInput dlaBuffers_; + vector<unique_ptr<BatchJob>> batchJobs_; + int instance_; + uint32_t debugLevel_; + unsigned int batchJobsRequested_; + std::mutex graphJobMutex; + RawGraphJob(const arch_params* archParams, + const CompiledResult* compiledResult, + size_t numPipelines, + int instance, + uint32_t debugLevel, + std::string AES_key, + std::string IV_key, + bool encryption_enabled); +}; + +#endif |
