5 files changed, 345 insertions, 0 deletions
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg
new file mode 100644
index 0000000..3288819
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/CPPLINT.cfg
@@ -0,0 +1,4 @@
+filter=-build/header_guard,-runtime/explicit,-build/include_subdir,-runtime/references,-build/c++11,-runtime/int
+exclude_files=^(?!pe_array_sim.cpp).*\.cpp
+linelength=160
+headers=h,hpp
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp
new file mode 100644
index 0000000..4317201
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/dla_aot_utils.cpp
@@ -0,0 +1,117 @@
+// Copyright 2020 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/*
+  This file contains some helper utilities to output coredla data blobs to files
+  in the current working directory
+*/
+
+#include "dla_aot_utils.h"
+
+// Writes buffer_size raw bytes from buffer to file_path, replacing any existing file; a zero size gives an empty
+// file. Open/write failures are reported on stdout instead of being asserted, so builds with NDEBUG no longer hand
+// a null FILE* to fwrite/fclose. A failing fclose counts as a write error because it flushes the buffered data.
+// The resulting file is expected to be consumed by RTL testbench or hardware.
+static void writeBufferToBinFile(const uint8_t *buffer, uint32_t buffer_size, const char *file_path) {
+  FILE *fp = fopen(file_path, "wb");
+  if (nullptr == fp) {
+    std::cout << "ERROR opening output file " << file_path << std::endl;
+    return;
+  }
+  const bool write_failed = buffer_size && (fwrite(buffer, buffer_size, 1, fp) != 1);
+  if ((fclose(fp) != 0) || write_failed) std::cout << "ERROR writing to output file " << file_path << std::endl;
+}
+
+// Dumps buffer as hex text: one 0x%08x literal per 32-bit word (read in host byte order), comma separated, with a
+// line break after every 32 words (128 bytes) and no trailing comma or newline. A trailing partial word is zero
+// padded instead of being read past the end of the buffer. Open/write failures are reported on stdout.
+// The resulting file is expected to be consumed by RTL testbench or hardware.
+static void writeBufferToFile(const uint8_t *buffer, uint32_t buffer_size, const char *file_path) {
+  FILE *fp = fopen(file_path, "w");
+  if (nullptr == fp) {
+    std::cout << "ERROR opening output file " << file_path << std::endl;
+    return;
+  }
+  for (uint32_t offset = 0; offset < buffer_size;) {
+    const uint32_t chunk = std::min<uint32_t>(sizeof(uint32_t), buffer_size - offset);
+    uint32_t word = 0;
+    memcpy(&word, buffer + offset, chunk);  // memcpy: buffer carries no uint32_t alignment guarantee
+    if (offset && ((offset % 128) == 0)) fprintf(fp, "\n");
+    offset += chunk;  // advancing by the bytes consumed cannot wrap, unlike an unconditional += 4
+    fprintf(fp, (offset < buffer_size) ? "0x%08x," : "0x%08x", word);
+  }
+  const bool write_failed = ferror(fp) != 0;  // sampled before fclose invalidates the stream
+  if ((fclose(fp) != 0) || write_failed) std::cout << "ERROR writing to output file " << file_path << std::endl;
+}
+
+// Create all files that the splitter is responsible for (relative paths, i.e. in the current working directory):
+//   arch_build.mem/.bin  arch hash, build version and arch name packed into consecutive fixed-size fields
+//   config.mem/.bin      config words            (skipped when parameter ROM is enabled)
+//   filter.mem/.bin      filter/bias/scale data  (skipped when parameter ROM is enabled)
+//   input.mem/.bin       input feature buffer (empty files while no input buffer is attached)
+//   inter_size.mem       intermediate feature buffer size, as one 32-bit word
+//   output_size.mem      output feature buffer size, as one 32-bit word
+// *.mem files are hex text written by writeBufferToFile, *.bin files are raw bytes written by
+// writeBufferToBinFile. `output` is not read; the parameter is kept so existing callers keep compiling.
+void writeInputOutputToFiles (
+  const std::vector<int>& arch_hash,
+  const std::string& build_version,
+  const std::string& arch_name,
+  const DLAInput &input,
+  const DLAOutput &output
+) {
+  (void) output;
+
+  // Zero the whole record up front: previously only the hash and version fields were cleared, so the padding
+  // behind a short arch name was written out uninitialised.
+  uint8_t arch_build[ARCH_HASH_SIZE + BUILD_VERSION_SIZE + ARCH_NAME_SIZE] = {};
+  // Every copy is clamped to the field width and to what the source really holds (the hash copy used to read
+  // ARCH_HASH_SIZE bytes regardless of the vector's size).
+  const size_t hash_bytes = std::min(arch_hash.size() * sizeof(int), static_cast<size_t>(ARCH_HASH_SIZE));
+  if (hash_bytes) memcpy(&arch_build[0], arch_hash.data(), hash_bytes);
+  memcpy(&arch_build[ARCH_HASH_SIZE], build_version.c_str(),
+         std::min(build_version.length(), static_cast<size_t>(BUILD_VERSION_SIZE)));
+  memcpy(&arch_build[ARCH_HASH_SIZE + BUILD_VERSION_SIZE], arch_name.c_str(),
+         std::min(arch_name.length(), static_cast<size_t>(ARCH_NAME_SIZE)));
+  writeBufferToFile(arch_build, sizeof(arch_build), "arch_build.mem");
+  // Raw dump like every other *.bin file; this one used to be produced by the hex text writer.
+  writeBufferToBinFile(arch_build, sizeof(arch_build), "arch_build.bin");
+
+  const auto &config_fbs_buffer =
+    input.compiled_result->get_config_filter_bias_scale_array();
+
+  // Only dump filters and config memory file when they are saved in DDR
+  if (!input.compiled_result->get_ddrfree_header().enable_parameter_rom) {
+    // The config words sit at the start of the array; the filter/bias/scale data follows directly behind them.
+    const uint8_t *config_data = &(config_fbs_buffer[0][0]);
+    const uint8_t *filter_data = config_data + input.config_buffer_size;
+    writeBufferToFile(config_data, input.config_buffer_size, "config.mem");
+    writeBufferToBinFile(config_data, input.config_buffer_size, "config.bin");
+    writeBufferToFile(filter_data, input.filter_bias_scale_buffer_size, "filter.mem");
+    writeBufferToBinFile(filter_data, input.filter_bias_scale_buffer_size, "filter.bin");
+  } else {
+    std::cout << "Graph filters and DLA configs are not dumped because parameter ROM is enabled in the AOT file." << std::endl;
+  }
+
+  // While no input buffer is attached, pass (nullptr, 0) so that both writers just create empty files.
+  const uint8_t *input_buffer = input.input_feature_buffer;
+  const size_t input_size = input_buffer ? input.input_feature_buffer_size : 0;
+  writeBufferToFile(input_buffer, input_size, "input.mem");
+  writeBufferToBinFile(input_buffer, input_size, "input.bin");
+
+  // The sizes are copied into 32-bit locals so that each file holds exactly one word.
+  const uint32_t inter_size = input.intermediate_feature_buffer_size;
+  writeBufferToFile(reinterpret_cast<const uint8_t *>(&inter_size), sizeof(inter_size), "inter_size.mem");
+  const uint32_t output_size = input.output_feature_buffer_size;
+  writeBufferToFile(reinterpret_cast<const uint8_t *>(&output_size), sizeof(output_size), "output_size.mem");
+}
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp
new file mode 100644
index 0000000..23247d5
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_batch_job.cpp
@@ -0,0 +1,68 @@
+// Copyright 2022 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/*
+  The raw_batch_job, raw_graph_job, and raw_device implement the interfaces
+  used by dliaPlugin to mimic an inference flow without actually performing an
+  inference. They are used to capture the transformed input produced by the
+  dliaPlugin upper layers.
+*/
+
+#include "raw_batch_job.h"
+#include "dla_aot_utils.h"
+
+// Factory: heap-allocates a RawBatchJob from the given arguments and returns it as an owning BatchJob pointer.
+// All arguments are passed on unchanged to the RawBatchJob constructor.
+unique_ptr<BatchJob> RawBatchJob::MakeUnique(const CompiledResult * compiledResult, DLAInput* dlaBuffers,
+                                             int instance, uint32_t debugLevel,
+                                             std::string AES_key, std::string IV_key,
+                                             bool encryption_enabled) {
+  BatchJob* job = new RawBatchJob(compiledResult, dlaBuffers, instance, debugLevel, AES_key, IV_key, encryption_enabled);
+  return unique_ptr<BatchJob>(job);
+}
+
+// Stores the job parameters and allocates a zero-filled output feature buffer sized from
+// dlaBuffers->output_feature_buffer_size. dlaBuffers is kept as a pointer, not copied, and must not be null.
+// NOTE(review): the buffer comes from new[]; its release is not visible in this file - confirm the destructor frees it.
+RawBatchJob::RawBatchJob(const CompiledResult * compiledResult, DLAInput* dlaBuffers, int instance,
+                         uint32_t debugLevel, std::string AES_key, std::string IV_key,
+                         bool encryption_enabled) : compiledResult(compiledResult) {
+  // Validate before the first dereference. The previous check ran after the memset and tested the result of
+  // new[], which is never null (allocation failure throws instead).
+  assert(nullptr != dlaBuffers);
+  dlaBuffers_ = dlaBuffers;
+  instance_ = instance;
+  debugLevel_ = debugLevel;
+  AES_key_ = AES_key;
+  IV_key_ = IV_key;
+  encryption_enabled_ = encryption_enabled;
+  output_.output_feature_buffer = new uint8_t[dlaBuffers_->output_feature_buffer_size]();  // () zero-fills
+}
+
+// There is no DDR to load: the pointer is only recorded in the DLAInput behind dlaBuffers_ (no data is copied)
+// and the file dump is triggered right away through StartDla().
+// Note: inputArray must therefore stay alive until StartDla() has returned.
+void RawBatchJob::LoadInputFeatureToDDR(void* inputArray) {
+  dlaBuffers_->input_feature_buffer = static_cast<uint8_t*>(inputArray);
+  StartDla();
+}
+
+void RawBatchJob::StartDla() {  // nothing is executed: the arch info and the buffers are written to files instead
+  const CompiledResult& compiled = *compiledResult;
+  writeInputOutputToFiles(compiled.get_arch_hash(), compiled.get_build_version_string(), compiled.get_arch_name(), *dlaBuffers_, output_);
+}
+
+// No DDR to read back: outputArray receives output_feature_buffer_size bytes from the buffer held in output_.
+void RawBatchJob::ReadOutputFeatureFromDDR(void* outputArray) const {
+  memcpy(outputArray, output_.output_feature_buffer, dlaBuffers_->output_feature_buffer_size);  // whole buffer
+}
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp
new file mode 100644
index 0000000..0b8e838
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_device.cpp
@@ -0,0 +1,67 @@
+// Copyright 2022 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/*
+  The raw_batch_job, raw_graph_job, and raw_device implement the interfaces
+  used by dliaPlugin to mimic an inference flow without actually performing an
+  inference. They are used to capture the transformed input produced by the
+  dliaPlugin upper layers.
+*/
+
+#include "raw_device.h"
+#include "raw_graph_job.h"
+// Device factory as built into this plugin: always hands out a RawDevice; waitForDlaTimeoutSeconds is ignored.
+unique_ptr<Device> Device::MakeUnique(const arch_params* archParams, uint32_t waitForDlaTimeoutSeconds) {
+  return unique_ptr<Device>{new RawDevice(archParams)};
+}
+
+RawDevice::RawDevice(const arch_params* archParams) {
+  archParams_ = archParams;  // handed to every graph job created by CreateGraphJob
+  numInstances_ = 1;         // the raw device exposes a single instance
+}
+
+// Builds a RawGraphJob (debug level 0) for compiledResult, appends it to allGraphJobs_ and returns a raw pointer
+// to the stored job. The two export directories are accepted for interface compatibility but not used.
+GraphJob* RawDevice::CreateGraphJob(const CompiledResult * compiledResult, size_t numPipelines, int instance,
+                                    std::string AES_key, std::string IV_key, bool encryption_enabled,
+                                    const std::string export_dir,
+                                    const std::string parameter_rom_export_dir) {
+  (void) export_dir;  // unused in HW runtime (per the original note, only the SW reference uses this variable)
+  (void) parameter_rom_export_dir;
+  assert(instance < numInstances_);
+
+  auto graphJob = RawGraphJob::MakeUnique(archParams_, compiledResult, numPipelines, instance, 0,
+                                          AES_key, IV_key, encryption_enabled);
+  GraphJob* const createdJob = graphJob.get();  // fetched before the pointer is moved into the container
+  allGraphJobs_.push_back(std::move(graphJob));
+  return createdJob;
+}
+
+void RawDevice::WaitForDla(int instance, size_t threadId /* = 0 */, std::function<bool()> isCancelled) {
+  // Intentionally empty: RawDevice does not do any real work, so there is nothing to wait for.
+}
+
+int RawDevice::GetNumInferencesCompleted(int instance) const {  // stub: `instance` is ignored
+  std::cout << "This function, GetNumInferencesCompleted, is not implemented for raw device" << std::endl;
+  return 0;  // placeholder value
+}
+
+double RawDevice::GetActiveHWTimeMs(int instance) const {  // stub: `instance` is ignored
+  std::cout << "This function, GetActiveHWTimeMs, is not implemented for raw device" << std::endl;
+  return 0.0;  // placeholder value
+}
+
+double RawDevice::GetAvgHWTimePerJobMs(size_t num_jobs, int instance) const {  // stub: both arguments are ignored
+  std::cout << "This function, GetAvgHWTimePerJobMs, is not implemented for raw device" << std::endl;
+  return 0.0;  // placeholder value
+}
diff --git a/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp
new file mode 100644
index 0000000..c698110
--- /dev/null
+++ b/python/openvino/runtime/dla_aot_splitter/dla_aot_splitter_plugin/src/raw_graph_job.cpp
@@ -0,0 +1,89 @@
+// Copyright 2022 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/*
+  The raw_batch_job, raw_graph_job, and raw_device implement the interfaces
+  used by dliaPlugin to mimic an inference flow without actually performing an
+  inference. They are used to capture the transformed input produced by the
+  dliaPlugin upper layers.
+*/
+
+#include "raw_graph_job.h"
+#include "dla_aot_utils.h"
+#include <fstream>
+#include "dla_defines.h"
+
+// Factory: heap-allocates a RawGraphJob from the given arguments and returns it as an owning GraphJob pointer.
+// NOTE(review): the default arguments are declared on this out-of-class definition, so they are only visible to
+// code in this translation unit that follows it - confirm that is intended.
+unique_ptr<GraphJob> RawGraphJob::MakeUnique(const arch_params* archParams, const CompiledResult * compiledResult,
+                                             size_t numPipelines, int instance,
+                                             uint32_t debugLevel = 0,
+                                             std::string AES_key = "", std::string IV_key = "",
+                                             bool encryption_enabled = false) {
+  GraphJob* job = new RawGraphJob(archParams, compiledResult, numPipelines, instance, debugLevel, AES_key, IV_key, encryption_enabled);
+  return unique_ptr<GraphJob>(job);
+}
+
+// Fills dlaBuffers_ with the buffer sizes reported by compiledResult (plus a pointer to compiledResult itself)
+// and then creates numPipelines batch jobs. Every batch job receives a pointer to this one dlaBuffers_ member,
+// so they all share the same DLAInput. numPipelines must be non-zero (asserted); archParams is not used by
+// this constructor.
+RawGraphJob::RawGraphJob(const arch_params* archParams,
+                         const CompiledResult * compiledResult,
+                         size_t numPipelines,
+                         int instance,
+                         uint32_t debugLevel,
+                         std::string AES_key,
+                         std::string IV_key,
+                         bool encryption_enabled) {
+  assert(numPipelines);
+  batchJobsRequested_ = 0;
+  debugLevel_ = debugLevel;
+  instance_ = instance;
+
+  // Feature buffers: only the sizes are known here. The input buffer itself is allocated outside this routine,
+  // so its pointer starts out null.
+  // TODO: support multi-input graph
+  dlaBuffers_.input_feature_buffer = nullptr;
+  dlaBuffers_.input_feature_buffer_size = compiledResult->get_conv_input_size_in_bytes();
+  dlaBuffers_.output_feature_buffer_size = compiledResult->get_conv_output_size_in_bytes();
+  dlaBuffers_.intermediate_feature_buffer_size = compiledResult->get_conv_intermediate_size_in_bytes();
+
+  // config and filter buffer size
+  const size_t num_config_words = compiledResult->get_num_config_words();
+  dlaBuffers_.config_buffer_size = num_config_words * CONFIG_WORD_SIZE;
+  dlaBuffers_.filter_bias_scale_buffer_size = compiledResult->get_total_filter_bias_scale_buffer_size();
+
+  // store a pointer to CompiledResult to use config and filter buffer directly without copying
+  dlaBuffers_.compiled_result = compiledResult;
+
+  // One batch job per pipeline. The output size must already be set: each job sizes its output buffer from it.
+  for (size_t pipeline = 0; pipeline < numPipelines; ++pipeline) {
+    auto batchJob = RawBatchJob::MakeUnique(compiledResult, &dlaBuffers_, instance_, debugLevel_,
+                                            AES_key, IV_key, encryption_enabled);
+    batchJobs_.push_back(std::move(batchJob));
+  }
+}
+
+// Hands out the batch jobs created by the constructor one at a time, in creation order, each at most once;
+// returns nullptr once all of them have been requested. The counter is only touched while graphJobMutex is held.
+BatchJob* RawGraphJob::GetBatchJob() {
+  BatchJob* nextJob = nullptr;
+  graphJobMutex.lock();
+  if (batchJobsRequested_ < batchJobs_.size()) {
+    nextJob = batchJobs_[batchJobsRequested_++].get();
+  }
+  graphJobMutex.unlock();
+  return nextJob;
+}