summaryrefslogtreecommitdiff
path: root/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie
diff options
context:
space:
mode:
Diffstat (limited to 'python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie')
-rw-r--r--python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/CMakeLists.txt62
-rwxr-xr-xpython/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/cmake/modules/FindNUMA.cmake34
-rwxr-xr-xpython/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/cmake/modules/FindOPAE.cmake44
-rw-r--r--python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/kernel_interrupt.cpp257
-rw-r--r--python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/kernel_interrupt.h68
-rw-r--r--python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd.cpp830
-rw-r--r--python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_device.cpp448
-rw-r--r--python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_device.h151
-rw-r--r--python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_dma.cpp573
-rw-r--r--python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_dma.h89
-rw-r--r--python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_helper.cpp163
-rw-r--r--python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_helper.h41
-rw-r--r--python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/include/aocl_mmd.h377
13 files changed, 3137 insertions, 0 deletions
diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/CMakeLists.txt b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/CMakeLists.txt
new file mode 100644
index 0000000..445a304
--- /dev/null
+++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/CMakeLists.txt
@@ -0,0 +1,62 @@
+# (C) 2017 Intel Corporation. All rights reserved.
+# Your use of Intel Corporation's design tools, logic functions and other
+# software and tools, and its AMPP partner logic functions, and any output
+# files any of the foregoing (including device programming or simulation
+# files), and any associated documentation or information are expressly subject
+# to the terms and conditions of the Intel Program License Subscription
+# Agreement, Intel MegaCore Function License Agreement, or other applicable
+# license agreement, including, without limitation, that your use is for the
+# sole purpose of programming logic devices manufactured by Intel and sold by
+# Intel or its authorized distributors. Please refer to the applicable
+# agreement for further details.
+
+cmake_minimum_required(VERSION 2.8.12)
+project(mmd)
+
+# Custom find modules (FindOPAE.cmake, FindNUMA.cmake) live next to this file.
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules")
+
+# Both modules define the imported targets linked below (libopae-c, libnuma).
+find_package(OPAE REQUIRED)
+find_package(NUMA REQUIRED)
+
+enable_language(C ASM)
+
+# Let the assembler run the C preprocessor over assembly sources.
+set(ASM_OPTIONS "-x assembler-with-cpp")
+# Compare through the variable name rather than an unquoted ${...} expansion:
+# an empty compiler ID would otherwise expand to `if( STREQUAL "Clang")`,
+# which is a configure-time error.
+if(CMAKE_C_COMPILER_ID STREQUAL "Clang")
+  set(ASM_OPTIONS "${ASM_OPTIONS} -no-integrated-as")
+endif()
+
+# NOTE(review): ${CFLAGS} is a plain CMake variable that nothing in this file
+# sets; confirm whether CMAKE_C_FLAGS or $ENV{CFLAGS} was intended. No assembly
+# sources are listed in MMD_SRC, so these flags are currently unused.
+set(CMAKE_ASM_FLAGS "${CFLAGS} ${ASM_OPTIONS}")
+
+set(MMD_SRC
+  ./host/mmd.cpp
+  ./host/mmd_device.cpp
+  ./host/mmd_dma.cpp
+  ./host/mmd_helper.cpp
+  ./host/kernel_interrupt.cpp
+)
+
+# Add a shared library target called intel_opae_mmd
+# and build it from the MMD_SRC files
+add_library(intel_opae_mmd SHARED ${MMD_SRC})
+
+# Compile definitions are attached to the target instead of directory scope
+# (add_definitions / CMAKE_CXX_FLAGS) so they cannot leak into other targets:
+#   I_DK_AFU_ID - GUID string used by the host code to select the accelerator
+#   DLA_MMD     - enables the DLA specific modifications made to the MMD
+target_compile_definitions(intel_opae_mmd PRIVATE
+  I_DK_AFU_ID="11446C9D-AA42-4085-9B3D-4EEF9429A4AD"
+  DLA_MMD
+)
+
+# Specify the include directories to be used when compiling intel_opae_mmd library
+target_include_directories(intel_opae_mmd PUBLIC
+  ${CMAKE_CURRENT_SOURCE_DIR}/include
+)
+
+# Specify libraries needed when linking the intel_opae_mmd library.
+# The keyword-less signature is kept on purpose: it preserves the existing
+# transitive link interface and cannot clash with other keyword-less
+# target_link_libraries() calls on this target elsewhere in the build.
+target_link_libraries(intel_opae_mmd
+  libopae-c
+  libnuma
+)
+
+# Set the installation rules for the project
+install(TARGETS intel_opae_mmd
+  LIBRARY DESTINATION lib
+  COMPONENT intel_opae_mmd
+)
diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/cmake/modules/FindNUMA.cmake b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/cmake/modules/FindNUMA.cmake
new file mode 100755
index 0000000..c981150
--- /dev/null
+++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/cmake/modules/FindNUMA.cmake
@@ -0,0 +1,34 @@
+# - Try to find libnuma
+# Once done will define:
+#
+# NUMA_FOUND - system has libnuma
+# NUMA_INCLUDE_DIRS - include directory with numa.h
+# NUMA_LIBRARIES - link with this for libnuma
+# libnuma - IMPORTED target carrying the two values above (only when found)
+#
+# Set LIBNUMA_ROOT to point the search at a non-standard installation prefix.
+
+# find_package_handle_standard_args() lives in this module; include it here
+# rather than relying on some earlier find module having pulled it in.
+include(FindPackageHandleStandardArgs)
+
+find_path(NUMA_INCLUDE_DIRS
+  NAMES numa.h
+  PATHS
+  ${LIBNUMA_ROOT}/include
+  /usr/include
+  /p/psg/swip/dla/resources/numactl/2.0.16/include
+)
+
+find_library(NUMA_LIBRARIES
+  NAMES numa
+  PATHS
+  ${LIBNUMA_ROOT}/lib
+  ${LIBNUMA_ROOT}/lib64
+  /usr/lib
+  /usr/lib64
+  /p/psg/swip/dla/resources/numactl/2.0.16/lib
+)
+
+find_package_handle_standard_args(NUMA
+  REQUIRED_VARS NUMA_INCLUDE_DIRS NUMA_LIBRARIES)
+
+# Create the imported target only when the library was actually located, and
+# only once, so including this module twice (or a non-REQUIRED miss) does not
+# fail with a duplicate target or a *-NOTFOUND IMPORTED_LOCATION.
+if(NUMA_FOUND AND NOT TARGET libnuma)
+  add_library(libnuma SHARED IMPORTED)
+  set_target_properties(libnuma PROPERTIES
+    IMPORTED_LOCATION "${NUMA_LIBRARIES}"
+    INTERFACE_INCLUDE_DIRECTORIES "${NUMA_INCLUDE_DIRS}")
+endif()
diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/cmake/modules/FindOPAE.cmake b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/cmake/modules/FindOPAE.cmake
new file mode 100755
index 0000000..6395d7c
--- /dev/null
+++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/cmake/modules/FindOPAE.cmake
@@ -0,0 +1,44 @@
+# - Try to find libopae-c (the OPAE C library)
+# Once done, this will define
+#
+# OPAE_FOUND - system has libopae-c
+# libopae-c_INCLUDE_DIRS - the libopae-c include directories
+# libopae-c_LIBRARIES - link these to use libopae-c
+# libopae-c - IMPORTED target carrying the two values above (only when found)
+#
+# Set LIBOPAE-C_ROOT to point the search at a non-standard installation prefix.
+
+include(FindPackageHandleStandardArgs)
+
+find_package(PkgConfig)
+pkg_check_modules(PC_OPAE QUIET opae-c)
+
+# Use pkg-config to get hints about paths.
+# The output is stripped: without OUTPUT_STRIP_TRAILING_WHITESPACE the captured
+# value ends in a newline and never matches a real directory in find_path().
+# ERROR_QUIET keeps a missing pkg-config or missing opae-c.pc from printing noise.
+execute_process(COMMAND pkg-config --cflags opae-c --silence-errors
+  COMMAND cut -d I -f 2
+  OUTPUT_VARIABLE OPAE-C_PKG_CONFIG_INCLUDE_DIRS
+  OUTPUT_STRIP_TRAILING_WHITESPACE
+  ERROR_QUIET)
+set(OPAE-C_PKG_CONFIG_INCLUDE_DIRS "${OPAE-C_PKG_CONFIG_INCLUDE_DIRS}" CACHE STRING "Compiler flags for OPAE-C library")
+
+# Include dir
+find_path(libopae-c_INCLUDE_DIRS
+  NAMES opae/fpga.h
+  PATHS ${LIBOPAE-C_ROOT}/include
+  ${OPAE-C_PKG_CONFIG_INCLUDE_DIRS}
+  /usr/local/include
+  /usr/include
+  ${CMAKE_EXTRA_INCLUDES})
+
+# The library itself
+find_library(libopae-c_LIBRARIES
+  NAMES opae-c
+  PATHS ${LIBOPAE-C_ROOT}/lib
+  ${LIBOPAE-C_ROOT}/lib64
+  /usr/local/lib
+  /usr/lib
+  /lib
+  /usr/lib/x86_64-linux-gnu
+  ${CMAKE_EXTRA_LIBS})
+
+find_package_handle_standard_args(OPAE
+  REQUIRED_VARS libopae-c_LIBRARIES libopae-c_INCLUDE_DIRS)
+
+# Create the imported target only when the library was actually located, and
+# only once, so a repeated include or a non-REQUIRED miss does not error out.
+if(OPAE_FOUND AND NOT TARGET libopae-c)
+  add_library(libopae-c SHARED IMPORTED)
+  set_target_properties(libopae-c PROPERTIES
+    IMPORTED_LOCATION "${libopae-c_LIBRARIES}"
+    INTERFACE_INCLUDE_DIRECTORIES "${libopae-c_INCLUDE_DIRS}")
+endif()
+
diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/kernel_interrupt.cpp b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/kernel_interrupt.cpp
new file mode 100644
index 0000000..97882d4
--- /dev/null
+++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/kernel_interrupt.cpp
@@ -0,0 +1,257 @@
+// (c) 1992-2024 Intel Corporation.
+// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words
+// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S.
+// and/or other countries. Other marks and brands may be claimed as the property
+// of others. See Trademarks on intel.com for full list of Intel trademarks or
+// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera)
+// Your use of Intel Corporation's design tools, logic functions and other
+// software and tools, and its AMPP partner logic functions, and any output
+// files any of the foregoing (including device programming or simulation
+// files), and any associated documentation or information are expressly subject
+// to the terms and conditions of the Altera Program License Subscription
+// Agreement, Intel MegaCore Function License Agreement, or other applicable
+// license agreement, including, without limitation, that your use is for the
+// sole purpose of programming logic devices manufactured by Intel and sold by
+// Intel or its authorized distributors. Please refer to the applicable
+// agreement for further details.
+
+#include "kernel_interrupt.h"
+
+#include <poll.h>
+#include <sys/eventfd.h>
+
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include <string>
+#include <thread>
+
+#include "mmd_device.h"
+
+using namespace intel_opae_mmd;
+
+// Interrupt line number passed as the last argument of fpgaRegisterEvent()
+// when registering for FPGA_EVENT_INTERRUPT.
+static const int mmd_kernel_interrupt_line_num = 1;
+// Interrupt mask values. Neither constant is referenced by the code in
+// kernel_interrupt.cpp shown in this change.
+static const uint32_t enable_int_mask = 0x00000001;
+static const uint32_t disable_int_mask = 0x00000000;
+
+// Class-wide switch, set to true by set_member_for_interrupts(). While false,
+// enable_interrupts() and disable_interrupts() return early without touching
+// OPAE or the work thread.
+bool KernelInterrupt::enable_thread = false;
+
+// Threshold for debug_print(): a message is emitted only when its level is
+// less than or equal to this value.
+static const int debug_log_level = 0;
+
+// TODO: use consistent function throughout MMD for controlling debug
+// messages. This debug_print function is from OFS.
+//
+// Writes err_msg to std::cerr, prefixed with "KernelInterrupt: ", when msglog
+// does not exceed debug_log_level; otherwise does nothing.
+static void debug_print(std::string &err_msg, int msglog) {
+  const bool suppressed = msglog > debug_log_level;
+  if (suppressed) {
+    return;
+  }
+  std::cerr << "KernelInterrupt: " << err_msg << std::endl;
+}
+
+/** Report a failed OPAE call.
+ *  res     - result code returned by the OPAE API
+ *  err_str - short description of the operation that was attempted
+ *
+ *  Does nothing when res is FPGA_OK. Otherwise writes
+ *  "KernelInterrupt: <err_str>: <fpgaErrStr(res)>" to std::cerr.
+ *  Previously the message was assembled and then discarded, so OPAE failures
+ *  were completely silent. The function still returns normally on failure
+ *  (it does not throw), so callers keep their existing control flow.
+ */
+static inline void check_result(fpga_result res, const char *err_str) {
+  if (res == FPGA_OK) {
+    return;
+  }
+  std::string opae_err_str =
+      std::string("KernelInterrupt: ") + std::string(err_str) + std::string(": ") + std::string(fpgaErrStr(res));
+  std::cerr << opae_err_str << std::endl;
+}
+
+/** KernelInterrupt constructor
+ *  fpga_handle_arg - OPAE handle stored in m_fpga_handle and later used for
+ *                    event registration
+ *  mmd_handle      - MMD handle stored in m_mmd_handle and passed back to the
+ *                    interrupt handler
+ *
+ *  Initializes all members to an idle state (no thread, no handler, no event
+ *  handle), then calls set_member_for_interrupts() followed by
+ *  enable_interrupts().
+ */
+KernelInterrupt::KernelInterrupt(fpga_handle fpga_handle_arg, int mmd_handle)
+ : m_work_thread_active(false),
+ m_eventfd(0),
+ m_kernel_interrupt_fn(nullptr),
+ m_kernel_interrupt_user_data(nullptr),
+ m_fpga_handle(fpga_handle_arg),
+ m_mmd_handle(mmd_handle),
+ m_event_handle(nullptr) {
+ // The trace message is additionally gated on the MMD_ENABLE_DEBUG environment variable.
+ if (std::getenv("MMD_ENABLE_DEBUG")) {
+ MMD_DEBUG("DEBUG LOG : KernelInterrupt Constructor\n");
+ }
+ // Order matters: enable_interrupts() reads the static enable_thread flag
+ // that set_member_for_interrupts() sets.
+ set_member_for_interrupts();
+ enable_interrupts();
+}
+
+/** KernelInterrupt destructor
+ *  Calls disable_interrupts(). Any exception escaping that call is caught
+ *  here and reported through debug_print() so that nothing propagates out of
+ *  the destructor.
+ */
+KernelInterrupt::~KernelInterrupt() {
+ if (std::getenv("MMD_ENABLE_DEBUG")) {
+ MMD_DEBUG("DEBUG LOG : KernelInterrupt Destructor\n");
+ }
+ try {
+ disable_interrupts();
+ } catch (...) {
+ // Level 0 equals debug_log_level, so this message is always printed.
+ std::string err("destructor error");
+ debug_print(err, 0);
+ }
+}
+
+/** disable_interrupts() is called from the KernelInterrupt destructor and may
+ *  also be called directly.
+ *
+ *  If interrupts are not in use (enable_thread is false) there is nothing to
+ *  tear down and the function returns immediately.
+ *
+ *  Otherwise it:
+ *   - clears m_work_thread_active so the work thread leaves its loop,
+ *   - wakes the thread through notify_work_thread() and joins it,
+ *   - calls the OPAE API fpgaUnregisterEvent() to tell the driver the caller
+ *     is no longer interested in notifications for m_event_handle,
+ *   - calls the OPAE API fpgaDestroyEventHandle() to free its resources.
+ *
+ *  The join is guarded so that calling this function when no thread was
+ *  started, or calling it a second time, does not dereference a null pointer
+ *  or join a non-joinable thread. The event handle is reset after destruction
+ *  so a repeated call does not unregister a stale handle.
+ */
+void KernelInterrupt::disable_interrupts() {
+  if (!enable_thread) {
+    if (std::getenv("MMD_ENABLE_DEBUG")) {
+      MMD_DEBUG("DEBUG LOG : KernelInterrupt disabling interrupts\n");
+    }
+    assert(m_work_thread_active == false);
+    return;
+  }
+
+  m_work_thread_active = false;
+  if (m_work_thread && m_work_thread->joinable()) {
+    // Wake the thread out of poll() so it observes the cleared flag promptly.
+    notify_work_thread();
+    m_work_thread->join();
+  }
+
+  if (m_event_handle != nullptr) {
+    fpga_result res;
+
+    res = fpgaUnregisterEvent(m_fpga_handle, FPGA_EVENT_INTERRUPT, m_event_handle);
+    check_result(res, "error fpgaUnregisterEvent");
+
+    res = fpgaDestroyEventHandle(&m_event_handle);
+    check_result(res, "error fpgaDestroyEventHandle");
+
+    m_event_handle = nullptr;
+  }
+  if (std::getenv("MMD_ENABLE_DEBUG")) {
+    MMD_DEBUG("DEBUG LOG : KernelInterrupt disabling interrupts\n");
+  }
+}
+
+/** notify_work_thread() is called by disable_interrupts().
+ *  It writes the 64-bit value 1 to m_eventfd, the descriptor obtained from
+ *  the OPAE API fpgaGetOSObjectFromEventHandle(), so that a poll() on that
+ *  descriptor wakes up.
+ *  A failed write is reported on std::cerr as a warning and otherwise ignored.
+ */
+void KernelInterrupt::notify_work_thread() {
+  uint64_t increment = 1;
+  const ssize_t written = write(m_eventfd, &increment, sizeof(increment));
+  if (written >= 0) {
+    return;
+  }
+  std::cerr << "Warning: KernelInterrupts::notify_work_thread()"
+               " write to eventfd failed: "
+            << strerror(errno) << std::endl;
+}
+
+/** enable_interrupts() is called by the KernelInterrupt constructor.
+ *
+ *  If interrupts are not in use (enable_thread is false) it marks the work
+ *  thread as inactive and returns.
+ *
+ *  Otherwise it sets up event delivery through OPAE and starts the work thread:
+ *   - fpgaCreateEventHandle() creates m_event_handle,
+ *   - fpgaRegisterEvent() registers that handle for FPGA_EVENT_INTERRUPT on
+ *     interrupt line mmd_kernel_interrupt_line_num,
+ *   - fpgaGetOSObjectFromEventHandle() stores the OS object for the event in
+ *     m_eventfd, which wait_for_event() later passes to poll(),
+ *   - a new std::thread running work_thread() is created.
+ *
+ *  Each OPAE result is passed to check_result(), which returns normally on
+ *  failure, so the remaining steps still run after a failed call.
+ *
+ *  NOTE(review): if this is called while a previous work thread is still
+ *  running, assigning m_work_thread replaces a std::thread that has not been
+ *  joined - confirm callers never do that.
+ */
+void KernelInterrupt::enable_interrupts() {
+ if (!enable_thread) {
+ if (std::getenv("MMD_ENABLE_DEBUG")) {
+ MMD_DEBUG("DEBUG LOG : KernelInterrupt enabling interrupts\n");
+ }
+ m_work_thread_active = false;
+ return;
+ }
+
+ fpga_result res;
+
+ res = fpgaCreateEventHandle(&m_event_handle);
+ check_result(res, "error creating event handle");
+
+ res = fpgaRegisterEvent(m_fpga_handle, FPGA_EVENT_INTERRUPT, m_event_handle, mmd_kernel_interrupt_line_num);
+ check_result(res, "error registering event");
+
+ res = fpgaGetOSObjectFromEventHandle(m_event_handle, &m_eventfd);
+ check_result(res, "error getting event file handle");
+
+ // The flag must be set before the thread starts: work_thread() loops on it.
+ m_work_thread_active = true;
+ m_work_thread = std::unique_ptr<std::thread>(new std::thread([this] { this->work_thread(); }));
+ if (std::getenv("MMD_ENABLE_DEBUG")) {
+ MMD_DEBUG("DEBUG LOG : KernelInterrupt enabling interrupts\n");
+ }
+}
+
+/** work_thread() is the body of the thread created in enable_interrupts().
+ *  It loops while m_work_thread_active is true. Each iteration calls
+ *  wait_for_event(), then takes m_mutex and, if a handler has been registered
+ *  through set_kernel_interrupt(), invokes it with m_mmd_handle and the stored
+ *  user data.
+ *  wait_for_event() returns in the same way for an event, a timeout or a poll
+ *  error, so the handler is invoked after every wait, including timeouts.
+ */
+void KernelInterrupt::work_thread() {
+ while (m_work_thread_active) {
+ wait_for_event();
+ // Serializes the handler call against set_kernel_interrupt().
+ std::lock_guard<std::mutex> lock(m_mutex);
+ if (m_kernel_interrupt_fn != nullptr) {
+ m_kernel_interrupt_fn(m_mmd_handle, m_kernel_interrupt_user_data);
+ }
+ }
+}
+
+/** wait_for_event() is called from work_thread().
+ *  It uses poll() to wait for an event on m_eventfd, the file descriptor
+ *  obtained from fpgaGetOSObjectFromEventHandle().
+ *  poll() takes a pollfd struct, which includes
+ *  fd - file descriptor, events - requested events, revents - returned events.
+ *  The timeout argument of poll() is the number of milliseconds poll() will
+ *  block waiting for the file descriptor.
+ *  On success, poll() returns a nonnegative value which is the
+ *  number of elements in the pollfds whose revents fields have been
+ *  set to a nonzero value (indicating an event or an error). A
+ *  return value of zero indicates that the system call timed out
+ *  before any file descriptor became ready.
+ *
+ *  Outcomes:
+ *   - poll() failed or timed out: logged at debug level 1.
+ *   - revents is anything other than exactly POLLIN: logged at level 0 with
+ *     the numeric revents value.
+ *   - POLLIN: the 64-bit eventfd counter is read (and thereby consumed);
+ *     a failed read is logged at level 1.
+ *  The function returns normally in every case.
+ */
+void KernelInterrupt::wait_for_event() {
+  // Use timeout when polling eventfd because sometimes interrupts are missed.
+  // This may be caused by known race condition with runtime, or there may
+  // be occasional events lost from OPAE.
+
+  MMD_DEBUG("DEBUG LOG : KernelInterrupt waiting for event using poll()\n");
+  const int timeout_ms = 250;
+  struct pollfd pfd = {.fd = m_eventfd, .events = POLLIN, .revents = 0};
+  int num_events = poll(&pfd, 1, timeout_ms);
+  if (num_events <= 0) {
+    std::string err(num_events < 0 ? strerror(errno) : "timed out");
+    std::string err_str("poll(): ");
+    debug_print(err_str.append(err), 1);
+  } else if (pfd.revents != POLLIN) {
+    // std::string(const char*, count) would copy `count` characters of the
+    // literal (reading past its end for large values) instead of appending
+    // the number, so format the value explicitly.
+    std::string err = "poll error num: " + std::to_string(pfd.revents);
+    debug_print(err, 0);
+  } else {
+    uint64_t val = 0;
+    ssize_t bytes_read = read(pfd.fd, &val, sizeof(val));
+    if (bytes_read < 0) {
+      std::string err(strerror(errno));
+      std::string err_str("read: ");
+      debug_print(err_str.append(err), 1);
+    }
+  }
+}
+
+/** Register the handler that work_thread() invokes, together with the opaque
+ *  user_data pointer handed back to it. Both are stored while holding m_mutex.
+ */
+void KernelInterrupt::set_kernel_interrupt(aocl_mmd_interrupt_handler_fn fn, void *user_data) {
+  MMD_DEBUG("DEBUG LOG : KernelInterrupt setting kernel interrupt\n");
+  {
+    std::lock_guard<std::mutex> guard(m_mutex);
+    m_kernel_interrupt_fn = fn;
+    m_kernel_interrupt_user_data = user_data;
+  }
+}
+
+/** Configure interrupts
+ *  Called from the KernelInterrupt constructor. The first call switches the
+ *  class-wide enable_thread flag on; every later call is a no-op.
+ */
+void KernelInterrupt::set_member_for_interrupts() {
+  static bool initialized = false;
+  if (!initialized) {
+    // Use interrupts
+    MMD_DEBUG("DEBUG LOG : Using interrupts\n");
+
+    enable_thread = true;
+    initialized = true;
+  }
+}
diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/kernel_interrupt.h b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/kernel_interrupt.h
new file mode 100644
index 0000000..9ea6e68
--- /dev/null
+++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/kernel_interrupt.h
@@ -0,0 +1,68 @@
+// (c) 1992-2024 Intel Corporation.
+// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words
+// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S.
+// and/or other countries. Other marks and brands may be claimed as the property
+// of others. See Trademarks on intel.com for full list of Intel trademarks or
+// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera)
+// Your use of Intel Corporation's design tools, logic functions and other
+// software and tools, and its AMPP partner logic functions, and any output
+// files any of the foregoing (including device programming or simulation
+// files), and any associated documentation or information are expressly subject
+// to the terms and conditions of the Altera Program License Subscription
+// Agreement, Intel MegaCore Function License Agreement, or other applicable
+// license agreement, including, without limitation, that your use is for the
+// sole purpose of programming logic devices manufactured by Intel and sold by
+// Intel or its authorized distributors. Please refer to the applicable
+// agreement for further details.
+
+#ifndef KERNEL_INTERRUPT_H_
+#define KERNEL_INTERRUPT_H_
+
+#include <opae/fpga.h>
+
+#include <atomic>
+#include <chrono>
+#include <mutex>
+#include <thread>
+
+#include "aocl_mmd.h"
+
+namespace intel_opae_mmd {
+
+// Delivers FPGA interrupt events to a user-registered handler.
+// The constructor calls enable_interrupts() and the destructor calls
+// disable_interrupts(); when interrupts are in use a worker thread waits on
+// an event file descriptor and invokes the handler.
+// Instances are neither copyable nor movable.
+//
+// NOTE(review): m_work_thread is a std::unique_ptr, but this header does not
+// include <memory> directly; it currently relies on another header providing it.
+class KernelInterrupt final {
+ public:
+ // fpga_handle_arg: OPAE handle used for event registration.
+ // mmd_handle: value passed back to the interrupt handler.
+ KernelInterrupt(fpga_handle fpga_handle_arg, int mmd_handle);
+ ~KernelInterrupt();
+
+ // Register with OPAE and start the worker thread (no-op while enable_thread is false).
+ void enable_interrupts();
+ // Stop the worker thread and release the OPAE event (no-op while enable_thread is false).
+ void disable_interrupts();
+ // Install the handler and its user data; guarded by m_mutex.
+ void set_kernel_interrupt(aocl_mmd_interrupt_handler_fn fn, void *user_data);
+
+ KernelInterrupt(const KernelInterrupt &) = delete;
+ KernelInterrupt &operator=(const KernelInterrupt &) = delete;
+ KernelInterrupt(KernelInterrupt &&) = delete;
+ KernelInterrupt &operator=(KernelInterrupt &&) = delete;
+
+ private:
+ // Sets enable_thread the first time it is called.
+ static void set_member_for_interrupts();
+
+ // Writes to m_eventfd to wake the worker thread.
+ void notify_work_thread();
+ // Polls m_eventfd with a timeout.
+ void wait_for_event();
+ // Worker thread body.
+ void work_thread();
+
+ // Shared by all instances: whether interrupt handling via a thread is used.
+ static bool enable_thread;
+
+ // Protects m_kernel_interrupt_fn and m_kernel_interrupt_user_data.
+ std::mutex m_mutex;
+ std::unique_ptr<std::thread> m_work_thread;
+ // Loop condition of the worker thread.
+ std::atomic<bool> m_work_thread_active;
+ // OS object obtained from fpgaGetOSObjectFromEventHandle().
+ int m_eventfd;
+ aocl_mmd_interrupt_handler_fn m_kernel_interrupt_fn;
+ void *m_kernel_interrupt_user_data;
+ fpga_handle m_fpga_handle;
+ int m_mmd_handle;
+ // OPAE event handle; nullptr until enable_interrupts() creates it.
+ fpga_event_handle m_event_handle;
+};
+
+}; // namespace intel_opae_mmd
+
+#endif // KERNEL_INTERRUPT_H_
diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd.cpp b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd.cpp
new file mode 100644
index 0000000..58cd8e0
--- /dev/null
+++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd.cpp
@@ -0,0 +1,830 @@
+// (c) 1992-2024 Intel Corporation.
+// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words
+// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S.
+// and/or other countries. Other marks and brands may be claimed as the property
+// of others. See Trademarks on intel.com for full list of Intel trademarks or
+// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera)
+// Your use of Intel Corporation's design tools, logic functions and other
+// software and tools, and its AMPP partner logic functions, and any output
+// files any of the foregoing (including device programming or simulation
+// files), and any associated documentation or information are expressly subject
+// to the terms and conditions of the Altera Program License Subscription
+// Agreement, Intel MegaCore Function License Agreement, or other applicable
+// license agreement, including, without limitation, that your use is for the
+// sole purpose of programming logic devices manufactured by Intel and sold by
+// Intel or its authorized distributors. Please refer to the applicable
+// agreement for further details.
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <zlib.h>
+
+#include <linux/mman.h>
+#include <sys/mman.h>
+
+// On some systems MAP_HUGE_2MB is not defined. It should be defined for all
+// platforms that DCP supports, but we also want ability to compile MMD on
+// CentOS 6 systems.
+// Each macro below is only defined when the system headers did not already
+// provide it.
+#ifndef MAP_HUGE_SHIFT
+#define MAP_HUGE_SHIFT 26
+#endif
+
+// 21 is log2 of 2 MiB.
+#ifndef MAP_HUGE_2MB
+#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
+#endif
+
+// 30 is log2 of 1 GiB.
+#ifndef MAP_HUGE_1GB
+#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
+#endif
+
+#include <algorithm>
+#include <cassert>
+#include <cstdio>
+#include <iomanip>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <unordered_map>
+#include <vector>
+#ifdef DLA_MMD
+#include <chrono>
+#include <thread>
+#endif
+
+#include "aocl_mmd.h"
+#include "mmd_device.h"
+
+// Global flag with external linkage, initially false. It is not referenced in
+// the portion of this file shown here - presumably read or set elsewhere; verify.
+bool diagnose = 0;
+
+/** If the MMD is loaded dynamically, destructors in the MMD will execute before
+ * the destructors in the runtime upon program termination. The DeviceMapManager
+ * guards accesses to the device/handle maps to make sure the runtime doesn't
+ * get to reference them after MMD destructors have been called. Destructor
+ * makes sure that all devices are closed at program termination regardless of
+ * what the runtime does. Implemented as a singleton.
+ *
+ * The two maps are heap-allocated in the constructor and the pointers are
+ * reset to nullptr in the destructor; every accessor checks the pointers
+ * before use.
+ */
+class DeviceMapManager final {
+ public:
+ /** C++ std map data structure to keep track of
+ * object id -> handle and handle -> device
+ */
+ typedef std::map<int, Device *> t_handle_to_dev_map;
+ typedef std::map<uint64_t, int> t_id_to_handle_map;
+
+ /** Return codes of get_or_create_device() */
+ static const int SUCCESS = 0;
+ static const int FAILURE = -1;
+
+ /** Returns handle and device pointer to the device with the specified name
+ * Creates a new entry for this device if it doesn't already exist
+ * Return 0 on success, -1 on failure
+ */
+ int get_or_create_device(const char *board_name, int *handle, Device **device);
+
+ /** Return obj id based on ASP name.*/
+ uint64_t id_from_name(const char *board_name);
+
+ /** Return MMD handle based on obj id. Returned value is negative if board
+ * doesn't exist
+ */
+ inline int handle_from_id(uint64_t obj_id);
+
+ /** Return pointer to device based on MMD handle. Returned value is null
+ * if board doesn't exist
+ */
+ Device *device_from_handle(int handle);
+
+ /** Closes specified device if it exists */
+ void close_device_if_exists(int handle);
+
+ /* Returns a reference to the class singleton */
+ static DeviceMapManager &get_instance() {
+ static DeviceMapManager instance;
+ return instance;
+ }
+
+ DeviceMapManager(DeviceMapManager const &) = delete;
+ void operator=(DeviceMapManager const &) = delete;
+ ~DeviceMapManager() {
+ // delete all allocated Device* entries
+ // NOTE(review): the loop only terminates if aocl_mmd_close() removes the
+ // entry from handle_to_dev_map (presumably via close_device_if_exists();
+ // its definition is not visible here) - confirm.
+ while (handle_to_dev_map->size() > 0) {
+ int handle = handle_to_dev_map->begin()->first;
+ aocl_mmd_close(handle);
+#ifdef SIM
+ // In SIM builds only the first device is closed before leaving the loop.
+ std::cout << "# mmd.cpp: When destroying DeviceMapManager in ASE, assume it worked.\n";
+ break;
+#endif
+ MMD_DEBUG("DEBUG LOG : In DeviceMapManager destructor, closing device with handle %d \n", handle);
+ }
+ delete handle_to_dev_map;
+ delete id_to_handle_map;
+ // Reset so accessors called after destruction see null maps.
+ handle_to_dev_map = nullptr;
+ id_to_handle_map = nullptr;
+ }
+
+ private:
+ // Private: instances are only created through get_instance().
+ DeviceMapManager() {
+ handle_to_dev_map = new t_handle_to_dev_map();
+ id_to_handle_map = new t_id_to_handle_map();
+
+ MMD_DEBUG("DEBUG LOG : Constructing DeviceMapManager object\n");
+ }
+ t_handle_to_dev_map *handle_to_dev_map = nullptr;
+ t_id_to_handle_map *id_to_handle_map = nullptr;
+};
+// File-local reference to the singleton, used by the functions in this file.
+static DeviceMapManager &device_manager = DeviceMapManager::get_instance();
+
+/** Returns handle and device pointer to the device with the specified name
+ * Creates a new entry for this device if it doesn't already exist
+ *
+ * board_name - name parsed by id_from_name() into an object id
+ * handle     - receives the MMD handle on success
+ * device     - receives the Device pointer on success
+ *
+ * Return DeviceMapManager::SUCCESS (0) on success,
+ * DeviceMapManager::FAILURE (-1) on failure. On failure *handle and *device
+ * are left untouched.
+ */
+int DeviceMapManager::get_or_create_device(const char *board_name, int *handle, Device **device) {
+  int _handle = MMD_INVALID_PARAM;
+  Device *_device = nullptr;
+
+  if (id_to_handle_map == nullptr || handle_to_dev_map == nullptr) {
+    MMD_DEBUG(
+        "DEBUG LOG : Failure in DeviceMapManager::get_or_create_device,id_to_handle_map or handle_to_dev_map is "
+        "NULL\n");
+    return DeviceMapManager::FAILURE;
+  }
+
+  uint64_t obj_id = id_from_name(board_name);
+  if (!obj_id) {
+    MMD_DEBUG("DEBUG LOG : Failure in DeviceMapManager::get_or_create_device. obj_id : %ld \n", obj_id);
+    // This used to be `return false`, i.e. 0 == SUCCESS, which told the
+    // caller the lookup succeeded while *handle and *device were never set.
+    return DeviceMapManager::FAILURE;
+  }
+  if (id_to_handle_map->count(obj_id) == 0) {
+    // First request for this object id: create the device and record it in
+    // both maps.
+    try {
+      _device = new Device(obj_id);
+      _handle = _device->get_mmd_handle();
+      id_to_handle_map->insert({obj_id, _handle});
+      handle_to_dev_map->insert({_handle, _device});
+    } catch (std::runtime_error &e) {
+      MMD_DEBUG("DEBUG LOG : Failure in DeviceMapManager::get_or_create_device %s\n", e.what());
+      // _device is still nullptr if the Device constructor itself threw.
+      delete _device;
+      return DeviceMapManager::FAILURE;
+    }
+    MMD_DEBUG("DEBUG LOG : Success in creating new device object handle : %d \n", _handle);
+  } else {
+    _handle = id_to_handle_map->at(obj_id);
+    _device = handle_to_dev_map->at(_handle);
+    MMD_DEBUG("DEBUG LOG : Success in retrieving device metadata(handle , object) , handle : %d\n", _handle);
+  }
+
+  (*handle) = _handle;
+  (*device) = _device;
+
+  MMD_DEBUG("DEBUG LOG : Success in creating new device object , handle : %d\n", _handle);
+  return DeviceMapManager::SUCCESS;
+}
+
+/** Return obj id based on ASP name.
+ * The name is handed to Device::parse_board_name(); 0 is returned when
+ * parsing fails.
+ */
+uint64_t DeviceMapManager::id_from_name(const char *board_name) {
+  uint64_t parsed_id = 0;
+  const bool parsed = Device::parse_board_name(board_name, parsed_id);
+  if (!parsed) {
+    MMD_DEBUG("DEBUG LOG : Failed to retrieve object id from board name\n");
+    return 0;
+  }
+  MMD_DEBUG("DEBUG LOG : Success in retrieving object id from board name\n");
+  return parsed_id;
+}
+
+/** Return MMD handle based on obj id.
+ * MMD_INVALID_PARAM is returned when the id has no entry or when the map
+ * is no longer available.
+ */
+inline int DeviceMapManager::handle_from_id(uint64_t obj_id) {
+  if (!id_to_handle_map) {
+    MMD_DEBUG("DEBUG LOG : Failed to retrieve handle from object id \n");
+    return MMD_INVALID_PARAM;
+  }
+
+  int found_handle = MMD_INVALID_PARAM;
+  const auto entry = id_to_handle_map->find(obj_id);
+  if (entry != id_to_handle_map->end()) {
+    found_handle = entry->second;
+  }
+  MMD_DEBUG("DEBUG LOG : Success in retrieving handle from object id. handle : %d \n", found_handle);
+  return found_handle;
+}
+
+/** Return pointer to device based on MMD handle. Returned value is null
+ * if board doesn't exist or if the handle map is no longer available.
+ *
+ * The return values are unchanged; only the debug output is corrected.
+ * Previously the "Success" message could only be reached when the lookup
+ * had missed, and a successful lookup logged nothing.
+ */
+Device *DeviceMapManager::device_from_handle(int handle) {
+  if (!handle_to_dev_map) {
+    MMD_DEBUG("DEBUG LOG : Failed to retrieve device from handle\n");
+    return nullptr;
+  }
+
+  auto it = handle_to_dev_map->find(handle);
+  if (it == handle_to_dev_map->end()) {
+    MMD_DEBUG("DEBUG LOG : Failed to retrieve device from handle. handle : %d \n", handle);
+    return nullptr;
+  }
+
+  MMD_DEBUG("DEBUG LOG : Success in retrieving device from handle. handle : %d \n", handle);
+  return it->second;
+}
+
+/** Closes specified device if it exists.
+ * The Device is deleted and its entries are removed from both maps.
+ * Nothing happens when the handle is unknown or the maps are gone.
+ */
+void DeviceMapManager::close_device_if_exists(int handle) {
+  if (!handle_to_dev_map) {
+    MMD_DEBUG("DEBUG LOG : Error, no handle to device map entry found for handle : %d \n", handle);
+    return;
+  }
+
+  if (handle_to_dev_map->count(handle) == 0) {
+    MMD_DEBUG("DEBUG LOG : Nothing to close. Device with handle : %d already closed\n", handle);
+    return;
+  }
+
+  Device *doomed = handle_to_dev_map->at(handle);
+  // Fetch the object id first: it is needed after the device is destroyed.
+  const uint64_t doomed_id = doomed->get_fpga_obj_id();
+  delete doomed;
+
+  handle_to_dev_map->erase(handle);
+  id_to_handle_map->erase(doomed_id);
+  MMD_DEBUG("DEBUG LOG : Closing device with handle : %d\n", handle);
+}
+
+/** Interface for checking if AFU has ASP loaded.
+ * name - board name, resolved to an object id through the device manager.
+ *
+ * When a device with a positive handle is already registered for that id,
+ * its asp_loaded() result is returned. Otherwise a temporary Device is
+ * constructed just for the query; a std::runtime_error from that
+ * construction yields false. An unresolvable name also yields false.
+ */
+bool mmd_asp_loaded(const char *name) {
+  const uint64_t obj_id = device_manager.id_from_name(name);
+  if (obj_id == 0) {
+    MMD_DEBUG("DEBUG LOG : Error, no object id found for board : %s \n", name);
+    return false;
+  }
+
+  const int handle = device_manager.handle_from_id(obj_id);
+  if (handle <= 0) {
+    // No registered device: probe with a short-lived Device object.
+    bool asp_loaded = false;
+    try {
+      Device probe(obj_id);
+      asp_loaded = probe.asp_loaded();
+    } catch (std::runtime_error &e) {
+      MMD_DEBUG("DEBUG LOG : ASP not loaded for handle : %d , %s\n", handle, e.what());
+      return false;
+    }
+
+    MMD_DEBUG("DEBUG LOG : ASP loaded : %d (0 - not loaded , 1 - loaded) for handle : %d \n", asp_loaded, handle);
+    return asp_loaded;
+  }
+
+  Device *registered = device_manager.device_from_handle(handle);
+  if (registered == nullptr) {
+    MMD_DEBUG("DEBUG LOG : ASP not loaded for handle : %d \n", handle);
+    return false;
+  }
+
+  MMD_DEBUG("DEBUG LOG : ASP loaded for handle : %d \n", handle);
+  return registered->asp_loaded();
+}
+
+/** Function called as part of aocl_mmd_get_offline_info()
+ * to determine number of boards in system
+ *
+ * asp_uuid - GUID string; parsed with uuid_parse() and used as an
+ *            enumeration filter.
+ *
+ * Builds an OPAE properties filter (GUID + object type FPGA_ACCELERATOR),
+ * calls fpgaEnumerate() with no token buffer so that only the match count is
+ * produced, and returns that count.
+ * On any failure MMD_AOCL_ERR is returned through the unsigned return type.
+ * NOTE(review): if MMD_AOCL_ERR is negative it is converted to a large
+ * unsigned value here - confirm callers account for that.
+ */
+static unsigned int get_offline_num_acl_boards(const char *asp_uuid) {
+ // Always true in this function, so the GUID filter is always applied.
+ bool asp_only = true;
+ fpga_guid guid;
+ fpga_result res = FPGA_OK;
+ uint32_t num_matches = 0;
+ bool ret_err = false;
+ fpga_properties filter = NULL;
+
+ if (uuid_parse(asp_uuid, guid) < 0) {
+ MMD_DEBUG("Error parsing guid '%s'\n", asp_uuid);
+ ret_err = true;
+ goto out;
+ }
+
+ res = fpgaGetProperties(NULL, &filter);
+ if (res != FPGA_OK) {
+ MMD_DEBUG("Error creating properties object: %s\n", fpgaErrStr(res));
+ ret_err = true;
+ goto out;
+ }
+
+ if (asp_only) {
+ res = fpgaPropertiesSetGUID(filter, guid);
+ if (res != FPGA_OK) {
+ MMD_DEBUG("Error setting GUID: %s\n", fpgaErrStr(res));
+ ret_err = true;
+ goto out;
+ }
+ }
+
+ res = fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
+ if (res != FPGA_OK) {
+ MMD_DEBUG("Error setting object type: %s\n", fpgaErrStr(res));
+ ret_err = true;
+ goto out;
+ }
+
+ // NULL token array with max_tokens 0: only num_matches is filled in.
+ res = fpgaEnumerate(&filter, 1, NULL, 0, &num_matches);
+ if (res != FPGA_OK) {
+ MMD_DEBUG("Error enumerating AFCs: %s\n", fpgaErrStr(res));
+ ret_err = true;
+ goto out;
+ }
+
+// Single exit point: the properties object is released on every path.
+out:
+ if (filter) fpgaDestroyProperties(&filter);
+
+ if (ret_err) {
+ return MMD_AOCL_ERR;
+ } else {
+ return num_matches;
+ }
+}
+
+/** Function called as part of aocl_mmd_get_offline_info()
+ * to determine names of boards in the system.
+ * Stores the fixed name "dla_agx7_ofs_board" in boards and returns true;
+ * asp_only is accepted but not consulted.
+ */
+static bool get_offline_board_names(std::string &boards, bool asp_only = true) {
+  static const char *const kBoardName = "dla_agx7_ofs_board";
+  boards = kBoardName;
+  return true;
+}
+
+// Macros used for aocl_mmd_get_offline_info and aocl_mmd_get_info.
+// They expect the names param_value, param_value_size and param_size_ret to
+// be in scope at the point of use.
+//
+// RESULT_INT / RESULT_SIZE_T store X through param_value as an int / size_t
+// and, when param_size_ret is non-null, report the size of that type.
+// Neither macro checks param_value for null nor compares param_value_size
+// against the size written.
+#define RESULT_INT(X) \
+ { \
+ *((int *)param_value) = X; \
+ if (param_size_ret) *param_size_ret = sizeof(int); \
+ }
+#define RESULT_SIZE_T(X) \
+ { \
+ *((size_t *)param_value) = X; \
+ if (param_size_ret) *param_size_ret = sizeof(size_t); \
+ }
+
+// RESULT_STR copies string X (length capped at 4096 plus one byte for the
+// terminator) into param_value, limited to param_value_size bytes, and
+// reports the number of bytes copied. When param_value_size is smaller than
+// the string, the copy is truncated and is not null-terminated.
+// X is evaluated more than once.
+#define RESULT_STR(X) \
+ do { \
+ unsigned Xlen = strnlen(X, 4096) + 1; \
+ unsigned Xcpylen = (param_value_size <= Xlen) ? param_value_size : Xlen; \
+ memcpy((void *)param_value, X, Xcpylen); \
+ if (param_size_ret) *param_size_ret = Xcpylen; \
+ } while (0)
+
+/** Query board-independent ("offline") information, i.e. information that is
+ * available without an open device handle.
+ * Arguments:
+ *
+ *   requested_info_id - a value from the aocl_mmd_offline_info_t enum
+ *
+ *   param_value_size - size of the param_value buffer in bytes. This should
+ *     match the size of the return type expected as indicated in the enum
+ *     definition.
+ *
+ *   param_value - pointer to the variable that will receive the returned info
+ *
+ *   param_size_ret - if non-NULL, receives the number of bytes of data
+ *     actually returned
+ *
+ * Returns: a negative value to indicate error. An id that has no case in the
+ * switch below writes nothing and still returns 0.
+ */
+
+// From the DLA perspective, AOCL_MMD_BOARD_NAMES is the only info we care about
+int aocl_mmd_get_offline_info(aocl_mmd_offline_info_t requested_info_id,
+                              size_t param_value_size,
+                              void *param_value,
+                              size_t *param_size_ret) {
+  // The runtime may call this function many times and querying the system is
+  // expensive, so the answers are computed on the first call only and served
+  // from these function-local statics afterwards.
+  static bool queried_once = false;
+  static int cached_mem_types;
+  static unsigned int cached_num_boards;
+  static std::string cached_board_names;
+  static bool board_names_ok;
+
+  if (!queried_once) {
+    cached_mem_types = (int)AOCL_MMD_PHYSICAL_MEMORY;
+    cached_num_boards = get_offline_num_acl_boards(I_DK_AFU_ID);
+    board_names_ok = get_offline_board_names(cached_board_names, true);
+    queried_once = true;
+  }
+
+  switch (requested_info_id) {
+    case AOCL_MMD_BOARD_NAMES:
+      // The only query that can fail: the name lookup result is replayed here
+      if (!board_names_ok) {
+        return MMD_AOCL_ERR;
+      }
+      RESULT_STR(cached_board_names.c_str());
+      break;
+    case AOCL_MMD_NUM_BOARDS:
+      RESULT_INT(cached_num_boards);
+      break;
+    case AOCL_MMD_MEM_TYPES_SUPPORTED:
+      RESULT_INT(cached_mem_types);
+      break;
+    case AOCL_MMD_VERSION:
+      RESULT_STR(AOCL_MMD_VERSION_STRING);
+      break;
+    case AOCL_MMD_VENDOR_NAME:
+      RESULT_STR("Intel Corp");
+      break;
+    case AOCL_MMD_VENDOR_ID:
+      RESULT_INT(0);
+      break;
+    case AOCL_MMD_USES_YIELD:
+      RESULT_INT(0);
+      break;
+  }
+
+  return 0;
+}
+
+/** Get information about the board using the enum aocl_mmd_info_t for
+ * info specific to a certain board.
+ * Arguments:
+ *
+ *   handle - handle of an opened device
+ *
+ *   requested_info_id - a value from the aocl_mmd_info_t enum
+ *
+ *   param_value_size - size of the param_value field in bytes. This should
+ *     match the size of the return type expected as indicated in the enum
+ *     definition. For example, the AOCL_MMD_TEMPERATURE returns a float, so
+ *     the param_value_size should be set to sizeof(float) and you should
+ *     expect the same number of bytes returned in param_size_ret.
+ *
+ *   param_value - pointer to the variable that will receive the returned info
+ *
+ *   param_size_ret - receives the number of bytes of data actually returned
+ *
+ * Returns: a negative value to indicate error (including an unknown handle),
+ * 0 otherwise.
+ */
+int aocl_mmd_get_info(
+    int handle, aocl_mmd_info_t requested_info_id, size_t param_value_size, void *param_value, size_t *param_size_ret) {
+  MMD_DEBUG("DEBUG LOG : called aocl_mmd_get_info\n");
+  Device *dev = device_manager.device_from_handle(handle);
+  if (dev == NULL) {
+    // Nothing can be written to param_value without a device, so report the
+    // failure instead of returning success with an untouched output buffer.
+    MMD_DEBUG("DEBUG LOG : Error in aocl_mmd_get_info, device not found for handle : %d\n", handle);
+    return MMD_AOCL_ERR;
+  }
+
+  assert(param_value);
+  switch (requested_info_id) {
+    case AOCL_MMD_BOARD_NAME: {
+      std::ostringstream board_name;
+      board_name << "Intel OFS Platform"
+                 << " (" << dev->get_dev_name() << ")";
+      RESULT_STR(board_name.str().c_str());
+      break;
+    }
+    case AOCL_MMD_NUM_KERNEL_INTERFACES:
+      RESULT_INT(1);
+      break;
+    case AOCL_MMD_KERNEL_INTERFACES:
+      RESULT_INT(AOCL_MMD_KERNEL);
+      break;
+    // The same value is reported for hardware and SIM builds
+    case AOCL_MMD_PLL_INTERFACES:
+      RESULT_INT(-1);
+      break;
+    case AOCL_MMD_MEMORY_INTERFACE:
+      RESULT_INT(AOCL_MMD_MEMORY);
+      break;
+    case AOCL_MMD_PCIE_INFO: {
+      RESULT_STR(dev->get_bdf().c_str());
+      break;
+    }
+    case AOCL_MMD_BOARD_UNIQUE_ID:
+      RESULT_INT(0);
+      break;
+    case AOCL_MMD_TEMPERATURE: {
+      // Only answered when the caller's buffer is exactly one float; any other
+      // size leaves param_value and param_size_ret untouched.
+      if (param_value_size == sizeof(float)) {
+        float *ptr = static_cast<float *>(param_value);
+        *ptr = dev->get_temperature();
+        if (param_size_ret) *param_size_ret = sizeof(float);
+      }
+      break;
+    }
+    case AOCL_MMD_CONCURRENT_READS:
+      RESULT_INT(1);
+      break;
+    case AOCL_MMD_CONCURRENT_WRITES:
+      RESULT_INT(1);
+      break;
+    case AOCL_MMD_CONCURRENT_READS_OR_WRITES:
+      RESULT_INT(2);
+      break;
+
+    case AOCL_MMD_MIN_HOST_MEMORY_ALIGNMENT:
+      RESULT_SIZE_T(64);
+      break;
+
+    case AOCL_MMD_HOST_MEM_CAPABILITIES: {
+      RESULT_INT(0);
+      break;
+    }
+    case AOCL_MMD_SHARED_MEM_CAPABILITIES: {
+      RESULT_INT(0);
+      break;
+    }
+
+    case AOCL_MMD_DEVICE_MEM_CAPABILITIES:
+      RESULT_INT(0);
+      break;
+    case AOCL_MMD_HOST_MEM_CONCURRENT_GRANULARITY:
+      RESULT_SIZE_T(0);
+      break;
+    case AOCL_MMD_SHARED_MEM_CONCURRENT_GRANULARITY:
+      RESULT_SIZE_T(0);
+      break;
+    case AOCL_MMD_DEVICE_MEM_CONCURRENT_GRANULARITY:
+      RESULT_SIZE_T(0);
+      break;
+  }
+  return 0;
+}
+
+// The RESULT_* helpers are only meant for the two info functions above
+#undef RESULT_INT
+#undef RESULT_SIZE_T
+#undef RESULT_STR
+
+/** Register the kernel interrupt callback for an opened device.
+ * The interrupt handler is called whenever the client needs to be notified
+ * of an asynchronous event signaled by the device internals.
+ * For example, the kernel has completed or is stalled.
+ *
+ * Important: Interrupts from the kernel must be ignored until this handler is
+ * set
+ *
+ * Arguments:
+ *   handle - handle of the opened device
+ *   fn - the callback function to invoke when a kernel interrupt occurs
+ *   user_data - the data that should be passed to fn when it is called.
+ *
+ * Returns: 0 if successful, negative on error (no device for this handle)
+ */
+int AOCL_MMD_CALL aocl_mmd_set_interrupt_handler(int handle, aocl_mmd_interrupt_handler_fn fn, void *user_data) {
+  Device *target = device_manager.device_from_handle(handle);
+  if (!target) {
+    MMD_DEBUG("DEBUG LOG : Error setting kernel interrupt handler for device handle : %d\n", handle);
+    return MMD_AOCL_ERR;
+  }
+
+  target->set_kernel_interrupt(fn, user_data);
+  MMD_DEBUG("DEBUG LOG : Set kernel interrupt handler for device handle : %d\n", handle);
+  return 0;
+}
+
+/** Set the operation status handler for the opened device.
+ * The operation status handler is called with
+ *    status 0 when the operation has completed successfully.
+ *    status negative when the operation completed with errors.
+ *
+ * Arguments:
+ *   handle - handle of the opened device
+ *   fn - the callback function to invoke when a status update is to be
+ *     performed.
+ *   user_data - the data that should be passed to fn when it is called.
+ *
+ * Returns: 0 if successful, negative on error (no device for this handle)
+ */
+
+int AOCL_MMD_CALL aocl_mmd_set_status_handler(int handle, aocl_mmd_status_handler_fn fn, void *user_data) {
+  Device *dev = device_manager.device_from_handle(handle);
+  if (dev == NULL) {
+    // Mirror aocl_mmd_set_interrupt_handler: an unknown handle is an error,
+    // not a silent success that leaves the handler unregistered.
+    MMD_DEBUG("DEBUG LOG : Error setting status handler for device handle : %d\n", handle);
+    return MMD_AOCL_ERR;
+  }
+
+  dev->set_status_handler(fn, user_data);
+  MMD_DEBUG("DEBUG LOG : Set status handler for device handle : %d\n", handle);
+  return 0;
+}
+
+/** Host to device-global-memory write (HOST DDR -> FPGA DDR)
+ * If op is NULL
+ *    - Then these calls must block until the operation is complete.
+ *    - The status handler is not called for this operation.
+ *
+ * If op is non-NULL, then:
+ *    - These may be non-blocking calls
+ *    - The status handler must be called upon completion, with status 0
+ *    for success, and a negative value for failure.
+ *
+ * Arguments:
+ *   handle - handle of the opened device
+ *
+ *   op - the operation object used to track this operation's progress
+ *
+ *   len - the size in bytes to transfer
+ *
+ *   src - the host buffer being read from
+ *
+ *   mmd_interface - the handle to the interface being accessed. E.g. To
+ *   access global memory this handle will be whatever is returned by
+ *   aocl_mmd_get_info when called with AOCL_MMD_MEMORY_INTERFACE.
+ *
+ *   offset - the byte offset within the interface that the transfer will
+ *   begin at.
+ *
+ * The return value is 0 if the operation launch was successful, and
+ * negative otherwise.
+ */
+int AOCL_MMD_CALL
+aocl_mmd_write(int handle, aocl_mmd_op_t op, size_t len, const void *src, int mmd_interface, size_t offset) {
+  MMD_DEBUG(
+      "DEBUG LOG : aocl_mmd_write: handle : %d\t operation : %p\t len : 0x%zx\t src : %p\t mmd_interface : %d\t offset "
+      ": 0x%zx\n",
+      handle,
+      op,
+      len,
+      src,
+      mmd_interface,
+      offset);
+
+  Device *target = device_manager.device_from_handle(handle);
+  if (target == nullptr) {
+    MMD_DEBUG("DEBUG LOG : Error in aocl_mmd_write , device not found for handle : %d\n", handle);
+    return -1;
+  }
+
+  // The Device decides between DMA and MMIO based on mmd_interface
+  return target->write_block(op, mmd_interface, src, offset, len);
+}
+
+/** Host reading from device-global-memory (FPGA DDR -> HOST DDR)
+ * If op is NULL
+ *    - Then these calls must block until the operation is complete.
+ *    - The status handler is not called for this operation.
+ *
+ * If op is non-NULL, then:
+ *    - These may be non-blocking calls
+ *    - The status handler must be called upon completion, with status 0
+ *    for success, and a negative value for failure.
+ *
+ * Arguments:
+ *   handle - handle of the opened device
+ *
+ *   op - the operation object used to track this operation's progress
+ *
+ *   len - the size in bytes to transfer
+ *
+ *   dst - the host buffer being written to
+ *
+ *   mmd_interface - the handle to the interface being accessed. E.g. To
+ *   access global memory this handle will be whatever is returned by
+ *   aocl_mmd_get_info when called with AOCL_MMD_MEMORY_INTERFACE.
+ *
+ *   offset - the byte offset within the interface that the transfer will
+ *   begin at.
+ *
+ * The return value is 0 if the operation launch was successful, and
+ * negative otherwise.
+ */
+
+int AOCL_MMD_CALL aocl_mmd_read(int handle, aocl_mmd_op_t op, size_t len, void *dst, int mmd_interface, size_t offset) {
+  MMD_DEBUG(
+      "DEBUG LOG : aocl_mmd_read: handle : %d\t operation : %p\t len : 0x%zx\t dst : %p\t mmd_interface : %d\t offset "
+      ": 0x%zx\n",
+      handle,
+      op,
+      len,
+      dst,
+      mmd_interface,
+      offset);
+
+  Device *target = device_manager.device_from_handle(handle);
+  if (target == nullptr) {
+    MMD_DEBUG("DEBUG LOG : Error in aocl_mmd_read , device not found for handle : %d\n", handle);
+    return -1;
+  }
+
+  // The Device decides between DMA and MMIO based on mmd_interface
+  return target->read_block(op, mmd_interface, dst, offset, len);
+}
+
+/** Open and initialize the named device.
+ *
+ * The name is typically one specified by the AOCL_MMD_BOARD_NAMES offline
+ * info.
+ *
+ * Arguments:
+ *    name - open the board with this name (provided as a C-style string,
+ *           i.e. NUL terminated ASCII.) A NULL name is rejected with
+ *           MMD_INVALID_PARAM.
+ *
+ * Returns: the non-negative integer handle for the board, otherwise a
+ * negative value to indicate error. Upon receiving the error, the OpenCL
+ * runtime will proceed to open other known devices, hence the MMD mustn't
+ * exit the application if an open call fails.
+ */
+
+int AOCL_MMD_CALL aocl_mmd_open(const char *name) {
+  // Reject NULL before the name is formatted with %s or handed to the device
+  // manager; both would otherwise dereference it.
+  if (name == NULL) {
+    MMD_DEBUG("DEBUG LOG : Error while aocl_mmd_open, board name is NULL\n");
+    return MMD_INVALID_PARAM;
+  }
+
+  MMD_DEBUG("DEBUG LOG : aocl_mmd_open, Opening device: %s\n", name);
+
+  uint64_t obj_id = device_manager.id_from_name(name);
+  if (!obj_id) {
+    MMD_DEBUG("DEBUG LOG : Error while aocl_mmd_open, object id not found for board : %s\n", name);
+    return MMD_INVALID_PARAM;
+  }
+
+  int handle;
+  Device *dev = nullptr;
+  if (device_manager.get_or_create_device(name, &handle, &dev) != DeviceMapManager::SUCCESS) {
+    if (std::getenv("MMD_PROGRAM_DEBUG") || std::getenv("MMD_DMA_DEBUG") || std::getenv("MMD_ENABLE_DEBUG")) {
+      MMD_DEBUG("DEBUG LOG : Error while aocl_mmd_open, device not found for board : %s\n", name);
+    }
+    return MMD_AOCL_ERR;
+  }
+
+  assert(dev);
+  // The ASP must be present on the board before it can be initialized
+  if (dev->asp_loaded()) {
+    if (!dev->initialize_asp()) {
+      MMD_DEBUG("DEBUG LOG : Error while aocl_mmd_open, Error initializing asp for board : %s\n", name);
+      return MMD_ASP_INIT_FAILED;
+    }
+  } else {
+    MMD_DEBUG("DEBUG LOG : Error while aocl_mmd_open, asp not loaded for board : %s\n", name);
+    return MMD_ASP_NOT_LOADED;
+  }
+  MMD_DEBUG("end of aocl_mmd_open \n");
+  MMD_DEBUG("DEBUG LOG : Success aocl_mmd_open for board : %s, handle : %d \n", name, handle);
+  return handle;
+}
+
+/** Close an opened device, by its handle.
+ * In SIM builds the device is intentionally left open and only a notice is
+ * printed.
+ * Returns: 0 on success, negative values on error. This implementation
+ * always returns 0.
+ */
+int AOCL_MMD_CALL aocl_mmd_close(int handle) {
+#ifdef SIM
+  std::cout << "# mmd.cpp: During simulation (ASE) we are not closing the device.\n";
+#else
+  device_manager.close_device_if_exists(handle);
+#endif
+  return 0;
+}
+
+// CoreDLA modifications
+// To support multiple different FPGA boards, anything board specific must be implemented in a
+// board-specific MMD instead of the CoreDLA runtime layer.
+#ifdef DLA_MMD
+// Query functions to get board-specific values
+
+// Maximum number of instances reported by this MMD
+AOCL_MMD_CALL int dla_mmd_get_max_num_instances() {
+  return 4;
+}
+
+// DLA can only use 4GB of DDR per instance as of 2024.2
+AOCL_MMD_CALL uint64_t dla_mmd_get_ddr_size_per_instance() {
+  return 1ULL << 32;
+}
+
+// DDR clock frequency in MHz; the value depends on whether USE_N6001_BOARD is defined
+AOCL_MMD_CALL double dla_mmd_get_ddr_clock_freq() {
+#ifdef USE_N6001_BOARD
+  const double freq_mhz = 300.0;
+#else
+  const double freq_mhz = 333.333333;
+#endif
+  return freq_mhz;
+}
+
+// Helper functions for the wrapper functions around CSR and DDR
+
+// Translate an instance-relative CSR address into the raw address used on the
+// AOCL_MMD_DLA_CSR interface: a fixed base plus a fixed stride per instance.
+uint64_t dla_get_raw_csr_address(int instance, uint64_t addr) {
+  const int csr_base = 0x10000;
+  const int csr_stride = 0x800;
+  return csr_base + (csr_stride * instance) + addr;
+}
+
+// Translate an instance-relative DDR address into a raw DDR address. The
+// per-instance spacing is 2^32 bytes when USE_N6001_BOARD is defined and 2^33
+// bytes otherwise.
+uint64_t dla_get_raw_ddr_address(int instance, uint64_t addr) {
+#ifdef USE_N6001_BOARD
+  const unsigned long long instance_span = 1ULL << 32;
+#else
+  const unsigned long long instance_span = 1ULL << 33;
+#endif
+  return instance_span * instance + addr;
+}
+
+// Wrappers around CSR and DDR reads and writes to abstract away board-specific offsets.
+// All of them are blocking calls (op is NULL) and return the result of the
+// underlying aocl_mmd_read / aocl_mmd_write.
+
+// Write one 32-bit CSR word of the given instance
+AOCL_MMD_CALL int dla_mmd_csr_write(int handle, int instance, uint64_t addr, const uint32_t *data) {
+  const uint64_t raw_addr = dla_get_raw_csr_address(instance, addr);
+  return aocl_mmd_write(handle, NULL, sizeof(uint32_t), data, AOCL_MMD_DLA_CSR, raw_addr);
+}
+
+// Read one 32-bit CSR word of the given instance
+AOCL_MMD_CALL int dla_mmd_csr_read(int handle, int instance, uint64_t addr, uint32_t *data) {
+  const uint64_t raw_addr = dla_get_raw_csr_address(instance, addr);
+  return aocl_mmd_read(handle, NULL, sizeof(uint32_t), data, AOCL_MMD_DLA_CSR, raw_addr);
+}
+
+// Write length bytes from data into the DDR region of the given instance
+AOCL_MMD_CALL int dla_mmd_ddr_write(int handle, int instance, uint64_t addr, uint64_t length, const void *data) {
+  const uint64_t raw_addr = dla_get_raw_ddr_address(instance, addr);
+  return aocl_mmd_write(handle, NULL, length, data, AOCL_MMD_MEMORY, raw_addr);
+}
+
+// Read length bytes from the DDR region of the given instance into data
+AOCL_MMD_CALL int dla_mmd_ddr_read(int handle, int instance, uint64_t addr, uint64_t length, void *data) {
+  const uint64_t raw_addr = dla_get_raw_ddr_address(instance, addr);
+  return aocl_mmd_read(handle, NULL, length, data, AOCL_MMD_MEMORY, raw_addr);
+}
+
+// Measure the CoreDLA clock frequency (in MHz) with the hardware counter at
+// CSR address 0x37000: start the counter, wait roughly 10 ms, stop it, read it
+// back and divide the count by the host-side elapsed time.
+// Failures of the underlying CSR accesses are only checked through assert().
+AOCL_MMD_CALL double dla_mmd_get_coredla_clock_freq(int handle) {
+  using hr_clock = std::chrono::high_resolution_clock;
+
+  constexpr uint64_t hw_timer_address = 0x37000;
+  const uint32_t start_cmd = 1;
+  const uint32_t stop_cmd = 2;
+
+  // Take the host timestamp, then start the hardware counter
+  const hr_clock::time_point started_at = hr_clock::now();
+  int status = aocl_mmd_write(handle, NULL, sizeof(uint32_t), &start_cmd, AOCL_MMD_DLA_CSR, hw_timer_address);
+  assert(status == 0);
+
+  // The sleep will not last exactly 10 milliseconds; that is fine because the
+  // real interval is taken from the high resolution clock
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+
+  // Take the host timestamp, then stop the hardware counter
+  const hr_clock::time_point stopped_at = hr_clock::now();
+  status = aocl_mmd_write(handle, NULL, sizeof(uint32_t), &stop_cmd, AOCL_MMD_DLA_CSR, hw_timer_address);
+  assert(status == 0);
+
+  // Fetch the number of ticks counted between start and stop
+  uint32_t ticks = 0;
+  status = aocl_mmd_read(handle, NULL, sizeof(uint32_t), &ticks, AOCL_MMD_DLA_CSR, hw_timer_address);
+  assert(status == 0);
+
+  // The counter runs on clk_dla, so ticks per second is its frequency
+  const std::chrono::duration<double> elapsed = stopped_at - started_at;
+  const double elapsed_seconds = elapsed.count();
+  return 1.0e-6 * ticks / elapsed_seconds;  // 1.0e-6 is to convert to MHz
+}
+#endif
diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_device.cpp b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_device.cpp
new file mode 100644
index 0000000..dd4ca42
--- /dev/null
+++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_device.cpp
@@ -0,0 +1,448 @@
+// (c) 1992-2024 Intel Corporation.
+// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words
+// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S.
+// and/or other countries. Other marks and brands may be claimed as the property
+// of others. See Trademarks on intel.com for full list of Intel trademarks or
+// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera)
+// Your use of Intel Corporation's design tools, logic functions and other
+// software and tools, and its AMPP partner logic functions, and any output
+// files any of the foregoing (including device programming or simulation
+// files), and any associated documentation or information are expressly subject
+// to the terms and conditions of the Altera Program License Subscription
+// Agreement, Intel MegaCore Function License Agreement, or other applicable
+// license agreement, including, without limitation, that your use is for the
+// sole purpose of programming logic devices manufactured by Intel and sold by
+// Intel or its authorized distributors. Please refer to the applicable
+// agreement for further details.
+
+#include <assert.h>
+#include <numa.h>
+
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <limits>
+#include <sstream>
+
+#include "mmd_device.h"
+#include "mmd_helper.h"
+
+// Next handle value to hand out. The Device constructor takes the current
+// value as its mmd_handle and then advances it, wrapping back to 1 at INT_MAX.
+int Device::next_mmd_handle{1};
+
+/**
+ * The Device object is created for each device/board opened and
+ * it has methods to interact with fpga device.
+ * The entry point for Device is in DeviceMapManager Class
+ * which maintains mapping between device names and handles.
+ * Device Object is foundation for interacting with device.
+ *
+ * The constructor enumerates an accelerator whose GUID is I_DK_AFU_ID and
+ * opens it.
+ *
+ * Throws std::runtime_error when no matching accelerator is found or when it
+ * cannot be opened. Tokens acquired up to that point are released before
+ * throwing because the destructor does not run for a partially constructed
+ * object.
+ */
+Device::Device(uint64_t obj_id)
+    : fpga_obj_id(obj_id),
+      kernel_interrupt_thread(NULL),
+      event_update(NULL),
+      event_update_user_data(NULL),
+      enable_set_numa(false),
+      fme_sysfs_temp_initialized(false),
+      bus(0),
+      device(0),
+      function(0),
+      afu_initialized(false),
+      asp_initialized(false),
+      mmio_is_mapped(false),
+      filter(NULL),
+      mmio_token(NULL),
+      mmio_handle(NULL),
+      fme_token(NULL),
+      guid(),
+      mmd_dma(NULL) {
+  // Note that this constructor is not thread-safe because next_mmd_handle
+  // is shared between all class instances
+  MMD_DEBUG("DEBUG LOG : Constructing Device object\n");
+
+  mmd_handle = next_mmd_handle;
+  if (next_mmd_handle == std::numeric_limits<int>::max())
+    next_mmd_handle = 1;
+  else
+    next_mmd_handle++;
+
+  // Local search filter; deliberately not named `filter` so that it does not
+  // shadow the data member of that name.
+  fpga_properties accel_filter = NULL;
+  uint32_t num_matches;
+  fpga_result r;
+
+  // Set up a filter that will search for an accelerator
+  fpgaGetProperties(NULL, &accel_filter);
+  fpgaPropertiesSetObjectType(accel_filter, FPGA_ACCELERATOR);
+
+  // Add the desired UUID to the filter
+  uuid_parse(I_DK_AFU_ID, guid);
+  fpgaPropertiesSetGUID(accel_filter, guid);
+
+  // Do the search across the available FPGA contexts
+  num_matches = 1;
+  r = fpgaEnumerate(&accel_filter, 1, &mmio_token, 1, &num_matches);
+
+  // NOTE(review): this asks the *filter* for its parent rather than the
+  // properties of the enumerated token - confirm that fme_token is actually
+  // populated here (get_temperature() depends on it).
+  fpgaPropertiesGetParent(accel_filter, &fme_token);
+
+  // Not needed anymore so we destroy the filter
+  if (accel_filter) fpgaDestroyProperties(&accel_filter);
+
+  // num_matches was pre-set to 1, so a failed enumeration must be detected
+  // through its result code and the token, not through the count alone.
+  if (r != FPGA_OK || num_matches < 1 || mmio_token == NULL) {
+    if (mmio_token) fpgaDestroyToken(&mmio_token);
+    throw std::runtime_error(std::string("Cannot find accelerator"));
+  }
+
+  // Open accelerator. A failure is reported with an exception so that it is
+  // also caught in builds where assert() is compiled out.
+  r = fpgaOpen(mmio_token, &mmio_handle, 0);
+  if (r != FPGA_OK) {
+    fpgaDestroyToken(&mmio_token);
+    throw std::runtime_error(std::string("Cannot open accelerator: ") + fpgaErrStr(r));
+  }
+
+  // While the token is available, check whether it is for HW
+  // or for ASE simulation.
+  fpga_properties accel_props = NULL;
+  uint16_t vendor_id = 0, dev_id = 0;
+  if (fpgaGetProperties(mmio_token, &accel_props) == FPGA_OK) {
+    fpgaPropertiesGetVendorID(accel_props, &vendor_id);
+    fpgaPropertiesGetDeviceID(accel_props, &dev_id);
+    // The properties object is owned by us and must be released
+    fpgaDestroyProperties(&accel_props);
+  }
+
+  afu_initialized = true;
+  MMD_DEBUG("DEBUG LOG : Done constructing Device object\n");
+}
+
+/** Return true if board name parses correctly, false if it does not
+ * Return the parsed object_id in obj_id as an [out] parameter
+ *
+ * The text following the first strlen(ASP_NAME) characters is parsed as a
+ * hexadecimal number. obj_id is left at 0 whenever false is returned. No
+ * exception escapes this function.
+ */
+bool Device::parse_board_name(const char *board_name_str, uint64_t &obj_id) {
+  MMD_DEBUG("DEBUG LOG : Parsing board name\n");
+  std::string prefix(ASP_NAME);
+  std::string board_name(board_name_str);
+
+  obj_id = 0;
+  // NOTE(review): with `&&` this only rejects names that are no longer than
+  // the prefix AND differ from it; `||` looks like the intent. It is left
+  // unchanged because board names that do not start with ASP_NAME are
+  // currently accepted and callers may rely on that - confirm before changing.
+  if (board_name.length() <= prefix.length() && board_name.compare(0, prefix.length(), prefix)) {
+    MMD_DEBUG("DEBUG LOG : Error parsing device name '%s'\n", board_name_str);
+    return false;
+  }
+
+  try {
+    // stoull rather than stol: obj_id is an unsigned 64-bit value
+    std::string device_num_str = board_name.substr(prefix.length());
+    obj_id = std::stoull(device_num_str, 0, 16);
+  } catch (const std::exception &e) {
+    // No hex digits after the prefix, or the value does not fit
+    MMD_DEBUG("DEBUG LOG : Error parsing device name '%s'\n", board_name_str);
+    obj_id = 0;
+    return false;
+  }
+
+  // Assume that OPAE does not use 0 as a valid object ID. This is true for now
+  // but relies somewhat on an implementation dependent feature.
+  if (obj_id == 0) {
+    MMD_DEBUG("DEBUG LOG : Error parsing device name '%s'\n", board_name_str);
+    return false;
+  }
+  return true;
+}
+
+/** initialize_asp() function is used in aocl_mmd_open() API
+ * It resets AFC and reinitializes DMA, Kernel Interrupts if in use
+ *
+ * Returns true when the ASP is (or already was) initialized, false on any
+ * failure. On failure the DMA object created here is released again and the
+ * mmd_dma pointer is reset, so the destructor cannot free it a second time.
+ */
+bool Device::initialize_asp() {
+  MMD_DEBUG("DEBUG LOG : Initializing ASP ... \n");
+  if (asp_initialized) {
+    MMD_DEBUG("DEBUG LOG : ASP already initialized \n");
+    return true;
+  }
+
+  fpga_result res = fpgaMapMMIO(mmio_handle, 0, NULL);
+  if (res != FPGA_OK) {
+    MMD_DEBUG("Error mapping MMIO space: %s\n", fpgaErrStr(res));
+    return false;
+  }
+  mmio_is_mapped = true;
+
+  // Trigger an user reset
+  uint64_t reset = 1;
+  fpgaWriteMMIO64(mmio_handle, 0, 0x40000, reset);
+
+  AFU_RESET_DELAY();
+
+  // DMA performance is heavily dependent on the memcpy operation that transfers
+  // data from user allocated buffer to the pinned buffer that is used for
+  // DMA.  On some machines with multiple NUMA nodes it is critical for
+  // performance that the pinned buffer is located on the NUMA node as the
+  // threads that performs the DMA operation.
+  //
+  // The performance also improves slightly if the DMA threads are on the same
+  // NUMA node as the FPGA PCI device.
+  //
+  // This code pins memory allocation to occur from FPGA NUMA node prior to
+  // initializing the DMA buffers.  It also pins all threads in the process
+  // to run on this same node.
+  struct bitmask *mask = NULL;
+  if (enable_set_numa) {
+    mask = numa_parse_nodestring(fpga_numa_node.c_str());
+    if (mask == NULL) {
+      // An unparsable node string yields no mask; skip the binding instead of
+      // handing a NULL mask to libnuma.
+      fprintf(stderr, " Error parsing NUMA node string\n");
+    } else {
+      numa_set_membind(mask);
+      int ret = numa_run_on_node_mask_all(mask);
+      if (ret < 0) {
+        fprintf(stderr, " Error setting NUMA node mask\n");
+      }
+    }
+  }
+
+  MMD_DEBUG("DEBUG LOG : Initializing HOST -> FPGA DMA channel \n");
+
+  mmd_dma = new intel_opae_mmd::mmd_dma(mmio_handle, mmd_handle);
+  const bool dma_ok = mmd_dma->initialized();
+
+  // Turn off membind restriction in order to allow future allocation to
+  // occur on different NUMA nodes if needed.  Hypothesis is that only
+  // the pinned buffers are performance critical for the memcpy. Other
+  // allocations in the process can occur on other NUMA nodes if needed.
+  // This is done before the DMA result is acted upon so that the failure path
+  // neither leaks the mask nor leaves the process bound to a single node.
+  if (mask != NULL) {
+    numa_set_membind(numa_nodes_ptr);
+    numa_free_nodemask(mask);
+    mask = NULL;
+  }
+
+  if (!dma_ok) {
+    MMD_DEBUG("DEBUG LOG : Error initializing DMA channel \n");
+    delete mmd_dma;
+    mmd_dma = NULL;  // the destructor deletes mmd_dma when it is non-NULL
+    return false;
+  }
+
+// Do not enable interrupt if polling mode is enabled in the DLA runtime.
+#ifndef COREDLA_RUNTIME_POLLING
+  try {
+    kernel_interrupt_thread = new intel_opae_mmd::KernelInterrupt(mmio_handle, mmd_handle);
+  } catch (const std::system_error &e) {
+    std::cerr << "Error initializing kernel interrupt thread: " << e.what() << e.code() << std::endl;
+    // Release the DMA object so that a later retry does not leak it
+    delete mmd_dma;
+    mmd_dma = NULL;
+    return false;
+  } catch (const std::exception &e) {
+    std::cerr << "Error initializing kernel interrupt thread: " << e.what() << std::endl;
+    delete mmd_dma;
+    mmd_dma = NULL;
+    return false;
+  }
+#endif
+
+  asp_initialized = true;
+  MMD_DEBUG("DEBUG LOG : ASP Initialized ! \n");
+  return asp_initialized;
+}
+
+/** Device destructor.
+ * Releases, in this order: the kernel interrupt object, the DMA object, the
+ * MMIO mapping, the accelerator handle, the accelerator token and the filter
+ * properties. Failures are counted and reported through MMD_DEBUG only.
+ */
+Device::~Device() {
+  MMD_DEBUG("DEBUG LOG : Destructing Device object \n");
+  int failures = 0;
+
+  // delete on a null pointer is a no-op, so no explicit test is needed
+  delete kernel_interrupt_thread;
+  kernel_interrupt_thread = NULL;
+
+  delete mmd_dma;
+  mmd_dma = NULL;
+
+  if (mmio_is_mapped && fpgaUnmapMMIO(mmio_handle, 0)) {
+    MMD_DEBUG("DEBUG LOG : fpgaUnmapMMIO failed\n");
+    ++failures;
+  }
+
+  if (mmio_handle && fpgaClose(mmio_handle) != FPGA_OK) {
+    MMD_DEBUG("DEBUG LOG : fpgaClose mmio_handle failed\n");
+    ++failures;
+  }
+
+  if (mmio_token && fpgaDestroyToken(&mmio_token) != FPGA_OK) {
+    MMD_DEBUG("DEBUG LOG : fpgaDestroyToken mmio_token failed\n");
+    ++failures;
+  }
+
+  if (filter && fpgaDestroyProperties(&filter) != FPGA_OK) {
+    MMD_DEBUG("DEBUG LOG : fpgaDestroyProperties filter failed\n");
+    ++failures;
+  }
+
+  if (failures > 0) {
+    MMD_DEBUG("DEBUG LOG : Error freeing resources in Device destructor\n");
+  }
+}
+
+/** asp_loaded() function which checks if asp is loaded on board
+ * it is used in aocl_mmd_open() API
+ *
+ * Returns true when the GUID read from the properties of mmio_token equals
+ * I_DK_AFU_ID, false otherwise (including any error while reading it).
+ */
+bool Device::asp_loaded() {
+  fpga_guid pci_guid;
+  fpga_guid afu_guid;
+  // Initialized so that it is never destroyed while holding an indeterminate value
+  fpga_properties prop = NULL;
+  fpga_result res;
+
+  if (uuid_parse(I_DK_AFU_ID, pci_guid) < 0) {
+    MMD_DEBUG("DEBUG LOG : Error parsing guid\n");
+    return false;
+  }
+
+  // Validate the token before it is used to read properties
+  if (!mmio_token) {
+    MMD_DEBUG("DEBUG LOG : Error reading the mmio_token\n");
+    return false;
+  }
+
+  res = fpgaGetProperties(mmio_token, &prop);
+  if (res != FPGA_OK) {
+    MMD_DEBUG("DEBUG LOG : Error reading properties: %s \n", fpgaErrStr(res));
+    // Only release what was actually handed back
+    if (prop) fpgaDestroyProperties(&prop);
+    return false;
+  }
+
+  res = fpgaPropertiesGetGUID(prop, &afu_guid);
+  fpgaDestroyProperties(&prop);
+  if (res != FPGA_OK) {
+    MMD_DEBUG("DEBUG LOG : Error reading GUID \n");
+    return false;
+  }
+
+  if (uuid_compare(pci_guid, afu_guid) == 0) {
+    MMD_DEBUG("DEBUG LOG : asp loaded : true \n");
+    return true;
+  } else {
+    MMD_DEBUG("DEBUG LOG : asp loaded : false \n");
+    return false;
+  }
+}
+
+/** get_bdf() is called from the aocl_mmd_get_info() API.
+ * Formats the bus, device and function members as lowercase hex in the form
+ * "bb:dd.f" (bus and device zero-padded to two digits).
+ */
+std::string Device::get_bdf() {
+  std::ostringstream out;
+  // The fill character and the hex base stay in effect for the whole stream;
+  // the field width has to be set again for every padded field.
+  out << std::setfill('0') << std::hex;
+  out << std::setw(2) << unsigned(bus);
+  out << ":";
+  out << std::setw(2) << unsigned(device);
+  out << ".";
+  out << unsigned(function);
+  return out.str();
+}
+
+/** get_temperature() function is called
+ * in aocl_mmd_get_info() API
+ * We currently use hardcoded paths to retrieve temperature information
+ * We will replace with OPAE APIs in future
+ *
+ * Returns the value read from the temp1_input object divided by 1000
+ * (presumably millidegrees to degrees, following the hwmon convention -
+ * confirm), or -999 when the sensor object cannot be found or read.
+ */
+float Device::get_temperature() {
+  if (std::getenv("MMD_ENABLE_DEBUG")) {
+    MMD_DEBUG("DEBUG LOG : Reading temperature ... \n");
+  }
+  const float read_error = -999;
+  fpga_object obj;
+  const char *name;
+  name = "dfl_dev.*/spi_master/spi*/spi*.*/*-hwmon.*.auto/hwmon/hwmon*/temp1_input";
+  fpga_result res;
+  res = fpgaTokenGetObject(fme_token, name, &obj, FPGA_OBJECT_GLOB);
+  if (res != FPGA_OK) {
+    MMD_DEBUG("DEBUG LOG : Error reading temperature monitor from BMC :");
+    MMD_DEBUG(" %s \n", fpgaErrStr(res));
+    return read_error;
+  }
+
+  uint64_t value = 0;
+  res = fpgaObjectRead64(obj, &value, FPGA_OBJECT_SYNC);
+  fpgaDestroyObject(&obj);
+  if (res != FPGA_OK) {
+    // Do not report a made-up 0 when the read itself failed
+    MMD_DEBUG("DEBUG LOG : Error reading temperature value : %s \n", fpgaErrStr(res));
+    return read_error;
+  }
+
+  // Divide in floating point so the fractional part of the reading is kept
+  return static_cast<float>(value) / 1000.0f;
+}
+
+/** set_kernel_interrupt() is used by the aocl_mmd_set_interrupt_handler() API.
+ * Forwards the callback and its user data to the kernel interrupt object;
+ * does nothing when no such object exists.
+ */
+void Device::set_kernel_interrupt(aocl_mmd_interrupt_handler_fn fn, void *user_data) {
+  MMD_DEBUG("DEBUG LOG : Device::set_kernel_interrupt() \n");
+  if (!kernel_interrupt_thread) {
+    return;
+  }
+  kernel_interrupt_thread->set_kernel_interrupt(fn, user_data);
+}
+
+/** set_status_handler() is used by the aocl_mmd_set_status_handler() API.
+ * Stores the callback and the user data that event_update_fn() passes to it.
+ */
+void Device::set_status_handler(aocl_mmd_status_handler_fn fn, void *user_data) {
+  MMD_DEBUG("DEBUG LOG : Device::set_status_handler() \n");
+  this->event_update_user_data = user_data;
+  this->event_update = fn;
+}
+
+/** event_update_fn() is used in read_block(), write_block(), copy_block() functions
+ * OPAE provides event API for handling asynchronous events such as errors and interrupts
+ * under the hood those are used
+ *
+ * Invokes the registered status handler with this device's handle, the stored
+ * user data, the operation and its status. Does nothing when no status
+ * handler has been registered.
+ */
+void Device::event_update_fn(aocl_mmd_op_t op, int status) {
+  MMD_DEBUG("DEBUG LOG : Device::event_update_fn() \n");
+  // event_update stays NULL until set_status_handler() is called
+  if (event_update == NULL) {
+    MMD_DEBUG("DEBUG LOG : Device::event_update_fn() no status handler set \n");
+    return;
+  }
+  event_update(mmd_handle, event_update_user_data, op, status);
+}
+
+/** read_block() is used by the aocl_mmd_read() API for fpga->host transfers.
+ * Depending on mmd_interface the data is fetched by DMA (AOCL_MMD_MEMORY),
+ * by MMIO at the given offset (AOCL_MMD_DLA_CSR), or by MMIO at
+ * mmd_interface + offset (anything else). Only the last path notifies the
+ * status handler, and only when op is non-NULL.
+ * Returns the result of the underlying transfer.
+ */
+int Device::read_block(aocl_mmd_op_t op, int mmd_interface, void *host_addr, size_t offset, size_t size) {
+  MMD_DEBUG("DEBUG LOG : Device::read_block()\n");
+
+  // Device memory goes through the DMA engine, serialized by m_dma_mutex
+  if (mmd_interface == AOCL_MMD_MEMORY) {
+    std::unique_lock<std::mutex> dma_mutex_lock(m_dma_mutex);
+    MMD_DEBUG("DEBUG LOG : Using DMA to read block\n");
+    return mmd_dma->fpga_to_host(host_addr, (uint64_t)offset, size);
+  }
+
+  // DLA CSR space: offset is already the full MMIO address
+  if (mmd_interface == AOCL_MMD_DLA_CSR) {
+    assert(size == 4);  // DLA CSR read should be always size ==4 as of 2024.2
+    MMD_DEBUG("DEBUG LOG : Using MMIO to read block in the DLA CSR space\n");
+    return read_mmio(host_addr, offset, size);
+  }
+
+  // Any other interface value is treated as the base address of the MMIO access
+  MMD_DEBUG("DEBUG LOG : Using MMIO to read block\n");
+  const int mmio_status = read_mmio(host_addr, mmd_interface + offset, size);
+  if (op) {
+    this->event_update_fn(op, mmio_status);
+  }
+  return mmio_status;
+}
+
+/** write_block() is used in aocl_mmd_write() API
+ * as name suggests its used for DMA and MMIO transfers
+ *
+ * Depending on mmd_interface the data is sent by DMA (AOCL_MMD_MEMORY), by
+ * MMIO at the given offset (AOCL_MMD_DLA_CSR), or by MMIO at
+ * mmd_interface + offset (anything else). Only the last path notifies the
+ * status handler, and only when op is non-NULL.
+ * Returns the result of the underlying transfer.
+ */
+int Device::write_block(aocl_mmd_op_t op, int mmd_interface, const void *host_addr, size_t offset, size_t size) {
+  MMD_DEBUG("DEBUG LOG : Device::write_block()\n");
+  int res;
+
+  // The mmd_interface is defined as the base address of the MMIO write.  Access
+  // to memory requires special functionality.  Otherwise do direct MMIO write
+  if (mmd_interface == AOCL_MMD_MEMORY) {
+    // DMA transfers are serialized by m_dma_mutex
+    std::unique_lock<std::mutex> dma_mutex_lock(m_dma_mutex);
+    MMD_DEBUG("DEBUG LOG : Using DMA to write block\n");
+    res = mmd_dma->host_to_fpga(host_addr, (uint64_t)offset, size);
+  } else if (mmd_interface == AOCL_MMD_DLA_CSR) {
+    assert(size == 4);  // DLA CSR write should be always size ==4 as of 2024.2
+    // This is the write path; the message previously said "read"
+    MMD_DEBUG("DEBUG LOG : Using MMIO to write block in the DLA CSR space\n");
+    res = write_mmio(host_addr, offset, size);
+  } else {
+    MMD_DEBUG("DEBUG LOG : Using MMIO to write block\n");
+    res = write_mmio(host_addr, mmd_interface + offset, size);
+    if (op) {
+      this->event_update_fn(op, res);
+    }
+  }
+
+  return res;
+}
+
+/** read_mmio() is used in read_block() function
+ * It forwards to mmd_helper::read_mmio() with this device's mmio_handle and
+ * returns that helper's result unchanged.
+ * Per the original note the helper uses the OPAE APIs fpgaReadMMIO64() and
+ * fpgaReadMMIO32(); the helper is defined in another file.
+ */
+int Device::read_mmio(void *host_addr, size_t mmio_addr, size_t size) {
+ return mmd_helper::read_mmio(mmio_handle, host_addr, mmio_addr, size);
+}
+
+/** write_mmio() is used in write_block() function
+ * It forwards to mmd_helper::write_mmio() with this device's mmio_handle and
+ * returns that helper's result unchanged.
+ * Per the original note the helper uses the OPAE APIs fpgaWriteMMIO64() and
+ * fpgaWriteMMIO32(); the helper is defined in another file.
+ */
+int Device::write_mmio(const void *host_addr, size_t mmio_addr, size_t size) {
+ return mmd_helper::write_mmio(mmio_handle, host_addr, mmio_addr, size);
+}
diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_device.h b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_device.h
new file mode 100644
index 0000000..1cded83
--- /dev/null
+++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_device.h
@@ -0,0 +1,151 @@
+// (c) 1992-2024 Intel Corporation.
+// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words
+// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S.
+// and/or other countries. Other marks and brands may be claimed as the property
+// of others. See Trademarks on intel.com for full list of Intel trademarks or
+// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera)
+// Your use of Intel Corporation's design tools, logic functions and other
+// software and tools, and its AMPP partner logic functions, and any output
+// files any of the foregoing (including device programming or simulation
+// files), and any associated documentation or information are expressly subject
+// to the terms and conditions of the Altera Program License Subscription
+// Agreement, Intel MegaCore Function License Agreement, or other applicable
+// license agreement, including, without limitation, that your use is for the
+// sole purpose of programming logic devices manufactured by Intel and sold by
+// Intel or its authorized distributors. Please refer to the applicable
+// agreement for further details.
+
+#ifndef MMD_DEVICE_H
+#define MMD_DEVICE_H
+
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <string>
+
+#include <opae/fpga.h>
+
+#include <uuid/uuid.h>
+
+#include "aocl_mmd.h"
+#include "mmd_dma.h"
+#include "mmd_helper.h"
+
+#include "kernel_interrupt.h"
+
+// Tune delay for simulation or HW. Eventually delay
+// should be removed for HW, may still be needed for ASE simulation
+// DELAY_MULTIPLIER scales every usleep()-based delay macro defined below.
+#ifdef SIM
+#define DELAY_MULTIPLIER 100
+#else
+#define DELAY_MULTIPLIER 1
+#endif
+
+// Most AOCL_MMD_CALL functions return negative number in case of error,
+// MMD_AOCL_ERR is used to indicate an error from the MMD that is being
+// returned to the runtime. Simply set to -1 for now since neither interface
+// defines a meaning to return codes for errors.
+#define MMD_AOCL_ERR -1
+
+// NOTE: some of the code relies on invalid handle returning -1
+// future TODO eliminate dependency on specific error values
+#define MMD_INVALID_PARAM -1
+
+// Our diagnostic script relies on handle values < -1 to determine when
+// a valid device is present but a functioning ASP is not loaded.
+#define MMD_ASP_NOT_LOADED -2
+#define MMD_ASP_INIT_FAILED -3
+
+// Delay settings
+// MMIO_DELAY() expands to nothing; the others sleep for the given number of
+// microseconds multiplied by DELAY_MULTIPLIER.
+#define MMIO_DELAY()
+#define YIELD_DELAY() usleep(1 * DELAY_MULTIPLIER)
+#define OPENCL_SW_RESET_DELAY() usleep(5000 * DELAY_MULTIPLIER)
+#define AFU_RESET_DELAY() usleep(20000 * DELAY_MULTIPLIER)
+
+// Offset 0x30 relative to the AOCL_MMD_KERNEL interface id (enum defined below).
+#define KERNEL_SW_RESET_BASE (AOCL_MMD_KERNEL + 0x30)
+
+// NOTE(review): presumably the prefix of board/device names handled by this MMD;
+// confirm against parse_board_name().
+#define ASP_NAME "ofs_"
+
+#define SVM_MMD_MPF 0x24000
+
+#define SVM_DDR_OFFSET 0x1000000000000
+#define PCI_DDR_OFFSET 0
+
+// Anonymous enum holding two CSR offsets and the interface ids that are passed
+// as `mmd_interface` to Device::read_block()/write_block().
+enum {
+ // IRQ offsets no longer exist in DLA hardware (removed from board.qsys)
+ AOCL_IRQ_POLLING_BASE = 0x0100, // CSR to polling interrupt status
+ AOCL_IRQ_MASKING_BASE = 0x0108, // CSR to set/unset interrupt mask
+ // Interface ids: MEMORY is routed to DMA and DLA_CSR to MMIO addressed by
+ // offset only; other values are added to the offset as an MMIO base.
+ AOCL_MMD_KERNEL = 0,
+ AOCL_MMD_MEMORY = 1,
+ AOCL_MMD_DLA_CSR = 2,
+};
+
+// Classification values: MMD_INVALID_ID (0), MMD_ASP (1), MMD_AFU (2).
+// NOTE(review): the type name looks like a typo of "AfuStatus"; it is left
+// unchanged because other translation units may refer to it.
+enum AfuStatu { MMD_INVALID_ID = 0, MMD_ASP, MMD_AFU };
+
+// Represents one FPGA device opened through OPAE and exposes the block
+// read/write entry points used by the aocl_mmd_* API. Instances are
+// non-copyable.
+class Device final {
+ public:
+ // NOTE(review): the uint64_t argument is presumably the FPGA object id
+ // (see get_fpga_obj_id()); confirm against the constructor definition.
+ Device(uint64_t);
+ Device(const Device &) = delete;
+ Device &operator=(const Device &) = delete;
+ ~Device();
+
+ // NOTE(review): presumably extracts obj_id from board_name and returns whether
+ // parsing succeeded; the definition is not in this header.
+ static bool parse_board_name(const char *board_name, uint64_t &obj_id);
+
+ // Simple accessors for the stored handle, object id and device name.
+ int get_mmd_handle() { return mmd_handle; }
+ uint64_t get_fpga_obj_id() { return fpga_obj_id; }
+ std::string get_dev_name() { return mmd_dev_name; }
+ std::string get_bdf();
+ float get_temperature();
+
+ bool initialize_asp();
+ void set_kernel_interrupt(aocl_mmd_interrupt_handler_fn fn, void *user_data);
+ void set_status_handler(aocl_mmd_status_handler_fn fn, void *user_data);
+ void event_update_fn(aocl_mmd_op_t op, int status);
+ bool asp_loaded();
+
+ // Block transfers. mmd_interface selects DMA (AOCL_MMD_MEMORY), the DLA CSR
+ // space (AOCL_MMD_DLA_CSR) or plain MMIO (any other value).
+ int read_block(aocl_mmd_op_t op, int mmd_interface, void *host_addr, size_t dev_addr, size_t size);
+ int write_block(aocl_mmd_op_t op, int mmd_interface, const void *host_addr, size_t dev_addr, size_t size);
+
+ private:
+ static int next_mmd_handle;
+
+ int mmd_handle;
+ uint64_t fpga_obj_id;
+ std::string mmd_dev_name;
+ intel_opae_mmd::KernelInterrupt *kernel_interrupt_thread;
+ aocl_mmd_status_handler_fn event_update;
+ void *event_update_user_data;
+
+ std::string fpga_numa_node;
+ bool enable_set_numa;
+ bool fme_sysfs_temp_initialized;
+ void initialize_fme_sysfs();
+ void initialize_local_cpus_sysfs();
+ bool find_dma_dfh_offsets();
+
+ // NOTE(review): presumably the PCIe bus/device/function reported by get_bdf().
+ uint8_t bus;
+ uint8_t device;
+ uint8_t function;
+
+ bool afu_initialized;
+ bool asp_initialized;
+ bool mmio_is_mapped;
+
+ // OPAE objects. mmio_handle is the handle passed to the mmd_helper MMIO
+ // routines by read_mmio()/write_mmio().
+ fpga_properties filter;
+ fpga_token mmio_token;
+ fpga_handle mmio_handle;
+ fpga_token fme_token;
+ fpga_guid guid;
+ intel_opae_mmd::mmd_dma *mmd_dma;
+ // Held by write_block() for the duration of a DMA transfer.
+ std::mutex m_dma_mutex;
+
+ // Helper functions
+ int read_mmio(void *host_addr, size_t dev_addr, size_t size);
+ int write_mmio(const void *host_addr, size_t dev_addr, size_t size);
+};
+
+#endif // MMD_DEVICE_H
diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_dma.cpp b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_dma.cpp
new file mode 100644
index 0000000..6a4e13c
--- /dev/null
+++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_dma.cpp
@@ -0,0 +1,573 @@
+// (c) 1992-2024 Intel Corporation.
+// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words
+// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S.
+// and/or other countries. Other marks and brands may be claimed as the property
+// of others. See Trademarks on intel.com for full list of Intel trademarks or
+// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera)
+// Your use of Intel Corporation's design tools, logic functions and other
+// software and tools, and its AMPP partner logic functions, and any output
+// files any of the foregoing (including device programming or simulation
+// files), and any associated documentation or information are expressly subject
+// to the terms and conditions of the Altera Program License Subscription
+// Agreement, Intel MegaCore Function License Agreement, or other applicable
+// license agreement, including, without limitation, that your use is for the
+// sole purpose of programming logic devices manufactured by Intel and sold by
+// Intel or its authorized distributors. Please refer to the applicable
+// agreement for further details.
+
+#include <memory.h>
+#include <sys/mman.h>
+#include <cassert>
+#include <chrono>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <unordered_map>
+
+#include <inttypes.h>
+#include <sstream>
+
+#include "mmd_device.h"
+#include "mmd_dma.h"
+#include "mmd_helper.h"
+
+namespace intel_opae_mmd {
+
+/** mmd_dma class constructor
+ * Prepares the host buffer shared with the DMA engine, zeroes it and looks up
+ * its IO address. m_initialized becomes true only when both OPAE calls
+ * succeed, so callers can detect a failed construction through initialized().
+ *
+ * @param fpga_handle_arg  OPAE handle used for buffer and MMIO operations
+ * @param mmd_handle       currently unused
+ */
+mmd_dma::mmd_dma(fpga_handle fpga_handle_arg, int mmd_handle) : m_initialized(false), m_fpga_handle(fpga_handle_arg) {
+  (void)mmd_handle;  // kept in the signature for interface compatibility
+  MMD_DEBUG("DEBUG LOG : Constructing DMA \n");
+
+  // Initialize shared buffer
+  auto res = fpgaPrepareBuffer(m_fpga_handle, DMA_BUFFER_SIZE, (void **)&dma_buf_ptr, &dma_buf_wsid, 0);
+  assert(FPGA_OK == res && "Allocating DMA Buffer failed");
+  if (FPGA_OK != res || dma_buf_ptr == NULL) {
+    // assert() is compiled out with NDEBUG; never touch a buffer we do not own.
+    MMD_DEBUG("DEBUG LOG : Allocating DMA Buffer failed\n");
+    dma_buf_ptr = NULL;
+    return;
+  }
+
+  memset((void *)dma_buf_ptr, 0x0, DMA_BUFFER_SIZE);
+
+  // Store the IO address of the shared buffer (the address the DMA engine uses)
+  res = fpgaGetIOAddress(m_fpga_handle, dma_buf_wsid, &dma_buf_iova);
+  assert(FPGA_OK == res && "getting dma DMA_BUF_IOVA failed");
+  if (FPGA_OK != res) {
+    // The buffer stays allocated and is released by the destructor.
+    MMD_DEBUG("DEBUG LOG : getting dma DMA_BUF_IOVA failed\n");
+    return;
+  }
+
+  m_initialized = true;
+}
+
+/** mmd_dma destructor
+ * Releases the shared DMA buffer obtained during construction and marks the
+ * object as uninitialized. Nothing is released when no buffer was prepared.
+ */
+mmd_dma::~mmd_dma() {
+  MMD_DEBUG("DEBUG LOG : Destructing DMA \n");
+  if (dma_buf_ptr != NULL) {
+    auto res = fpgaReleaseBuffer(m_fpga_handle, dma_buf_wsid);
+    assert(FPGA_OK == res && "Release DMA Buffer failed");
+    (void)res;  // silences the unused-variable warning when NDEBUG is defined
+    dma_buf_ptr = NULL;
+  }
+  m_initialized = false;
+}
+
+// Called in dma_transfer() to send DMA descriptor.
+// Writes the four descriptor fields (source address, destination address,
+// length, control) as consecutive 64-bit MMIO words starting at mmio_dst.
+// The control word is written last.
+// Returns 0 on success and -1 as soon as one MMIO write fails.
+int mmd_dma::send_descriptor(uint64_t mmio_dst, dma_descriptor_t desc) {
+  // mmio requires 8 byte alignment
+  assert(mmio_dst % 8 == 0);
+
+  // len and control are 32-bit fields; each still occupies one 64-bit word.
+  const uint64_t words[] = {desc.src_address, desc.dest_address, desc.len, desc.control};
+
+  for (const uint64_t word : words) {
+    if (FPGA_OK != fpgaWriteMMIO64(m_fpga_handle, 0, mmio_dst, word)) {
+      MMD_DEBUG("DEBUG LOG : mmd_dma::send_descriptor MMIO write failed\n");
+      return -1;
+    }
+    MMD_DEBUG("Writing %" PRIX64 " to address %" PRIX64 "\n", word, mmio_dst);
+    mmio_dst += 8;
+  }
+
+  return 0;
+}
+
+// Use ASE to handle unaligned transfer and DMA to do aligned transfer.
+//
+// Reads `size` bytes from device address dev_src into host_addr:
+//   1. if dev_src is not 64-byte aligned, the bytes up to the next 64-byte
+//      boundary (or the whole request when it is shorter than 64 bytes) are
+//      read through the ASE window;
+//   2. the 64-byte aligned middle part is read by DMA through the shared
+//      buffer, at most DMA_BUFFER_SIZE bytes per descriptor;
+//   3. the remaining (< 64) bytes are read through the ASE window.
+// Returns 0 on success and -1 when any step fails.
+int mmd_dma::fpga_to_host(void *host_addr, uint64_t dev_src, size_t size) {
+  uint64_t count_left = size;
+  uint64_t curr_dev_src = dev_src;
+  char *curr_host_addr = static_cast<char *>(host_addr);
+
+  if (curr_dev_src % 64 != 0) {
+    // We use ASE to handle unaligned DMA transfer
+    MMD_DEBUG("DEBUG LOG : mmd_dma::fpga_to_host dev_src is non 64B aligned\n");
+    uint64_t align_bytes = 64 - (curr_dev_src % 64);  // distance to the next 64B boundary
+    if (count_left < 64) {
+      MMD_DEBUG("DEBUG LOG : mmd_dma::fpga_to_host dev_src count < 64\n");
+      align_bytes = count_left;  // short request: ASE handles all of it
+    }
+    if (FPGA_OK != _ase_fpga_to_host(curr_dev_src, curr_host_addr, align_bytes)) {
+      MMD_DEBUG("DEBUG LOG : mmd_dma::_ase_fpga_to_host failed\n");
+      return -1;
+    }
+
+    // Update the processed data
+    count_left -= align_bytes;
+    curr_dev_src += align_bytes;
+    curr_host_addr += align_bytes;
+  }
+
+  if (count_left >= DMA_LINE_SIZE && (!m_initialized || dma_buf_ptr == NULL)) {
+    MMD_DEBUG("DEBUG LOG : mmd_dma::fpga_to_host DMA buffer is not initialized\n");
+    return -1;
+  }
+
+  // DMA always lands in the shared buffer, addressed from the device side.
+  const uint64_t dev_dest = dma_buf_iova | DMA_HOST_MASK;
+  while (count_left >= DMA_LINE_SIZE) {
+    // Largest multiple of DMA_LINE_SIZE that fits in both the request and the buffer.
+    uint64_t dma_tx_bytes = (count_left / DMA_LINE_SIZE) * DMA_LINE_SIZE;
+    if (dma_tx_bytes > DMA_BUFFER_SIZE) dma_tx_bytes = DMA_BUFFER_SIZE;
+
+    const int len = static_cast<int>(((dma_tx_bytes - 1) / DMA_LINE_SIZE) + 1);  // Ceiling of dma_tx_bytes / DMA_LINE_SIZE
+
+    if (dma_transfer(curr_dev_src, dev_dest, len, ddr_to_host) != 0) {
+      MMD_DEBUG("DEBUG LOG : mmd_dma::fpga_to_host dma_transfer failed\n");
+      return -1;
+    }
+
+    // Copy data from shared buffer to host addr
+    memcpy(curr_host_addr, (void *)dma_buf_ptr, dma_tx_bytes);
+
+    memset((void *)dma_buf_ptr, 0x0, DMA_BUFFER_SIZE);
+
+    // Update the curr source and dest
+    curr_host_addr += dma_tx_bytes;
+    curr_dev_src += dma_tx_bytes;
+    count_left -= dma_tx_bytes;
+  }
+
+  if (count_left) {
+    MMD_DEBUG("DEBUG LOG : mmd_dma::fpga_to_host count_left after DMA transfer is ");
+    MMD_DEBUG("%" PRIu64 "\n", count_left);
+    // Handle the rest unaligned transfer using ASE
+    if (FPGA_OK != _ase_fpga_to_host(curr_dev_src, curr_host_addr, count_left)) {
+      MMD_DEBUG("DEBUG LOG : mmd_dma::_ase_fpga_to_host failed\n");
+      return -1;
+    }
+  }
+
+  return 0;
+}
+
+// Use ASE to handle unaligned transfer and DMA to do aligned transfer.
+//
+// Writes `size` bytes from host_addr to device address dev_dest:
+//   1. if dev_dest is not 64-byte aligned, the bytes up to the next 64-byte
+//      boundary (or the whole request when it is shorter than 64 bytes) are
+//      written through the ASE window;
+//   2. the 64-byte aligned middle part is copied into the shared buffer and
+//      written by DMA, at most DMA_BUFFER_SIZE bytes per descriptor;
+//   3. the remaining (< 64) bytes are written through the ASE window.
+// Returns 0 on success and -1 when any step fails.
+int mmd_dma::host_to_fpga(const void *host_addr, uint64_t dev_dest, size_t size) {
+  uint64_t count_left = size;
+  uint64_t curr_dest = dev_dest;
+  const char *curr_host_addr = static_cast<const char *>(host_addr);
+
+  if (curr_dest % 64 != 0) {
+    // We use ASE to handle unaligned DMA transfer
+    MMD_DEBUG("DEBUG LOG : mmd_dma::host_to_fpga dev_dest is non 64B aligned\n");
+    uint64_t align_bytes = 64 - (curr_dest % 64);  // distance to the next 64B boundary
+    if (count_left < 64) {
+      align_bytes = count_left;  // short request: ASE handles all of it
+    }
+    if (FPGA_OK != _ase_host_to_fpga(curr_dest, curr_host_addr, align_bytes)) {
+      MMD_DEBUG("DEBUG LOG : mmd_dma::_ase_host_to_fpga failed\n");
+      return -1;
+    }
+
+    // Update the processed data
+    count_left -= align_bytes;
+    curr_dest += align_bytes;
+    curr_host_addr += align_bytes;
+  }
+
+  if (count_left >= DMA_LINE_SIZE && (!m_initialized || dma_buf_ptr == NULL)) {
+    MMD_DEBUG("DEBUG LOG : mmd_dma::host_to_fpga DMA buffer is not initialized\n");
+    return -1;
+  }
+
+  // DMA always reads from the shared buffer, addressed from the device side.
+  const uint64_t dev_src = dma_buf_iova | DMA_HOST_MASK;
+  while (count_left >= DMA_LINE_SIZE) {
+    // Largest multiple of DMA_LINE_SIZE that fits in both the request and the buffer.
+    uint64_t dma_tx_bytes = (count_left / DMA_LINE_SIZE) * DMA_LINE_SIZE;
+    if (dma_tx_bytes > DMA_BUFFER_SIZE) dma_tx_bytes = DMA_BUFFER_SIZE;
+
+    // Copy host_src value to the shared buffer
+    memcpy((void *)dma_buf_ptr, curr_host_addr, dma_tx_bytes);
+
+    const int len = static_cast<int>(((dma_tx_bytes - 1) / DMA_LINE_SIZE) + 1);  // Ceiling of dma_tx_bytes / DMA_LINE_SIZE
+
+    if (dma_transfer(dev_src, curr_dest, len, host_to_ddr) != 0) {
+      MMD_DEBUG("DEBUG LOG : mmd_dma::host_to_fpga dma_transfer failed\n");
+      return -1;
+    }
+
+    memset((void *)dma_buf_ptr, 0x0, DMA_BUFFER_SIZE);
+
+    // Update the curr source and dest
+    curr_host_addr += dma_tx_bytes;
+    curr_dest += dma_tx_bytes;
+    count_left -= dma_tx_bytes;
+  }
+
+  if (count_left) {
+    MMD_DEBUG("DEBUG LOG : mmd_dma::host_to_fpga count_left after DMA transfer is ");
+    MMD_DEBUG("%" PRIu64 "\n", count_left);
+    // Handle the rest unaligned transfer using ASE
+    if (FPGA_OK != _ase_host_to_fpga(curr_dest, curr_host_addr, count_left)) {
+      MMD_DEBUG("DEBUG LOG : mmd_dma::_ase_host_to_fpga failed\n");
+      return -1;
+    }
+  }
+
+  return 0;
+}
+
+// Program one DMA descriptor and busy-wait until the engine reports idle.
+//
+// dev_src / dev_dest  64-byte aligned source and destination addresses; only
+//                     the low 35 bits are placed in the descriptor
+// len                 transfer length in DMA_LINE_SIZE units
+// descriptor_mode     transfer direction, encoded into the control word
+// Returns 0 on success, -1 when the descriptor cannot be written or the
+// status register cannot be read.
+// NOTE(review): the polling loop has no timeout, so a hung engine blocks the
+// caller forever.
+int mmd_dma::dma_transfer(uint64_t dev_src, uint64_t dev_dest, int len, dma_mode descriptor_mode) {
+  // Get debug information for thread id
+  // NOTE(review): std::stoull assumes the thread id streams as a decimal
+  // number - confirm for every supported toolchain.
+  std::stringstream ss;
+  ss << std::this_thread::get_id();
+  uint64_t id = std::stoull(ss.str());
+  MMD_DEBUG("dma_transfer start current thread_id is %04" PRIX64 "\n", id);
+
+  // Native DMA transfer requires 64 byte alignment
+  assert(dev_src % 64 == 0);
+  assert(dev_dest % 64 == 0);
+
+  const uint64_t MASK_FOR_35BIT_ADDR = 0x7FFFFFFFF;
+
+  dma_descriptor_t desc;
+
+  MMD_DEBUG("DEBUG LOG : mmd_dma::dma_transfer starts\n");
+  MMD_DEBUG("DEBUG LOG dev_dest = %04" PRIX64 "\n", dev_dest);
+
+  desc.src_address = dev_src & MASK_FOR_35BIT_ADDR;
+  desc.dest_address = dev_dest & MASK_FOR_35BIT_ADDR;
+  desc.len = len;
+  // Bit 31 is always set; the mode occupies the bits starting at MODE_SHIFT.
+  desc.control = 0x80000000u | (static_cast<uint32_t>(descriptor_mode) << MODE_SHIFT);
+
+  // CSR indices are in 64-bit words, hence the factor of 8 for byte offsets.
+  const uint64_t DMA_DESC_BASE = 8 * DMA_CSR_IDX_SRC_ADDR;
+  const uint64_t DMA_STATUS_BASE = 8 * DMA_CSR_IDX_STATUS;
+  uint64_t mmio_data = 0;
+
+  int desc_size = sizeof(desc);
+
+  MMD_DEBUG("Descriptor size = %d\n", desc_size);
+  MMD_DEBUG("desc.src_address = %04" PRIX64 "\n", desc.src_address);
+  MMD_DEBUG("desc.dest_address = %04" PRIX64 "\n", desc.dest_address);
+  MMD_DEBUG("desc.len = %d\n", desc.len);
+  MMD_DEBUG("desc.control = %04X\n", desc.control);
+  MMD_DEBUG("descriptor_mode = %04X\n", descriptor_mode);
+
+  // send descriptor
+  if (send_descriptor(DMA_DESC_BASE, desc) != 0) {
+    MMD_DEBUG("DEBUG LOG : mmd_dma::dma_transfer failed to send descriptor\n");
+    return -1;
+  }
+
+  fpga_result r;
+  r = fpgaReadMMIO64(m_fpga_handle, 0, DMA_STATUS_BASE, &mmio_data);
+  MMD_DEBUG("DMA_STATUS_BASE before = %04" PRIX64 "\n", mmio_data);
+  if (FPGA_OK != r) return -1;
+
+  // If the busy bit is empty, then we are done.
+  while ((mmio_data & 0x1) == 0x1) {
+    r = fpgaReadMMIO64(m_fpga_handle, 0, DMA_STATUS_BASE, &mmio_data);
+    // Without this check a failing read would leave the stale busy bit set
+    // and spin forever once assert() is compiled out.
+    if (FPGA_OK != r) {
+      MMD_DEBUG("DEBUG LOG : mmd_dma::dma_transfer failed to read DMA status\n");
+      return -1;
+    }
+  }
+  MMD_DEBUG("dma_transfer end current thread_id is %04" PRIX64 "\n", id);
+  return 0;
+}
+
+// Transfer "count" bytes from HOST to FPGA using Address span expander(ASE)- will internally make
+// calls to handle unaligned and aligned MMIO writes.
+//
+// For every access the 4 KiB page containing the target address is selected by
+// writing it to the ASE control register, then the data is written through the
+// ASE window at the page offset (low 12 bits). A destination that is not
+// 8-byte aligned is first brought to alignment with a read-modify-write of the
+// enclosing 64-bit word.
+// Returns FPGA_OK on success, otherwise the failing OPAE result
+// (FPGA_EXCEPTION when mmd_helper::write_mmio() reports an error).
+fpga_result mmd_dma::_ase_host_to_fpga(uint64_t dev_dest, const void *src_ptr, uint64_t count) {
+  MMD_DEBUG("DEBUG LOG: _ase_host_to_fpga is being called\n ");
+
+  MMD_DEBUG("DEBUG LOG : dev_dest is ");
+  MMD_DEBUG("%" PRIu64 "\n", dev_dest);
+
+  assert(count < 64);  // DLA only uses ASE transfer with less than 64 Byte transfer.
+
+  if (count == 0) return FPGA_OK;
+
+  const uint64_t ase_ctrl_addr = ASE_MMIO_BASE + ASE_MMIO_CTRL;
+  const uint64_t ase_window_base = ASE_MMIO_BASE + ASE_MMIO_WINDOW;
+
+  fpga_result res = FPGA_OK;
+  uint64_t count_left = count;
+  const char *curr_src_ptr = static_cast<const char *>(src_ptr);
+
+  // For ASE window
+  uint64_t ase_window;
+  uint64_t ase_addr;
+  uint64_t dev_addr;
+
+  if (dev_dest % 8 == 0) {
+    while (count_left > 0) {
+      ase_window = dev_dest & ~(0xfff);
+      ase_addr = (dev_dest & 0xfff);  // only keep the lower 12 bits.
+
+      MMD_DEBUG("DEBUG LOG : ase_window is ");
+      MMD_DEBUG("%" PRIu64 "\n", ase_window);
+
+      // Write to ASE control
+      res = fpgaWriteMMIO64(m_fpga_handle, 0, ase_ctrl_addr, ase_window);
+      if (res != FPGA_OK) {
+        MMD_DEBUG("DEBUG LOG : Write to ASE control failed\n");
+        return res;
+      }
+
+      // Set final dev_addr
+      // dev_addr will be 8 byte aligned as long as dev_dest is 8 byte aligned.
+      dev_addr = ase_window_base + ase_addr;
+
+      assert(dev_addr % 8 == 0);
+
+      MMD_DEBUG("DEBUG LOG : _ase_host_to_fpga count is ");
+      MMD_DEBUG("%" PRIu64 "\n", count_left);
+
+      MMD_DEBUG("DEBUG LOG : dev addr is ");
+      MMD_DEBUG("%" PRIu64 "\n", dev_addr);
+
+      size_t size = (count_left > 8) ? 8 : count_left;
+      if (mmd_helper::write_mmio(m_fpga_handle, curr_src_ptr, dev_addr, size) != 0) {
+        MMD_DEBUG("DEBUG LOG : _ase_host_to_fpga write_mmio failed\n");
+        return FPGA_EXCEPTION;
+      }
+
+      count_left -= size;
+      dev_dest += size;
+      curr_src_ptr += size;
+    }
+
+    return FPGA_OK;
+  }
+
+  // First we need to handle the non byte aligned transfer
+
+  MMD_DEBUG("DEBUG LOG : _ase_host_to_fpga count is ");
+  MMD_DEBUG("%" PRIu64 "\n", count_left);
+
+  // Aligns address to 8 byte using dst masking method
+  const uint64_t shift = dev_dest % 8;
+  uint64_t unaligned_size = 8 - shift;
+  if (unaligned_size > count_left) unaligned_size = count_left;
+
+  // Write to the unaligned address
+  assert(unaligned_size < 8);
+
+  // Write to ASE control to switch page.
+  ase_window = dev_dest & ~(0xfff);
+
+  MMD_DEBUG("DEBUG LOG : ase_window in non-aligned is ");
+  MMD_DEBUG("%" PRIu64 "\n", ase_window);
+
+  res = fpgaWriteMMIO64(m_fpga_handle, 0, ase_ctrl_addr, ase_window);
+  if (res != FPGA_OK) {
+    MMD_DEBUG("DEBUG LOG : Write to ASE control failed\n");
+    return res;
+  }
+
+  // Get aligned dest address
+  uint64_t dev_aligned_addr = dev_dest - shift;
+  assert(dev_aligned_addr % 8 == 0);
+
+  // read data from device memory with aligned dev dest
+  ase_addr = (dev_aligned_addr & 0xfff);
+  dev_addr = ase_window_base + ase_addr;
+  uint64_t read_tmp = 0;
+  res = fpgaReadMMIO64(m_fpga_handle, 0, dev_addr, &read_tmp);
+  if (res != FPGA_OK) {
+    // Writing back a word built on a failed read would clobber the
+    // neighbouring device bytes.
+    MMD_DEBUG("DEBUG LOG : _ase_host_to_fpga read for unaligned write failed\n");
+    return res;
+  }
+
+  // overlay our data, check if the shift is correct here
+  memcpy((reinterpret_cast<char *>(&read_tmp) + shift), curr_src_ptr, unaligned_size);
+
+  // Write back data to the device
+  res = fpgaWriteMMIO64(m_fpga_handle, 0, dev_addr, read_tmp);
+  if (res != FPGA_OK) {
+    MMD_DEBUG("DEBUG LOG : _ase_host_to_fpga unaligned write failed\n");
+    return res;
+  }
+
+  count_left -= unaligned_size;
+
+  // Check if there is any byte left
+  if (count_left == 0) {
+    return FPGA_OK;
+  }
+
+  // Now the dest address should be byte aligned now
+  // Start the regular ASE transfer
+  curr_src_ptr += unaligned_size;
+  uint64_t next_dev_dest = dev_dest + unaligned_size;
+
+  while (count_left > 0) {
+    ase_window = next_dev_dest & ~(0xfff);
+    ase_addr = (next_dev_dest & 0xfff);  // only keep the lower 12 bits.
+
+    MMD_DEBUG("DEBUG LOG : ase_window in non-aligned loop is ");
+    MMD_DEBUG("%" PRIu64 "\n", ase_window);
+
+    // Write to ASE control
+    res = fpgaWriteMMIO64(m_fpga_handle, 0, ase_ctrl_addr, ase_window);
+    if (res != FPGA_OK) {
+      MMD_DEBUG("DEBUG LOG : Write to ASE control failed\n");
+      return res;
+    }
+
+    // Set final dev_addr
+    dev_addr = ase_window_base + ase_addr;
+
+    assert(dev_addr % 8 == 0);
+
+    size_t size = (count_left > 8) ? 8 : count_left;
+    if (mmd_helper::write_mmio(m_fpga_handle, curr_src_ptr, dev_addr, size) != 0) {
+      MMD_DEBUG("DEBUG LOG : _ase_host_to_fpga write_mmio failed\n");
+      return FPGA_EXCEPTION;
+    }
+
+    count_left -= size;
+    next_dev_dest += size;
+    curr_src_ptr += size;
+  }
+
+  return FPGA_OK;
+}
+
+// Transfer "count" bytes from FPGA to HOST using Address span expander(ASE)- will internally make
+// calls to handle unaligned and aligned MMIO reads.
+//
+// dev_dest is the device address to read FROM (the parameter name is kept for
+// interface compatibility). For every access the 4 KiB page containing the
+// address is selected by writing it to the ASE control register, then the data
+// is read through the ASE window at the page offset (low 12 bits). An address
+// that is not 8-byte aligned is first brought to alignment by reading the
+// enclosing 64-bit word and copying out the requested bytes.
+// Returns FPGA_OK on success, otherwise the failing OPAE result
+// (FPGA_EXCEPTION when mmd_helper::read_mmio() reports an error).
+fpga_result mmd_dma::_ase_fpga_to_host(uint64_t dev_dest, void *host_ptr, uint64_t count) {
+  MMD_DEBUG("DEBUG LOG : _ase_fpga_to_host is being called\n ");
+
+  assert(count < 64);
+
+  if (count == 0) return FPGA_OK;
+
+  const uint64_t ase_ctrl_addr = ASE_MMIO_BASE + ASE_MMIO_CTRL;
+  const uint64_t ase_window_base = ASE_MMIO_BASE + ASE_MMIO_WINDOW;
+
+  fpga_result res = FPGA_OK;
+  uint64_t count_left = count;
+  char *curr_host_ptr = static_cast<char *>(host_ptr);
+
+  // For ASE window
+  uint64_t ase_window;
+  uint64_t ase_addr;
+  uint64_t dev_addr;
+
+  if (dev_dest % 8 == 0) {
+    while (count_left > 0) {
+      ase_window = dev_dest & ~(0xfff);
+      ase_addr = (dev_dest & 0xfff);  // only keep the lower 12 bits.
+
+      MMD_DEBUG("DEBUG LOG : ase_window is ");
+      MMD_DEBUG("%" PRIu64 "\n", ase_window);
+
+      // Write to ASE control to switch page.
+      res = fpgaWriteMMIO64(m_fpga_handle, 0, ase_ctrl_addr, ase_window);
+      if (res != FPGA_OK) {
+        MMD_DEBUG("DEBUG LOG : Write to ASE control failed\n");
+        return res;
+      }
+
+      // Set final dev_addr
+      // dev_addr will be 8 byte aligned as long as dev_dest is 8 byte aligned.
+      dev_addr = ase_window_base + ase_addr;
+
+      assert(dev_addr % 8 == 0);
+
+      size_t size = (count_left > 8) ? 8 : count_left;
+
+      if (mmd_helper::read_mmio(m_fpga_handle, curr_host_ptr, dev_addr, size) != 0) {
+        MMD_DEBUG("DEBUG LOG : _ase_fpga_to_host read_mmio failed\n");
+        return FPGA_EXCEPTION;
+      }
+
+      count_left -= size;
+      dev_dest += size;
+      curr_host_ptr += size;
+    }
+
+    return FPGA_OK;
+  }
+
+  // First we need to handle the non byte aligned transfer
+
+  // Aligns address to 8 byte using dst masking method
+  const uint64_t shift = dev_dest % 8;
+  uint64_t unaligned_size = 8 - shift;
+  if (unaligned_size > count_left) unaligned_size = count_left;
+
+  // Read from the unaligned address
+  assert(unaligned_size < 8);
+
+  // Write to ASE control to switch page.
+  ase_window = dev_dest & ~(0xfff);
+
+  MMD_DEBUG("DEBUG LOG : ase_window is ");
+  MMD_DEBUG("%" PRIu64 "\n", ase_window);
+
+  res = fpgaWriteMMIO64(m_fpga_handle, 0, ase_ctrl_addr, ase_window);
+  if (res != FPGA_OK) {
+    MMD_DEBUG("DEBUG LOG : Write to ASE control failed\n");
+    return res;
+  }
+
+  // Get aligned dest address
+  uint64_t dev_aligned_addr = dev_dest - shift;
+  assert(dev_aligned_addr % 8 == 0);
+
+  // read data from device memory with aligned dev dest
+  ase_addr = (dev_aligned_addr & 0xfff);
+  dev_addr = ase_window_base + ase_addr;
+
+  uint64_t read_tmp = 0;
+  res = fpgaReadMMIO64(m_fpga_handle, 0, dev_addr, &read_tmp);
+  if (res != FPGA_OK) {
+    // Do not hand zeros to the caller as if they were device data.
+    MMD_DEBUG("DEBUG LOG : _ase_fpga_to_host unaligned read failed\n");
+    return res;
+  }
+
+  // copy out only the requested bytes of the aligned word
+  memcpy(curr_host_ptr, (reinterpret_cast<char *>(&read_tmp) + shift), unaligned_size);
+
+  count_left -= unaligned_size;
+
+  // Check if there is any byte left
+  if (count_left == 0) {
+    return FPGA_OK;
+  }
+
+  // Now the dest address should be byte aligned now
+  // Start the regular ASE transfer
+  curr_host_ptr += unaligned_size;
+  uint64_t next_dev_dest = dev_dest + unaligned_size;
+
+  while (count_left > 0) {
+    ase_window = next_dev_dest & ~(0xfff);
+    ase_addr = (next_dev_dest & 0xfff);  // only keep the lower 12 bits.
+
+    // Write to ASE control to switch page.
+    res = fpgaWriteMMIO64(m_fpga_handle, 0, ase_ctrl_addr, ase_window);
+    if (res != FPGA_OK) {
+      MMD_DEBUG("DEBUG LOG : Write to ASE control failed\n");
+      return res;
+    }
+
+    // Set final dev_addr
+    dev_addr = ase_window_base + ase_addr;
+
+    assert(dev_addr % 8 == 0);
+
+    size_t size = (count_left > 8) ? 8 : count_left;
+    if (mmd_helper::read_mmio(m_fpga_handle, curr_host_ptr, dev_addr, size) != 0) {
+      MMD_DEBUG("DEBUG LOG : _ase_fpga_to_host read_mmio failed\n");
+      return FPGA_EXCEPTION;
+    }
+
+    count_left -= size;
+    next_dev_dest += size;
+    curr_host_ptr += size;
+  }
+
+  return FPGA_OK;
+}
+} // namespace intel_opae_mmd
diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_dma.h b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_dma.h
new file mode 100644
index 0000000..a2841b1
--- /dev/null
+++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_dma.h
@@ -0,0 +1,89 @@
+// (c) 1992-2024 Intel Corporation.
+// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words
+// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S.
+// and/or other countries. Other marks and brands may be claimed as the property
+// of others. See Trademarks on intel.com for full list of Intel trademarks or
+// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera)
+// Your use of Intel Corporation's design tools, logic functions and other
+// software and tools, and its AMPP partner logic functions, and any output
+// files any of the foregoing (including device programming or simulation
+// files), and any associated documentation or information are expressly subject
+// to the terms and conditions of the Altera Program License Subscription
+// Agreement, Intel MegaCore Function License Agreement, or other applicable
+// license agreement, including, without limitation, that your use is for the
+// sole purpose of programming logic devices manufactured by Intel and sold by
+// Intel or its authorized distributors. Please refer to the applicable
+// agreement for further details.
+#ifndef MMD_DMA_H_
+#define MMD_DMA_H_
+
+#include <opae/fpga.h>
+#include <poll.h>
+
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <mutex>
+#include <queue>
+#include <thread>
+#include <unordered_map>
+
+#include "aocl_mmd.h"
+#include "mmd_helper.h"
+
+// DMA CSR indices in units of 64-bit words; dma_transfer() multiplies them by 8
+// to obtain MMIO byte offsets. SRC_ADDR is where the descriptor starts, STATUS
+// is the register whose bit 0 is polled as the busy flag.
+#define DMA_CSR_IDX_SRC_ADDR 0x5
+#define DMA_CSR_IDX_STATUS 0x9
+// Bit position of the dma_mode value inside the descriptor control word.
+#define MODE_SHIFT 26
+// For now limits to 16K to avoid DMA transfer hang in hw, further testing required to increase the value.
+#define DMA_BUFFER_SIZE (1024 * 16)
+// Unit in which the descriptor length is expressed (bytes per DMA line).
+#define DMA_LINE_SIZE 64
+// OR-ed into the shared buffer's IO address to form the host-side DMA address.
+#define DMA_HOST_MASK 0x2000000000000
+
+// Address span expander (ASE) MMIO layout: the control register at
+// ASE_MMIO_BASE + ASE_MMIO_CTRL receives the 4 KiB-aligned device page, and the
+// page is then accessed through ASE_MMIO_BASE + ASE_MMIO_WINDOW + page offset.
+#define ASE_MMIO_BASE 0x20000
+#define ASE_MMIO_CTRL 0x200
+#define ASE_MMIO_WINDOW 0x1000
+
+namespace intel_opae_mmd {
+
+// Transfer direction of a DMA descriptor. dma_transfer() shifts the value left
+// by MODE_SHIFT and ORs it into the descriptor control word.
+enum dma_mode { stand_by = 0x0, host_to_ddr = 0x1, ddr_to_host = 0x2, ddr_to_ddr = 0x3 };
+
+// One DMA descriptor as filled in by dma_transfer(). send_descriptor() writes
+// each field as its own 64-bit MMIO word, in declaration order.
+struct dma_descriptor_t {
+ uint64_t src_address;   // source address, masked to 35 bits by dma_transfer()
+ uint64_t dest_address;  // destination address, masked to 35 bits by dma_transfer()
+ uint32_t len;           // transfer length in DMA_LINE_SIZE units
+ uint32_t control;       // bit 31 set plus the dma_mode at MODE_SHIFT
+};
+
+// Drives the FPGA DMA engine and the address span expander (ASE) through OPAE.
+// Aligned bulk data moves by DMA through a shared host buffer; unaligned heads
+// and tails move through the ASE MMIO window. Instances are non-copyable.
+class mmd_dma final {
+ public:
+  // Prepares the shared DMA buffer; check initialized() afterwards.
+  mmd_dma(fpga_handle fpga_handle_arg, int mmd_handle);
+  // Releases the shared DMA buffer.
+  ~mmd_dma();
+
+  // Copying would duplicate ownership of the shared buffer.
+  mmd_dma(const mmd_dma &other) = delete;
+  mmd_dma &operator=(const mmd_dma &other) = delete;
+
+  // True once the constructor has finished setting up the shared buffer.
+  bool initialized() const { return m_initialized; }
+
+  // Read `size` bytes from device address dev_src into host_addr.
+  int fpga_to_host(void *host_addr, uint64_t dev_src, size_t size);
+  // Write `size` bytes from host_addr to device address dev_dest.
+  int host_to_fpga(const void *host_addr, uint64_t dev_dest, size_t size);
+  // Submit one descriptor (len in DMA_LINE_SIZE units) and wait until the
+  // engine is no longer busy.
+  int dma_transfer(uint64_t dev_src, uint64_t dev_dest, int len, dma_mode descriptor_mode);
+  // ASE (MMIO window) transfers for requests shorter than 64 bytes.
+  fpga_result _ase_host_to_fpga(uint64_t dev_dest, const void *src_ptr, uint64_t count);
+  fpga_result _ase_fpga_to_host(uint64_t dev_dest, void *host_ptr, uint64_t count);
+
+ private:
+  // Helper functions
+  int send_descriptor(uint64_t mmio_dst, dma_descriptor_t desc);
+  // Member variables
+  bool m_initialized;
+  fpga_handle m_fpga_handle;
+
+  // Shared buffer in host memory
+  uint64_t *dma_buf_ptr = NULL;
+  // Workspace ID used by OPAE to identify buffer
+  uint64_t dma_buf_wsid = 0;
+  // IO virtual address
+  uint64_t dma_buf_iova = 0;
+};
+
+}; // namespace intel_opae_mmd
+
+#endif // MMD_DMA_H_
diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_helper.cpp b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_helper.cpp
new file mode 100644
index 0000000..4af482a
--- /dev/null
+++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_helper.cpp
@@ -0,0 +1,163 @@
+// (c) 1992-2024 Intel Corporation.
+// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words
+// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S.
+// and/or other countries. Other marks and brands may be claimed as the property
+// of others. See Trademarks on intel.com for full list of Intel trademarks or
+// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera)
+// Your use of Intel Corporation's design tools, logic functions and other
+// software and tools, and its AMPP partner logic functions, and any output
+// files any of the foregoing (including device programming or simulation
+// files), and any associated documentation or information are expressly subject
+// to the terms and conditions of the Altera Program License Subscription
+// Agreement, Intel MegaCore Function License Agreement, or other applicable
+// license agreement, including, without limitation, that your use is for the
+// sole purpose of programming logic devices manufactured by Intel and sold by
+// Intel or its authorized distributors. Please refer to the applicable
+// agreement for further details.
+
+#include "mmd_helper.h"
+#include <inttypes.h>
+
+namespace mmd_helper {
+
+// Read `size` bytes of MMIO space starting at mmio_addr into host_addr.
+//
+// The transfer is split into 64-bit reads, then 32-bit reads, then one final
+// 32-bit read of which only the remaining (< 4) bytes are copied out. Each
+// value is read into a local variable and copied with memcpy, so host_addr
+// does not need any particular alignment.
+//
+// mmio_addr must be 4-byte aligned. Returns 0 on success, -1 on failure.
+int read_mmio(fpga_handle mmio_handle, void *host_addr, size_t mmio_addr, size_t size) {
+  fpga_result res = FPGA_OK;
+
+  MMD_DEBUG("DEBUG LOG : Device::read_mmio start: host_addr : %p\t mmio_addr : 0x%zx\t size : 0x%zx\n",
+            host_addr,
+            mmio_addr,
+            size);
+
+  if (mmio_addr % 4 != 0) {
+    MMD_DEBUG("DEBUG LOG : read_mmio function doesn't support non 4 Byte aligned mmio_addr due to OPAE\n");
+    return -1;
+  }
+
+  char *host_bytes = static_cast<char *>(host_addr);
+
+  while (size >= 8) {
+    MMD_DEBUG("DEBUG LOG : Using fpgaReadMMIO64() host_addr : %p\t mmio_addr : 0x%zx\t size : 0x8\n",
+              static_cast<void *>(host_bytes),
+              mmio_addr);
+    uint64_t data64 = 0;
+    res = fpgaReadMMIO64(mmio_handle, 0, mmio_addr, &data64);
+    if (res != FPGA_OK) {
+      MMD_DEBUG("DEBUG LOG : Error in read_mmio() host_addr : %p\t mmio_addr : 0x%zx\t size : 0x8\n",
+                static_cast<void *>(host_bytes),
+                mmio_addr);
+      return -1;
+    }
+    memcpy(host_bytes, &data64, sizeof(data64));
+    MMD_DEBUG("DEBUG LOG : the host_addr64 value is ");
+    MMD_DEBUG("%" PRIu64 "\n", data64);
+    host_bytes += 8;
+    mmio_addr += 8;
+    size -= 8;
+  }
+
+  while (size >= 4) {
+    MMD_DEBUG("DEBUG LOG : Using fpgaReadMMIO32() host_addr : %p\t mmio_addr : 0x%zx\t size : 0x4\n",
+              static_cast<void *>(host_bytes),
+              mmio_addr);
+    uint32_t data32 = 0;
+    res = fpgaReadMMIO32(mmio_handle, 0, mmio_addr, &data32);
+    if (res != FPGA_OK) {
+      MMD_DEBUG("DEBUG LOG : Error in read_mmio() host_addr : %p\t mmio_addr : 0x%zx\t size : 0x4\n",
+                static_cast<void *>(host_bytes),
+                mmio_addr);
+      return -1;
+    }
+    memcpy(host_bytes, &data32, sizeof(data32));
+    host_bytes += 4;
+    mmio_addr += 4;
+    size -= 4;
+  }
+
+  if (size > 0) {
+    // Fewer than 4 bytes left: read a full 32-bit word and keep only the
+    // leading `size` bytes.
+    uint32_t read_data;
+    MMD_DEBUG("DEBUG LOG : Using fpgaReadMMIO32() host_addr : %p\t mmio_addr : 0x%zx\t size : 0x%zx\n",
+              host_addr,
+              mmio_addr,
+              size);
+    res = fpgaReadMMIO32(mmio_handle, 0, mmio_addr, &read_data);
+    if (res != FPGA_OK) {
+      MMD_DEBUG("DEBUG LOG : Error in read_mmio() host_addr : %p\t mmio_addr : 0x%zx\t size : 0x%zx\n",
+                host_addr,
+                mmio_addr,
+                size);
+      MMD_DEBUG("result is %d \n", res);
+      return -1;
+    }
+
+    memcpy(host_bytes, &read_data, size);
+  }
+
+  return 0;
+}
+
+// Write `size` bytes from host_addr to MMIO space starting at mmio_addr.
+//
+// The transfer is split into 64-bit writes, then 32-bit writes, then a
+// read-modify-write of one 32-bit word for the remaining (< 4) bytes so the
+// bytes after the request keep their current value. Each value is loaded from
+// the host buffer with memcpy, so host_addr does not need any particular
+// alignment.
+//
+// Returns 0 on success, -1 on failure.
+int write_mmio(fpga_handle mmio_handle, const void *host_addr, size_t mmio_addr, size_t size) {
+  fpga_result res = FPGA_OK;
+
+  MMD_DEBUG("DEBUG LOG : Device::write_mmio start: host_addr : %p\t mmio_addr : 0x%zx\t size : 0x%zx\n",
+            host_addr,
+            mmio_addr,
+            size);
+
+  const char *host_bytes = static_cast<const char *>(host_addr);
+
+  while (size >= 8) {
+    MMD_DEBUG("DEBUG LOG : Using fpgaWriteMMIO64() host_addr : %p\t mmio_addr : 0x%zx\t \n",
+              static_cast<const void *>(host_bytes),
+              mmio_addr);
+    uint64_t data64 = 0;
+    memcpy(&data64, host_bytes, sizeof(data64));
+    res = fpgaWriteMMIO64(mmio_handle, 0, mmio_addr, data64);
+    if (res != FPGA_OK) {
+      MMD_DEBUG("DEBUG LOG : Error in write_mmio() host_addr : %p\t mmio_addr : 0x%zx\t \n",
+                static_cast<const void *>(host_bytes),
+                mmio_addr);
+      return -1;
+    }
+    host_bytes += 8;
+    mmio_addr += 8;
+    size -= 8;
+  }
+
+  while (size >= 4) {
+    MMD_DEBUG("DEBUG LOG : Using fpgaWriteMMIO32() host_addr : %p\t mmio_addr : 0x%zx\t \n",
+              static_cast<const void *>(host_bytes),
+              mmio_addr);
+    uint32_t data32 = 0;
+    memcpy(&data32, host_bytes, sizeof(data32));
+    res = fpgaWriteMMIO32(mmio_handle, 0, mmio_addr, data32);
+    if (res != FPGA_OK) {
+      MMD_DEBUG("DEBUG LOG : Error in write_mmio() host_addr : %p\t mmio_addr : 0x%zx\t\n",
+                static_cast<const void *>(host_bytes),
+                mmio_addr);
+      return -1;
+    }
+    host_bytes += 4;
+    mmio_addr += 4;
+    size -= 4;
+  }
+
+  if (size > 0) {
+    MMD_DEBUG("DEBUG LOG : Using fpgaWriteMMIO32() host_addr : %p\t mmio_addr : 0x%zx\t size : 0x%zx\n",
+              static_cast<const void *>(host_bytes),
+              mmio_addr,
+              size);
+    uint32_t tmp_data32 = 0;
+    // First read the data back; a failed read must not be written back as
+    // zeros over the bytes that follow the request.
+    res = fpgaReadMMIO32(mmio_handle, 0, mmio_addr, &tmp_data32);
+    if (res == FPGA_OK) {
+      memcpy(&tmp_data32, host_bytes, size);  // Apply our data overlay
+      res = fpgaWriteMMIO32(mmio_handle, 0, mmio_addr, tmp_data32);
+    }
+    if (res != FPGA_OK) {
+      MMD_DEBUG("DEBUG LOG : Error in write_mmio() host_addr : %p\t mmio_addr : 0x%zx\t size : 0x%zx\n",
+                static_cast<const void *>(host_bytes),
+                mmio_addr,
+                size);
+      return -1;
+    }
+  }
+
+  return 0;
+}
+
+}; // namespace mmd_helper
diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_helper.h b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_helper.h
new file mode 100644
index 0000000..b7e2667
--- /dev/null
+++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_helper.h
@@ -0,0 +1,41 @@
+// (c) 1992-2024 Intel Corporation.
+// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words
+// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S.
+// and/or other countries. Other marks and brands may be claimed as the property
+// of others. See Trademarks on intel.com for full list of Intel trademarks or
+// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera)
+// Your use of Intel Corporation's design tools, logic functions and other
+// software and tools, and its AMPP partner logic functions, and any output
+// files any of the foregoing (including device programming or simulation
+// files), and any associated documentation or information are expressly subject
+// to the terms and conditions of the Altera Program License Subscription
+// Agreement, Intel MegaCore Function License Agreement, or other applicable
+// license agreement, including, without limitation, that your use is for the
+// sole purpose of programming logic devices manufactured by Intel and sold by
+// Intel or its authorized distributors. Please refer to the applicable
+// agreement for further details.
+
+#ifndef MMD_HELPER_H
+#define MMD_HELPER_H
+
+#include <opae/fpga.h>
+#include <stdarg.h>
+#include <cstdio>   // vprintf, fflush, stdout
+#include <cstdlib>  // std::getenv
+
+/**
+ * Print a printf-style debug message to stdout.
+ *
+ * The message is emitted only when the MMD_ENABLE_DEBUG environment variable
+ * is present (any value, including the empty string, enables output). stdout
+ * is flushed after every message.
+ *
+ * The environment is sampled once, on the first call, and the result is
+ * reused afterwards: this function is called for every MMIO word transferred,
+ * so repeating the getenv() lookup on each call is needless work. As a
+ * consequence, changing MMD_ENABLE_DEBUG after the first call has no effect.
+ *
+ * @param format printf-style format string.
+ * @param ...    arguments consumed by the format string.
+ */
+inline void MMD_DEBUG(const char *format, ...) {
+  static const bool debug_enabled = (std::getenv("MMD_ENABLE_DEBUG") != NULL);
+  if (!debug_enabled) {
+    return;
+  }
+
+  va_list arglist;
+  va_start(arglist, format);
+  vprintf(format, arglist);
+  va_end(arglist);
+  fflush(stdout);
+}
+
+namespace mmd_helper {
+
+// MMIO transfer helpers built on the OPAE fpga*MMIO* calls.
+// mmio_handle - OPAE handle used for the MMIO accesses
+// host_addr   - host-side buffer
+// mmio_addr   - byte offset into the MMIO space
+// size        - number of bytes to transfer
+
+// NOTE(review): presumably copies `size` bytes from MMIO space into host_addr and
+// follows the same 0 / -1 return convention as write_mmio - confirm against mmd_helper.cpp.
+int read_mmio(fpga_handle mmio_handle, void *host_addr, size_t mmio_addr, size_t size);
+// Writes `size` bytes from host_addr to MMIO space using 64-bit writes while at least
+// 8 bytes remain, then 32-bit writes, and finally a 32-bit read-modify-write for a
+// trailing partial word. Returns 0 on success and -1 when an OPAE MMIO write fails.
+int write_mmio(fpga_handle mmio_handle, const void *host_addr, size_t mmio_addr, size_t size);
+
+}; // namespace mmd_helper
+
+#endif // MMD_HELPER_H
diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/include/aocl_mmd.h b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/include/aocl_mmd.h
new file mode 100644
index 0000000..16992da
--- /dev/null
+++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/include/aocl_mmd.h
@@ -0,0 +1,377 @@
+// Copyright 2022 Intel Corporation
+// SPDX-License-Identifier: MIT
+
+#ifndef AOCL_MMD_H
+#define AOCL_MMD_H
+
+/* TODO: this file comes from OpenCL SDK and should be formatted there first */
+/* clang-format off */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Support for memory mapped ACL devices.
+ *
+ * Typical API lifecycle, from the perspective of the caller.
+ *
+ * 1. aocl_mmd_open must be called first, to provide a handle for further
+ * operations.
+ *
+ * 2. The interrupt and status handlers must be set.
+ *
+ * 3. Read and write operations are performed.
+ *
+ * 4. aocl_mmd_close may be called to shut down the device. No further
+ * operations are permitted until a subsequent aocl_mmd_open call.
+ *
+ * aocl_mmd_get_offline_info can be called anytime including before
+ * open. aocl_mmd_get_info can be called anytime between open and close.
+ */
+
+// #ifndef AOCL_MMD_CALL
+// #if defined(_WIN32)
+// #define AOCL_MMD_CALL __declspec(dllimport)
+// #else
+// #define AOCL_MMD_CALL
+// #endif
+// #endif
+
+#ifndef AOCL_MMD_CALL
+#if defined(_WIN32)
+#define AOCL_MMD_CALL __declspec(dllimport)
+#else
+#define AOCL_MMD_CALL __attribute__((visibility ("default")))
+#endif
+#endif
+
+#ifndef WEAK
+#if defined(_WIN32)
+#define WEAK
+#else
+#define WEAK __attribute__((weak))
+#endif
+#endif
+
+/* size_t.
+ * <stddef.h> is used for both C and C++: this include sits inside the
+ * extern "C" block opened above, and the declarations in this header use the
+ * unqualified global name size_t, which <cstddef> is only guaranteed to
+ * provide as std::size_t. */
+#include <stddef.h>
+
+/* The MMD API's version - the runtime expects this string when
+ * AOCL_MMD_VERSION is queried. This changes only if the API has changed */
+#define AOCL_MMD_VERSION_STRING "20.3"
+
+/* Memory types that can be supported - bitfield. Other than physical memory
+ * these types closely align with the OpenCL SVM types.
+ *
+ * AOCL_MMD_PHYSICAL_MEMORY - The vendor interface includes IP to communicate
+ * directly with physical memory such as DDR, QDR, etc.
+ *
+ * AOCL_MMD_SVM_COARSE_GRAIN_BUFFER - The vendor interface includes support for
+ * caching SVM pointer data and requires explicit function calls from the user
+ * to synchronize the cache between the host processor and the FPGA. This level
+ * of SVM is not currently supported by Altera except as a subset of
+ * SVM_FINE_GRAIN_SYSTEM support.
+ *
+ * AOCL_MMD_SVM_FINE_GRAIN_BUFFER - The vendor interface includes support for
+ * caching SVM pointer data and requires additional information from the user
+ * and/or host runtime that can be collected during pointer allocation in order
+ * to synchronize the cache between the host processor and the FPGA. Once this
+ * additional data is provided for an SVM pointer, the vendor interface handles
+ * cache synchronization between the host processor & the FPGA automatically.
+ * This level of SVM is not currently supported by Altera except as a subset
+ * of SVM_FINE_GRAIN_SYSTEM support.
+ *
+ * AOCL_MMD_SVM_FINE_GRAIN_SYSTEM - The vendor interface includes support for
+ * caching SVM pointer data and does not require any additional information to
+ * synchronize the cache between the host processor and the FPGA. The vendor
+ * interface handles cache synchronization between the host processor & the
+ * FPGA automatically for all SVM pointers. This level of SVM support is
+ * currently under development by Altera and some features may not be fully
+ * supported.
+ */
+#define AOCL_MMD_PHYSICAL_MEMORY (1 << 0)
+#define AOCL_MMD_SVM_COARSE_GRAIN_BUFFER (1 << 1)
+#define AOCL_MMD_SVM_FINE_GRAIN_BUFFER (1 << 2)
+#define AOCL_MMD_SVM_FINE_GRAIN_SYSTEM (1 << 3)
+
+/* program modes - bitfield
+ *
+ * AOCL_MMD_PROGRAM_PRESERVE_GLOBAL_MEM - preserve contents of global memory
+ * when this bit is set to 1. If programming can't occur without preserving
+ * global memory contents, the program function must fail, in which case the
+ * runtime may re-invoke program with this bit set to 0, allowing programming
+ * to occur even if doing so destroys global memory contents.
+ *
+ * more modes are reserved for stacking on in the future
+ */
+#define AOCL_MMD_PROGRAM_PRESERVE_GLOBAL_MEM (1 << 0)
+typedef int aocl_mmd_program_mode_t;
+
+
+typedef void* aocl_mmd_op_t;
+
+/* Time value split into a low and a high half.
+ * NOTE(review): both fields are plain `unsigned`, so the "32 bits" wording
+ * assumes a 32-bit unsigned int on the target platform - confirm. */
+typedef struct {
+ unsigned lo; /* 32 least significant bits of time value. */
+ unsigned hi; /* 32 most significant bits of time value. */
+} aocl_mmd_timestamp_t;
+
+
+/* Defines the set of characteristics that can be probed about the board before
+ * opening a device. The type of data returned by each is specified in
+ * parentheses in the adjacent comment.
+ *
+ * AOCL_MMD_NUM_BOARDS and AOCL_MMD_BOARD_NAMES
+ * These two fields can be used to implement multi-device support. The MMD
+ * layer may have a list of devices it is capable of interacting with, each
+ * identified with a unique name. The length of the list should be returned
+ * in AOCL_MMD_NUM_BOARDS, and the names of these devices returned in
+ * AOCL_MMD_BOARD_NAMES. The OpenCL runtime will try to call aocl_mmd_open
+ * for each board name returned in AOCL_MMD_BOARD_NAMES.
+ */
+/* Query identifiers for aocl_mmd_get_offline_info(); the type named in
+ * parentheses is the type of the data returned through param_value. */
+typedef enum {
+ AOCL_MMD_VERSION = 0, /* Version of MMD (char*)*/
+ AOCL_MMD_NUM_BOARDS = 1, /* Number of candidate boards (int)*/
+ AOCL_MMD_BOARD_NAMES = 2, /* Names of boards available delimiter=; (char*)*/
+ AOCL_MMD_VENDOR_NAME = 3, /* Name of vendor (char*) */
+ AOCL_MMD_VENDOR_ID = 4, /* An integer ID for the vendor (int) */
+ AOCL_MMD_USES_YIELD = 5, /* 1 if yield must be called to poll hw (int) */
+ /* The following can be combined in a bit field:
+ * AOCL_MMD_PHYSICAL_MEMORY, AOCL_MMD_SVM_COARSE_GRAIN_BUFFER, AOCL_MMD_SVM_FINE_GRAIN_BUFFER, AOCL_MMD_SVM_FINE_GRAIN_SYSTEM.
+ * Prior to 14.1, all existing devices supported physical memory and no types of SVM memory, so this
+ * is the default when this operation returns '0' for board MMDs with a version prior to 14.1
+ */
+ AOCL_MMD_MEM_TYPES_SUPPORTED = 6, /* Bitfield of supported memory types (presumably int - TODO confirm) */
+} aocl_mmd_offline_info_t;
+
+
+/** Possible capabilities to return from AOCL_MMD_*_MEM_CAPABILITIES query */
+/**
+ * If not set allocation function is not supported, even if other capabilities are set.
+ */
+#define AOCL_MMD_MEM_CAPABILITY_SUPPORTED (1 << 0)
+/**
+ * Supports atomic access to the memory by either the host or device.
+ */
+#define AOCL_MMD_MEM_CAPABILITY_ATOMIC (1 << 1)
+/**
+ * Supports concurrent access to the memory either by host or device if the
+ * accesses are not on the same block. Block granularity is defined by
+ * AOCL_MMD_*_MEM_CONCURRENT_GRANULARITY; blocks are aligned to this
+ * granularity.
+ */
+#define AOCL_MMD_MEM_CAPABILITY_CONCURRENT (1 << 2)
+/**
+ * Memory can be accessed by multiple devices at the same time.
+ */
+#define AOCL_MMD_MEM_CAPABILITY_P2P (1 << 3)
+
+
+/* Defines the set of characteristics that can be probed about the board after
+ * opening a device. This can involve communication to the device
+ *
+ * AOCL_MMD_NUM_KERNEL_INTERFACES - The number of kernel interfaces, usually 1
+ *
+ * AOCL_MMD_KERNEL_INTERFACES - the handle for each kernel interface.
+ * param_value will have size AOCL_MMD_NUM_KERNEL_INTERFACES * sizeof int
+ *
+ * AOCL_MMD_PLL_INTERFACES - the handle for each pll associated with each
+ * kernel interface. If a kernel interface is not clocked by acl_kernel_clk
+ * then return -1
+ *
+ * */
+/* Query identifiers for aocl_mmd_get_info(); the type named in parentheses is
+ * the type of the data returned through param_value. Numbering starts at 1. */
+typedef enum {
+ AOCL_MMD_NUM_KERNEL_INTERFACES = 1, /* Number of Kernel interfaces (int) */
+ AOCL_MMD_KERNEL_INTERFACES = 2, /* Kernel interface (int*) */
+ AOCL_MMD_PLL_INTERFACES = 3, /* Kernel clk handles (int*) */
+ AOCL_MMD_MEMORY_INTERFACE = 4, /* Global memory handle (int) */
+ AOCL_MMD_TEMPERATURE = 5, /* Temperature measurement (float) */
+ AOCL_MMD_PCIE_INFO = 6, /* PCIe information (char*) */
+ AOCL_MMD_BOARD_NAME = 7, /* Name of board (char*) */
+ AOCL_MMD_BOARD_UNIQUE_ID = 8, /* Unique ID of board (int) */
+ AOCL_MMD_CONCURRENT_READS = 9, /* # of parallel reads; 1 is serial (presumably int - confirm) */
+ AOCL_MMD_CONCURRENT_WRITES = 10, /* # of parallel writes; 1 is serial (presumably int - confirm) */
+ AOCL_MMD_CONCURRENT_READS_OR_WRITES = 11, /* total # of concurrent operations, reads + writes (presumably int - confirm) */
+ AOCL_MMD_MIN_HOST_MEMORY_ALIGNMENT = 12, /* Min alignment that the ASP supports for host allocations (size_t) */
+ AOCL_MMD_HOST_MEM_CAPABILITIES = 13, /* Capabilities of aocl_mmd_host_alloc() (unsigned int)*/
+ AOCL_MMD_SHARED_MEM_CAPABILITIES = 14, /* Capabilities of aocl_mmd_shared_alloc (unsigned int)*/
+ AOCL_MMD_DEVICE_MEM_CAPABILITIES = 15, /* Capabilities of aocl_mmd_device_alloc (unsigned int)*/
+ AOCL_MMD_HOST_MEM_CONCURRENT_GRANULARITY = 16, /* Block granularity for AOCL_MMD_MEM_CAPABILITY_CONCURRENT, host memory (size_t) */
+ AOCL_MMD_SHARED_MEM_CONCURRENT_GRANULARITY = 17, /* Block granularity for AOCL_MMD_MEM_CAPABILITY_CONCURRENT, shared memory (size_t) */
+ AOCL_MMD_DEVICE_MEM_CONCURRENT_GRANULARITY = 18, /* Block granularity for AOCL_MMD_MEM_CAPABILITY_CONCURRENT, device memory (size_t) */
+} aocl_mmd_info_t;
+
+/* Data passed by pointer to an aocl_mmd_device_interrupt_handler_fn callback.
+ * NOTE(review): field meanings below are inferred from the names only -
+ * confirm against the runtime that fills this struct in. */
+typedef struct {
+ unsigned long long int exception_type; /* presumably identifies the kind of device exception */
+ void *user_private_info; /* presumably extra data describing the exception */
+ size_t user_cb; /* presumably the size in bytes of user_private_info */
+}aocl_mmd_interrupt_info;
+
+/* Kernel interrupt callback: receives the device handle and the user_data
+ * pointer registered through aocl_mmd_set_interrupt_handler(). */
+typedef void (*aocl_mmd_interrupt_handler_fn)( int handle, void* user_data );
+/* Device interrupt callback: like the kernel interrupt callback, but also
+ * receives an aocl_mmd_interrupt_info describing the event.
+ * NOTE(review): the matching registration function is not declared in this header - confirm. */
+typedef void (*aocl_mmd_device_interrupt_handler_fn)( int handle, aocl_mmd_interrupt_info* data_in, void* user_data );
+/* Operation status callback registered through aocl_mmd_set_status_handler():
+ * `op` identifies the operation, `status` is 0 on success and negative on error. */
+typedef void (*aocl_mmd_status_handler_fn)( int handle, void* user_data, aocl_mmd_op_t op, int status );
+
+
+/* Get information about the board using the enum aocl_mmd_offline_info_t for
+ * offline info (called without a handle), and the enum aocl_mmd_info_t for
+ * info specific to a certain board.
+ * Arguments:
+ *
+ * requested_info_id - a value from the aocl_mmd_offline_info_t enum (for
+ * aocl_mmd_get_offline_info) or the aocl_mmd_info_t enum (for
+ * aocl_mmd_get_info)
+ *
+ * param_value_size - size of the param_value field in bytes. This should
+ * match the size of the return type expected as indicated in the enum
+ * definition. For example, the AOCL_MMD_TEMPERATURE returns a float, so
+ * the param_value_size should be set to sizeof(float) and you should
+ * expect the same number of bytes returned in param_size_ret.
+ *
+ * param_value - pointer to the variable that will receive the returned info
+ *
+ * param_size_ret - receives the number of bytes of data actually returned
+ *
+ * Returns: a negative value to indicate error.
+ */
+AOCL_MMD_CALL int aocl_mmd_get_offline_info(
+ aocl_mmd_offline_info_t requested_info_id,
+ size_t param_value_size,
+ void* param_value,
+ size_t* param_size_ret ) WEAK;
+
+/* Per-board counterpart of aocl_mmd_get_offline_info: `handle` is the value
+ * returned by aocl_mmd_open and requested_info_id comes from the
+ * aocl_mmd_info_t enum. May be called anytime between open and close.
+ * The remaining arguments and the return value follow the description above.
+ */
+AOCL_MMD_CALL int aocl_mmd_get_info(
+ int handle,
+ aocl_mmd_info_t requested_info_id,
+ size_t param_value_size,
+ void* param_value,
+ size_t* param_size_ret ) WEAK;
+
+/* Open and initialize the named device.
+ *
+ * The name is typically one specified by the AOCL_MMD_BOARD_NAMES offline
+ * info.
+ *
+ * Arguments:
+ * name - open the board with this name (provided as a C-style string,
+ * i.e. NUL terminated ASCII.)
+ *
+ * Returns: the non-negative integer handle for the board, otherwise a
+ * negative value to indicate error. Upon receiving the error, the OpenCL
+ * runtime will proceed to open other known devices, hence the MMD mustn't
+ * exit the application if an open call fails.
+ */
+AOCL_MMD_CALL int aocl_mmd_open(const char *name) WEAK;
+
+/* Close an opened device, by its handle.
+ * Returns: 0 on success, negative values on error.
+ */
+AOCL_MMD_CALL int aocl_mmd_close(int handle) WEAK;
+
+/* Set the interrupt handler for the opened device.
+ * The interrupt handler is called whenever the client needs to be notified
+ * of an asynchronous event signaled by the device internals.
+ * For example, the kernel has completed or is stalled.
+ *
+ * Important: Interrupts from the kernel must be ignored until this handler is
+ * set
+ *
+ * Arguments:
+ * fn - the callback function to invoke when a kernel interrupt occurs
+ * user_data - the data that should be passed to fn when it is called.
+ *
+ * Returns: 0 if successful, negative on error
+ */
+AOCL_MMD_CALL int aocl_mmd_set_interrupt_handler( int handle, aocl_mmd_interrupt_handler_fn fn, void* user_data ) WEAK;
+
+/* Set the operation status handler for the opened device.
+ * The operation status handler is called with
+ * status 0 when the operation has completed successfully.
+ * status negative when the operation completed with errors.
+ *
+ * Arguments:
+ * fn - the callback function to invoke when a status update is to be
+ * performed.
+ * user_data - the data that should be passed to fn when it is called.
+ *
+ * Returns: 0 if successful, negative on error
+ */
+AOCL_MMD_CALL int aocl_mmd_set_status_handler( int handle, aocl_mmd_status_handler_fn fn, void* user_data ) WEAK;
+
+/* Read, write and copy operations on a single interface.
+ * If op is NULL
+ * - Then these calls must block until the operation is complete.
+ * - The status handler is not called for this operation.
+ *
+ * If op is non-NULL, then:
+ * - These may be non-blocking calls
+ * - The status handler must be called upon completion, with status 0
+ * for success, and a negative value for failure.
+ *
+ * Arguments:
+ * op - the operation object used to track this operation's progress
+ *
+ * len - the size in bytes to transfer
+ *
+ * src - the host buffer being read from
+ *
+ * dst - the host buffer being written to
+ *
+ * mmd_interface - the handle to the interface being accessed. E.g. To
+ * access global memory this handle will be whatever is returned by
+ * aocl_mmd_get_info when called with AOCL_MMD_MEMORY_INTERFACE.
+ *
+ * offset/src_offset/dst_offset - the byte offset within the interface that
+ * the transfer will begin at.
+ *
+ * The return value is 0 if the operation launch was successful, and
+ * negative otherwise.
+ */
+/* Read `len` bytes starting at byte `offset` of interface `mmd_interface`
+ * into the host buffer `dst`. */
+AOCL_MMD_CALL int aocl_mmd_read(
+ int handle,
+ aocl_mmd_op_t op,
+ size_t len,
+ void* dst,
+ int mmd_interface, size_t offset) WEAK;
+/* Write `len` bytes from the host buffer `src` to interface `mmd_interface`,
+ * starting at byte `offset`. */
+AOCL_MMD_CALL int aocl_mmd_write(
+ int handle,
+ aocl_mmd_op_t op,
+ size_t len,
+ const void* src,
+ int mmd_interface, size_t offset ) WEAK;
+
+/** Error values*/
+#define AOCL_MMD_ERROR_SUCCESS 0
+#define AOCL_MMD_ERROR_INVALID_HANDLE -1
+#define AOCL_MMD_ERROR_OUT_OF_MEMORY -2
+#define AOCL_MMD_ERROR_UNSUPPORTED_ALIGNMENT -3
+#define AOCL_MMD_ERROR_UNSUPPORTED_PROPERTY -4
+#define AOCL_MMD_ERROR_INVALID_POINTER -5
+#define AOCL_MMD_ERROR_INVALID_MIGRATION_SIZE -6
+
+// CoreDLA modifications
+// To support multiple different FPGA boards, anything board specific must be implemented in a
+// board-specific MMD instead of the CoreDLA runtime layer.
+#ifdef DLA_MMD
+/* <stdint.h> rather than <cstdint>: this section sits inside the extern "C"
+ * block of a header that is also guarded for C, and the declarations below use
+ * the unqualified global names uint32_t / uint64_t. */
+#include <stdint.h>
+// Query functions to get board-specific values.
+// Declared with (void) so that they are proper prototypes when compiled as C.
+// NOTE(review): the units of the DDR size and DDR clock frequency are not stated
+// in this header - confirm against the board MMD implementation.
+AOCL_MMD_CALL int dla_mmd_get_max_num_instances(void) WEAK;
+AOCL_MMD_CALL uint64_t dla_mmd_get_ddr_size_per_instance(void) WEAK;
+AOCL_MMD_CALL double dla_mmd_get_ddr_clock_freq(void) WEAK;
+
+// Wrappers around CSR and DDR reads and writes to abstract away board-specific offsets.
+// handle   - device handle returned by aocl_mmd_open
+// instance - index of the CoreDLA instance being addressed
+// addr     - address within that instance's CSR or DDR space
+// data     - host-side value/buffer (source for writes, destination for reads)
+// length   - number of bytes to transfer (DDR variants only)
+AOCL_MMD_CALL int dla_mmd_csr_write(int handle, int instance, uint64_t addr, const uint32_t* data) WEAK;
+AOCL_MMD_CALL int dla_mmd_csr_read(int handle, int instance, uint64_t addr, uint32_t* data) WEAK;
+AOCL_MMD_CALL int dla_mmd_ddr_write(int handle, int instance, uint64_t addr, uint64_t length, const void* data) WEAK;
+AOCL_MMD_CALL int dla_mmd_ddr_read(int handle, int instance, uint64_t addr, uint64_t length, void* data) WEAK;
+
+// Get the clk_dla PLL clock frequency in MHz, returns a negative value if there is an error
+AOCL_MMD_CALL double dla_mmd_get_coredla_clock_freq(int handle) WEAK;
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+/* clang-format on */
+#endif