| author | Eric Dao <eric@erickhangdao.com> | 2025-03-10 17:54:31 -0400 |
|---|---|---|
| committer | Eric Dao <eric@erickhangdao.com> | 2025-03-10 17:54:31 -0400 |
| commit | ab224e2e6ba65f5a369ec392f99cd8845ad06c98 (patch) | |
| tree | a1e757e9341863ed52b8ad4c5a1c45933aab9da4 /python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie | |
| parent | 40da1752f2c8639186b72f6838aa415e854d0b1d (diff) | |
Diffstat (limited to 'python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie')
13 files changed, 3137 insertions, 0 deletions
diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/CMakeLists.txt b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/CMakeLists.txt new file mode 100644 index 0000000..445a304 --- /dev/null +++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/CMakeLists.txt @@ -0,0 +1,62 @@ +# (C) 2017 Intel Corporation. All rights reserved. +# Your use of Intel Corporation's design tools, logic functions and other +# software and tools, and its AMPP partner logic functions, and any output +# files any of the foregoing (including device programming or simulation +# files), and any associated documentation or information are expressly subject +# to the terms and conditions of the Intel Program License Subscription +# Agreement, Intel MegaCore Function License Agreement, or other applicable +# license agreement, including, without limitation, that your use is for the +# sole purpose of programming logic devices manufactured by Intel and sold by +# Intel or its authorized distributors. Please refer to the applicable +# agreement for further details. + +cmake_minimum_required(VERSION 2.8.12) +project(mmd) + +add_definitions(-DI_DK_AFU_ID="11446C9D-AA42-4085-9B3D-4EEF9429A4AD") + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules") + +find_package(OPAE REQUIRED) +find_package(NUMA REQUIRED) + +# DLA specific modifications made to the MMD +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDLA_MMD") + +enable_language(C ASM) + +set(ASM_OPTIONS "-x assembler-with-cpp") +if(${CMAKE_C_COMPILER_ID} STREQUAL "Clang") + set(ASM_OPTIONS "${ASM_OPTIONS} -no-integrated-as") +endif() + +set(CMAKE_ASM_FLAGS "${CFLAGS} ${ASM_OPTIONS}") + +set(MMD_SRC + ./host/mmd.cpp + ./host/mmd_device.cpp + ./host/mmd_dma.cpp + ./host/mmd_helper.cpp + ./host/kernel_interrupt.cpp +) + +# Add a shared library target called intel_opae_mmd +# and build it from the MMD_SRC files +add_library(intel_opae_mmd SHARED ${MMD_SRC}) + +# Specify the include directories to be used when compiling intel_opae_mmd library +target_include_directories(intel_opae_mmd PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/include + ) + +# Specify libraries needed when linking the intel_opae_mmd library +target_link_libraries(intel_opae_mmd + libopae-c + libnuma +) + +# Set the installation rules for the project +install(TARGETS intel_opae_mmd + LIBRARY DESTINATION lib + COMPONENT intel_opae_mmd +) diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/cmake/modules/FindNUMA.cmake b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/cmake/modules/FindNUMA.cmake new file mode 100755 index 0000000..c981150 --- /dev/null +++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/cmake/modules/FindNUMA.cmake @@ -0,0 +1,34 @@ +# - Try to find libnuma +# Once done will define: +# +# NUMA_FOUND - system has libnuma +# NUMA_INCLUDE_DIRS - include directory with numa.h +# NUMA_LIBRARIES - link with this for libnuma + +find_path(NUMA_INCLUDE_DIRS + NAMES numa.h + PATHS + ${LIBNUMA_ROOT}/include + /usr/include + /p/psg/swip/dla/resources/numactl/2.0.16/include + + ) + +find_library(NUMA_LIBRARIES + NAMES numa + PATHS + ${LIBNUMA_ROOT}/lib + ${LIBNUMA_ROOT}/lib64 + /usr/lib + /usr/lib64 + /p/psg/swip/dla/resources/numactl/2.0.16/lib + + ) + +FIND_PACKAGE_HANDLE_STANDARD_ARGS(NUMA + REQUIRED_VARS NUMA_INCLUDE_DIRS NUMA_LIBRARIES) + +add_library(libnuma IMPORTED SHARED) +set_target_properties(libnuma PROPERTIES + IMPORTED_LOCATION ${NUMA_LIBRARIES} + INTERFACE_INCLUDE_DIRECTORIES ${NUMA_INCLUDE_DIRS}) diff --git 
a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/cmake/modules/FindOPAE.cmake b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/cmake/modules/FindOPAE.cmake new file mode 100755 index 0000000..6395d7c --- /dev/null +++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/cmake/modules/FindOPAE.cmake @@ -0,0 +1,44 @@ +# - Try to find libintelfpga +# Once done, this will define +# +# libopae-c_FOUND - system has libopae-c +# libopae-c_INCLUDE_DIRS - the libopae-c include directories +# libopae-c_LIBRARIES - link these to use libopae-c + +find_package(PkgConfig) +pkg_check_modules(PC_OPAE QUIET opae-c) + +# Use pkg-config to get hints about paths +execute_process(COMMAND pkg-config --cflags opae-c --silence-errors + COMMAND cut -d I -f 2 + OUTPUT_VARIABLE OPAE-C_PKG_CONFIG_INCLUDE_DIRS) +set(OPAE-C_PKG_CONFIG_INCLUDE_DIRS "${OPAE-C_PKG_CONFIG_INCLUDE_DIRS}" CACHE STRING "Compiler flags for OPAE-C library") + +# Include dir +find_path(libopae-c_INCLUDE_DIRS + NAMES opae/fpga.h + PATHS ${LIBOPAE-C_ROOT}/include + ${OPAE-C_PKG_CONFIG_INCLUDE_DIRS} + /usr/local/include + /usr/include + ${CMAKE_EXTRA_INCLUDES}) + +# The library itself +find_library(libopae-c_LIBRARIES + NAMES opae-c + PATHS ${LIBOPAE-C_ROOT}/lib + ${LIBOPAE-C_ROOT}/lib64 + /usr/local/lib + /usr/lib + /lib + /usr/lib/x86_64-linux-gnu + ${CMAKE_EXTRA_LIBS}) + +FIND_PACKAGE_HANDLE_STANDARD_ARGS(OPAE + REQUIRED_VARS libopae-c_LIBRARIES libopae-c_INCLUDE_DIRS) + +add_library(libopae-c IMPORTED SHARED) +set_target_properties(libopae-c PROPERTIES + IMPORTED_LOCATION ${libopae-c_LIBRARIES} + INTERFACE_INCLUDE_DIRECTORIES ${libopae-c_INCLUDE_DIRS}) + diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/kernel_interrupt.cpp b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/kernel_interrupt.cpp new file mode 100644 index 0000000..97882d4 --- /dev/null +++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/kernel_interrupt.cpp @@ -0,0 +1,257 @@ +// (c) 1992-2024 Intel Corporation. +// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words +// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S. +// and/or other countries. Other marks and brands may be claimed as the property +// of others. See Trademarks on intel.com for full list of Intel trademarks or +// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera) +// Your use of Intel Corporation's design tools, logic functions and other +// software and tools, and its AMPP partner logic functions, and any output +// files any of the foregoing (including device programming or simulation +// files), and any associated documentation or information are expressly subject +// to the terms and conditions of the Altera Program License Subscription +// Agreement, Intel MegaCore Function License Agreement, or other applicable +// license agreement, including, without limitation, that your use is for the +// sole purpose of programming logic devices manufactured by Intel and sold by +// Intel or its authorized distributors. Please refer to the applicable +// agreement for further details. 
+ +#include "kernel_interrupt.h" + +#include <poll.h> +#include <sys/eventfd.h> + +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <iostream> +#include <thread> + +#include "mmd_device.h" + +using namespace intel_opae_mmd; + +static const int mmd_kernel_interrupt_line_num = 1; +static const uint32_t enable_int_mask = 0x00000001; +static const uint32_t disable_int_mask = 0x00000000; + +bool KernelInterrupt::enable_thread = false; + +static const int debug_log_level = 0; + +// TODO: use consistent function throughout MMD for controlling debug +// messages. This debug_print function is from OFS. +static void debug_print(std::string &err_msg, int msglog) { + if (debug_log_level >= msglog) { + std::cerr << "KernelInterrupt: " << err_msg << std::endl; + } +} + +static inline void check_result(fpga_result res, const char *err_str) { + if (res == FPGA_OK) { + return; + } + std::string opae_err_str = + std::string("KernelInterrupt: ") + std::string(err_str) + std::string(": ") + std::string(fpgaErrStr(res)); +} + +/** KernelInterrupt constructor + */ +KernelInterrupt::KernelInterrupt(fpga_handle fpga_handle_arg, int mmd_handle) + : m_work_thread_active(false), + m_eventfd(0), + m_kernel_interrupt_fn(nullptr), + m_kernel_interrupt_user_data(nullptr), + m_fpga_handle(fpga_handle_arg), + m_mmd_handle(mmd_handle), + m_event_handle(nullptr) { + if (std::getenv("MMD_ENABLE_DEBUG")) { + MMD_DEBUG("DEBUG LOG : KernelInterrupt Constructor\n"); + } + set_member_for_interrupts(); + enable_interrupts(); +} + +/** KernelInterrupt destructor + * calls disable_interrupts() + */ +KernelInterrupt::~KernelInterrupt() { + if (std::getenv("MMD_ENABLE_DEBUG")) { + MMD_DEBUG("DEBUG LOG : KernelInterrupt Destructor\n"); + } + try { + disable_interrupts(); + } catch (...) { + std::string err("destructor error"); + debug_print(err, 0); + } +} + +/** disable_interrupts() function is used in KernelInterrupt destructor + * if interupt not enabled , !enable_thread + * then disable interrupt mask + * else if interrupts are used, + * call noftify_work_thread(), join the thread + * we call OPAE API fpgaUnregisterEvent() to unregister FPGA event, + * it tells driver caller is no longer interested in notification for event associated with m_event_handle + * we call OPAE API fpgaDestroyEventHandle() to free resources + */ +void KernelInterrupt::disable_interrupts() { + if (!enable_thread) { + if (std::getenv("MMD_ENABLE_DEBUG")) { + MMD_DEBUG("DEBUG LOG : KernelInterrupt disabling interrupts\n"); + } + assert(m_work_thread_active == false); + return; + } + + m_work_thread_active = false; + notify_work_thread(); + m_work_thread->join(); + + if (m_event_handle != nullptr) { + fpga_result res; + + res = fpgaUnregisterEvent(m_fpga_handle, FPGA_EVENT_INTERRUPT, m_event_handle); + check_result(res, "error fpgaUnregisterEvent"); + + res = fpgaDestroyEventHandle(&m_event_handle); + check_result(res, "error fpgaDestroyEventHandle"); + } + if (std::getenv("MMD_ENABLE_DEBUG")) { + MMD_DEBUG("DEBUG LOG : KernelInterrupt disabling interrupts\n"); + } +} + +/** notify_work_thread() function is called by disable_interrupts() function + * eventfd object created by OPAE API fpgaGetOSObjectFromEventHandle() , m_eventfd, + * can be used as an event wait/notify mechanism by user space applications and by kernel, + * to notify user space applications of events + * every time write() is performed on eventfd, + * the value of uint64_t being written is added to count and wakeup is performed. 
+ * We dont use read() below but read() will return count value to user space and reset count to 0 + */ +void KernelInterrupt::notify_work_thread() { + uint64_t val = 1; + ssize_t res = write(m_eventfd, &val, sizeof(val)); + if (res < 0) { + std::cerr << "Warning: KernelInterrupts::notify_work_thread()" + " write to eventfd failed: " + << strerror(errno) << std::endl; + } +} + +/** enable_interrupts() function is called by Kernel Interrupt constructor + * if interrupt is not enabled it will disable interrupt mask , set thread active as false and return + * if interrupt is enabled, it will use OPAE APIs to create event handle fpgaCreateEventHandle() + * OPAE event APIs provide functions for handling asynchronous events such as errors and interrupts + * Associated with every event a process has registered for is an fpga_event_handle, + * which encapsulates OS specific data structure for event objects + * On Linux fpga_event_handle can be used as file descriptor + * and passed to select(), poll() and similar functions to wait for asynchronous events + * OPAE API fpgaRegisterEvent() is used to tell driver that caller is interested in notification for event specified + * OPAE API fpgaGetOSObjectFromEventHandle() checks validity of event handle and + * gets OS object used to subscribe and unsubscribe to events + * we create a thread and call work_thread() + */ +void KernelInterrupt::enable_interrupts() { + if (!enable_thread) { + if (std::getenv("MMD_ENABLE_DEBUG")) { + MMD_DEBUG("DEBUG LOG : KernelInterrupt enabling interrupts\n"); + } + m_work_thread_active = false; + return; + } + + fpga_result res; + + res = fpgaCreateEventHandle(&m_event_handle); + check_result(res, "error creating event handle"); + + res = fpgaRegisterEvent(m_fpga_handle, FPGA_EVENT_INTERRUPT, m_event_handle, mmd_kernel_interrupt_line_num); + check_result(res, "error registering event"); + + res = fpgaGetOSObjectFromEventHandle(m_event_handle, &m_eventfd); + check_result(res, "error getting event file handle"); + + m_work_thread_active = true; + m_work_thread = std::unique_ptr<std::thread>(new std::thread([this] { this->work_thread(); })); + if (std::getenv("MMD_ENABLE_DEBUG")) { + MMD_DEBUG("DEBUG LOG : KernelInterrupt enabling interrupts\n"); + } +} + +/** work_thread() is called from enable_interrupts() function while creating new thread + * it calls wait_for_event(), disables interrupt mask + * creates lock_guard with m_mutex, calls kernel interrupt function and then enables interrupt mask + */ +void KernelInterrupt::work_thread() { + while (m_work_thread_active) { + wait_for_event(); + std::lock_guard<std::mutex> lock(m_mutex); + if (m_kernel_interrupt_fn != nullptr) { + m_kernel_interrupt_fn(m_mmd_handle, m_kernel_interrupt_user_data); + } + } +} + +/** wait_for_event() is called from work_thread() function + * it uses poll() function to wait for event on a file descriptor, + * the m_event_fd file descriptor which we got from fpgaOSObjectFromEventHandle() + * poll() uses pollfd struct, which inncludes + * fd - file descriptor, events - requested events, revents - returned events + * timeout argument in poll() specifies number of milliseconds, + * poll() will block waiting for file descriptor + * On success, poll() returns a nonnegative value which is the + * number of elements in the pollfds whose revents fields have been + * set to a nonzero value (indicating an event or an error). 
A + * return value of zero indicates that the system call timed out + * before any file descriptors became read + */ +void KernelInterrupt::wait_for_event() { + // Use timeout when polling eventfd because sometimes interrupts are missed. + // This may be caused by knonw race condition with runtime, or there may + // be occasional events lost from OPAE. + + MMD_DEBUG("DEBUG LOG : KernelInterrupt waiting for event using poll()\n"); + const int timeout_ms = 250; + struct pollfd pfd = {.fd = m_eventfd, .events = POLLIN, .revents = 0}; + int num_events = poll(&pfd, 1, timeout_ms); + if (num_events <= 0) { + std::string err(num_events < 0 ? strerror(errno) : "timed out"); + std::string err_str("poll(): "); + debug_print(err_str.append(err), 1); + } else if (pfd.revents != POLLIN) { + std::string err("poll error num: ", pfd.revents); + debug_print(err, 0); + } else { + uint64_t val = 0; + ssize_t bytes_read = read(pfd.fd, &val, sizeof(val)); + if (bytes_read < 0) { + std::string err(strerror(errno)); + std::string err_str("read: "); + debug_print(err_str.append(err), 1); + } + } +} + +void KernelInterrupt::set_kernel_interrupt(aocl_mmd_interrupt_handler_fn fn, void *user_data) { + MMD_DEBUG("DEBUG LOG : KernelInterrupt setting kernel interrupt\n"); + std::lock_guard<std::mutex> lock(m_mutex); + m_kernel_interrupt_fn = fn; + m_kernel_interrupt_user_data = user_data; +} + +/** Configure interrupts + * set_member_for_interrupts() called from KernelInterrupts constructor + */ +void KernelInterrupt::set_member_for_interrupts() { + static bool initialized = false; + if (initialized) { + return; + } + // Use interrupts + MMD_DEBUG("DEBUG LOG : Using interrupts\n"); + + enable_thread = true; + initialized = true; +} diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/kernel_interrupt.h b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/kernel_interrupt.h new file mode 100644 index 0000000..9ea6e68 --- /dev/null +++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/kernel_interrupt.h @@ -0,0 +1,68 @@ +// (c) 1992-2024 Intel Corporation. +// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words +// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S. +// and/or other countries. Other marks and brands may be claimed as the property +// of others. See Trademarks on intel.com for full list of Intel trademarks or +// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera) +// Your use of Intel Corporation's design tools, logic functions and other +// software and tools, and its AMPP partner logic functions, and any output +// files any of the foregoing (including device programming or simulation +// files), and any associated documentation or information are expressly subject +// to the terms and conditions of the Altera Program License Subscription +// Agreement, Intel MegaCore Function License Agreement, or other applicable +// license agreement, including, without limitation, that your use is for the +// sole purpose of programming logic devices manufactured by Intel and sold by +// Intel or its authorized distributors. Please refer to the applicable +// agreement for further details. 
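The wait/notify mechanism implemented in kernel_interrupt.cpp above pairs an eventfd with poll(): notify_work_thread() write()s a 64-bit value to the eventfd (the value is added to the kernel-side counter, waking any poller), while wait_for_event() poll()s the descriptor with a 250 ms timeout and then read()s it, which returns and resets the counter. A minimal standalone sketch of that pattern (illustration only, not part of the MMD sources):

```cpp
// Sketch of the eventfd + poll() wait/notify pattern used by KernelInterrupt.
#include <poll.h>
#include <sys/eventfd.h>
#include <unistd.h>

#include <cstdint>
#include <cstdio>
#include <thread>

int main() {
  int efd = eventfd(0, 0);  // counter starts at zero
  if (efd < 0) return 1;

  std::thread waiter([efd] {
    struct pollfd pfd = {efd, POLLIN, 0};
    int n = poll(&pfd, 1, 250);  // block for up to 250 ms, like wait_for_event()
    if (n > 0 && (pfd.revents & POLLIN)) {
      uint64_t count = 0;
      ssize_t r = read(efd, &count, sizeof(count));  // returns the counter, resets it to 0
      if (r == (ssize_t)sizeof(count))
        std::printf("woken, count=%llu\n", (unsigned long long)count);
    } else {
      std::printf("timed out or poll error\n");
    }
  });

  uint64_t one = 1;
  if (write(efd, &one, sizeof(one)) < 0)  // adds 1 to the counter, wakes the poller
    std::perror("write(eventfd)");
  waiter.join();
  close(efd);
  return 0;
}
```

The 250 ms poll timeout mirrors the defensive timeout wait_for_event() uses to cope with occasionally missed interrupts.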
+ +#ifndef KERNEL_INTERRUPT_H_ +#define KERNEL_INTERRUPT_H_ + +#include <opae/fpga.h> + +#include <atomic> +#include <chrono> +#include <mutex> +#include <thread> + +#include "aocl_mmd.h" + +namespace intel_opae_mmd { + +class KernelInterrupt final { + public: + KernelInterrupt(fpga_handle fpga_handle_arg, int mmd_handle); + ~KernelInterrupt(); + + void enable_interrupts(); + void disable_interrupts(); + void set_kernel_interrupt(aocl_mmd_interrupt_handler_fn fn, void *user_data); + + KernelInterrupt(const KernelInterrupt &) = delete; + KernelInterrupt &operator=(const KernelInterrupt &) = delete; + KernelInterrupt(KernelInterrupt &&) = delete; + KernelInterrupt &operator=(KernelInterrupt &&) = delete; + + private: + static void set_member_for_interrupts(); + + void notify_work_thread(); + void wait_for_event(); + void work_thread(); + + static bool enable_thread; + + std::mutex m_mutex; + std::unique_ptr<std::thread> m_work_thread; + std::atomic<bool> m_work_thread_active; + int m_eventfd; + aocl_mmd_interrupt_handler_fn m_kernel_interrupt_fn; + void *m_kernel_interrupt_user_data; + fpga_handle m_fpga_handle; + int m_mmd_handle; + fpga_event_handle m_event_handle; +}; + +}; // namespace intel_opae_mmd + +#endif // KERNEL_INTERRUPT_H_ diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd.cpp b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd.cpp new file mode 100644 index 0000000..58cd8e0 --- /dev/null +++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd.cpp @@ -0,0 +1,830 @@ +// (c) 1992-2024 Intel Corporation. +// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words +// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S. +// and/or other countries. Other marks and brands may be claimed as the property +// of others. See Trademarks on intel.com for full list of Intel trademarks or +// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera) +// Your use of Intel Corporation's design tools, logic functions and other +// software and tools, and its AMPP partner logic functions, and any output +// files any of the foregoing (including device programming or simulation +// files), and any associated documentation or information are expressly subject +// to the terms and conditions of the Altera Program License Subscription +// Agreement, Intel MegaCore Function License Agreement, or other applicable +// license agreement, including, without limitation, that your use is for the +// sole purpose of programming logic devices manufactured by Intel and sold by +// Intel or its authorized distributors. Please refer to the applicable +// agreement for further details. + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <zlib.h> + +#include <linux/mman.h> +#include <sys/mman.h> + +// On some systems MAP_HUGE_2MB is not defined. It should be defined for all +// platforms that DCP supports, but we also want ability to compile MMD on +// CentOS 6 systems. 
+#ifndef MAP_HUGE_SHIFT +#define MAP_HUGE_SHIFT 26 +#endif + +#ifndef MAP_HUGE_2MB +#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) +#endif + +#ifndef MAP_HUGE_1GB +#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) +#endif + +#include <algorithm> +#include <cassert> +#include <cstdio> +#include <iomanip> +#include <iostream> +#include <map> +#include <sstream> +#include <unordered_map> +#include <vector> +#ifdef DLA_MMD +#include <chrono> +#include <thread> +#endif + +#include "aocl_mmd.h" +#include "mmd_device.h" + +bool diagnose = 0; + +/** If the MMD is loaded dynamically, destructors in the MMD will execute before + * the destructors in the runtime upon program termination. The DeviceMapManager + * guards accesses to the device/handle maps to make sure the runtime doesn't + * get to reference them after MMD destructors have been called. Destructor + * makes sure that all devices are closed at program termination regardless of + * what the runtime does. Implemented as a singleton. + */ +class DeviceMapManager final { + public: + /** C++ std map data structure to keep track of + * object id -> handle and handle -> device + */ + typedef std::map<int, Device *> t_handle_to_dev_map; + typedef std::map<uint64_t, int> t_id_to_handle_map; + + static const int SUCCESS = 0; + static const int FAILURE = -1; + + /** Returns handle and device pointer to the device with the specified name + * Creates a new entry for this device if it doesn't already exist + * Return 0 on success, -1 on failure + */ + int get_or_create_device(const char *board_name, int *handle, Device **device); + + /** Return obj id based on ASP name.*/ + uint64_t id_from_name(const char *board_name); + + /** Return MMD handle based on obj id. Returned value is negative if board + * doesn't exist + */ + inline int handle_from_id(uint64_t obj_id); + + /** Return pointer to device based on MMD handle. 
Returned value is null + * if board doesn't exist + */ + Device *device_from_handle(int handle); + + /** Closes specified device if it exists */ + void close_device_if_exists(int handle); + + /* Returns a reference to the class singleton */ + static DeviceMapManager &get_instance() { + static DeviceMapManager instance; + return instance; + } + + DeviceMapManager(DeviceMapManager const &) = delete; + void operator=(DeviceMapManager const &) = delete; + ~DeviceMapManager() { + // delete all allocated Device* entries + while (handle_to_dev_map->size() > 0) { + int handle = handle_to_dev_map->begin()->first; + aocl_mmd_close(handle); +#ifdef SIM + std::cout << "# mmd.cpp: When destroying DeviceMapManager in ASE, assume it worked.\n"; + break; +#endif + MMD_DEBUG("DEBUG LOG : In DeviceMapManager destructor, closing device with handle %d \n", handle); + } + delete handle_to_dev_map; + delete id_to_handle_map; + handle_to_dev_map = nullptr; + id_to_handle_map = nullptr; + } + + private: + DeviceMapManager() { + handle_to_dev_map = new t_handle_to_dev_map(); + id_to_handle_map = new t_id_to_handle_map(); + + MMD_DEBUG("DEBUG LOG : Constructing DeviceMapManager object\n"); + } + t_handle_to_dev_map *handle_to_dev_map = nullptr; + t_id_to_handle_map *id_to_handle_map = nullptr; +}; +static DeviceMapManager &device_manager = DeviceMapManager::get_instance(); + +/** Returns handle and device pointer to the device with the specified name + * Creates a new entry for this device if it doesn't already exist + * Return 0 on success, -1 on failure + */ +int DeviceMapManager::get_or_create_device(const char *board_name, int *handle, Device **device) { + int _handle = MMD_INVALID_PARAM; + Device *_device = nullptr; + + if (id_to_handle_map == nullptr || handle_to_dev_map == nullptr) { + MMD_DEBUG( + "DEBUG LOG : Failure in DeviceMapManager::get_or_create_device,id_to_handle_map or handle_to_dev_map is " + "NULL\n"); + return DeviceMapManager::FAILURE; + } + + uint64_t obj_id = id_from_name(board_name); + if (!obj_id) { + MMD_DEBUG("DEBUG LOG : Failure in DeviceMapManager::get_or_create_device. obj_id : %ld \n", obj_id); + return false; + } + if (id_to_handle_map->count(obj_id) == 0) { + try { + _device = new Device(obj_id); + _handle = _device->get_mmd_handle(); + id_to_handle_map->insert({obj_id, _handle}); + handle_to_dev_map->insert({_handle, _device}); + } catch (std::runtime_error &e) { + MMD_DEBUG("DEBUG LOG : Failure in DeviceMapManager::get_or_create_device %s\n", e.what()); + delete _device; + return DeviceMapManager::FAILURE; + } + MMD_DEBUG("DEBUG LOG : Success in creating new device object handle : %d \n", _handle); + } else { + _handle = id_to_handle_map->at(obj_id); + _device = handle_to_dev_map->at(_handle); + MMD_DEBUG("DEBUG LOG : Success in retrieving device metadata(handle , object) , handle : %d\n", _handle); + } + + (*handle) = _handle; + (*device) = _device; + + MMD_DEBUG("DEBUG LOG : Success in creating new device object , handle : %d\n", _handle); + return DeviceMapManager::SUCCESS; +} + +/** Return obj id based on ASP name.*/ +uint64_t DeviceMapManager::id_from_name(const char *board_name) { + uint64_t obj_id = 0; + if (Device::parse_board_name(board_name, obj_id)) { + MMD_DEBUG("DEBUG LOG : Success in retrieving object id from board name\n"); + return obj_id; + } else { + MMD_DEBUG("DEBUG LOG : Failed to retrieve object id from board name\n"); + return 0; + } +} + +/** Return MMD handle based on obj id. 
Returned value is negative if board + * doesn't exist + */ +inline int DeviceMapManager::handle_from_id(uint64_t obj_id) { + int handle = MMD_INVALID_PARAM; + if (id_to_handle_map) { + auto it = id_to_handle_map->find(obj_id); + if (it != id_to_handle_map->end()) { + handle = it->second; + } + MMD_DEBUG("DEBUG LOG : Success in retrieving handle from object id. handle : %d \n", handle); + } else { + MMD_DEBUG("DEBUG LOG : Failed to retrieve handle from object id \n"); + } + return handle; +} + +/** Return pointer to device based on MMD handle. Returned value is null + * if board doesn't exist + */ +Device *DeviceMapManager::device_from_handle(int handle) { + Device *dev = nullptr; + if (handle_to_dev_map) { + auto it = handle_to_dev_map->find(handle); + if (it != handle_to_dev_map->end()) { + return it->second; + } + MMD_DEBUG("DEBUG LOG : Success in retrieving device from handle. handle : %d \n", handle); + } else { + MMD_DEBUG("DEBUG LOG : Failed to retrieve device from handle\n"); + } + return dev; +} + +/** Closes specified device if it exists */ +void DeviceMapManager::close_device_if_exists(int handle) { + if (handle_to_dev_map) { + if (handle_to_dev_map->count(handle) > 0) { + Device *dev = handle_to_dev_map->at(handle); + uint64_t obj_id = dev->get_fpga_obj_id(); + delete dev; + + handle_to_dev_map->erase(handle); + id_to_handle_map->erase(obj_id); + MMD_DEBUG("DEBUG LOG : Closing device with handle : %d\n", handle); + } else { + MMD_DEBUG("DEBUG LOG : Nothing to close. Device with handle : %d already closed\n", handle); + } + } else { + MMD_DEBUG("DEBUG LOG : Error, no handle to device map entry found for handle : %d \n", handle); + } +} + +/** Interface for checking if AFU has ASP loaded */ +bool mmd_asp_loaded(const char *name) { + uint64_t obj_id = device_manager.id_from_name(name); + if (!obj_id) { + MMD_DEBUG("DEBUG LOG : Error, no object id found for board : %s \n", name); + return false; + } + + int handle = device_manager.handle_from_id(obj_id); + if (handle > 0) { + Device *dev = device_manager.device_from_handle(handle); + if (dev) { + MMD_DEBUG("DEBUG LOG : ASP loaded for handle : %d \n", handle); + return dev->asp_loaded(); + } else { + MMD_DEBUG("DEBUG LOG : ASP not loaded for handle : %d \n", handle); + return false; + } + } else { + bool asp_loaded = false; + try { + Device dev(obj_id); + asp_loaded = dev.asp_loaded(); + } catch (std::runtime_error &e) { + MMD_DEBUG("DEBUG LOG : ASP not loaded for handle : %d , %s\n", handle, e.what()); + return false; + } + + MMD_DEBUG("DEBUG LOG : ASP loaded : %d (0 - not loaded , 1 - loaded) for handle : %d \n", asp_loaded, handle); + return asp_loaded; + } +} + +/** Function called as part of aocl_mmd_get_offline_info() + * to determine number of baords in system + */ +static unsigned int get_offline_num_acl_boards(const char *asp_uuid) { + bool asp_only = true; + fpga_guid guid; + fpga_result res = FPGA_OK; + uint32_t num_matches = 0; + bool ret_err = false; + fpga_properties filter = NULL; + + if (uuid_parse(asp_uuid, guid) < 0) { + MMD_DEBUG("Error parsing guid '%s'\n", asp_uuid); + ret_err = true; + goto out; + } + + res = fpgaGetProperties(NULL, &filter); + if (res != FPGA_OK) { + MMD_DEBUG("Error creating properties object: %s\n", fpgaErrStr(res)); + ret_err = true; + goto out; + } + + if (asp_only) { + res = fpgaPropertiesSetGUID(filter, guid); + if (res != FPGA_OK) { + MMD_DEBUG("Error setting GUID: %s\n", fpgaErrStr(res)); + ret_err = true; + goto out; + } + } + + res = fpgaPropertiesSetObjectType(filter, 
FPGA_ACCELERATOR); + if (res != FPGA_OK) { + MMD_DEBUG("Error setting object type: %s\n", fpgaErrStr(res)); + ret_err = true; + goto out; + } + + res = fpgaEnumerate(&filter, 1, NULL, 0, &num_matches); + if (res != FPGA_OK) { + MMD_DEBUG("Error enumerating AFCs: %s\n", fpgaErrStr(res)); + ret_err = true; + goto out; + } + +out: + if (filter) fpgaDestroyProperties(&filter); + + if (ret_err) { + return MMD_AOCL_ERR; + } else { + return num_matches; + } +} + +/** Function called as part of aocl_mmd_get_offline_info() + * to determine names of boards in the system + */ +static bool get_offline_board_names(std::string &boards, bool asp_only = true) { + boards = "dla_agx7_ofs_board"; + return true; +} + +// Macros used for acol_mmd_get_offline_info and aocl_mmd_get_info +#define RESULT_INT(X) \ + { \ + *((int *)param_value) = X; \ + if (param_size_ret) *param_size_ret = sizeof(int); \ + } +#define RESULT_SIZE_T(X) \ + { \ + *((size_t *)param_value) = X; \ + if (param_size_ret) *param_size_ret = sizeof(size_t); \ + } + +#define RESULT_STR(X) \ + do { \ + unsigned Xlen = strnlen(X, 4096) + 1; \ + unsigned Xcpylen = (param_value_size <= Xlen) ? param_value_size : Xlen; \ + memcpy((void *)param_value, X, Xcpylen); \ + if (param_size_ret) *param_size_ret = Xcpylen; \ + } while (0) + +/** Get information about the board using the enum aocl_mmd_offline_info_t for + * offline info (called without a handle), and the enum aocl_mmd_info_t for + * info specific to a certain board. + * Arguments: + * + * requested_info_id - a value from the aocl_mmd_offline_info_t enum + * + * param_value_size - size of the param_value field in bytes. This should + * match the size of the return type expected as indicated in the enum + * definition. + * + * param_value - pointer to the variable that will receive the returned info + * + * param_size_ret - receives the number of bytes of data actually returned + * + * Returns: a negative value to indicate error. + */ + +// From DLA perspective, only AOCL_MMD_BOARD_NAMES info we care +int aocl_mmd_get_offline_info(aocl_mmd_offline_info_t requested_info_id, + size_t param_value_size, + void *param_value, + size_t *param_size_ret) { + /** aocl_mmd_get_offline_info can be called many times by the runtime + * and it is expensive to query the system. Only compute values first + * time aocl_mmd_get_offline_info called future iterations use saved results + */ + static bool initialized = false; + static int mem_type_info; + static unsigned int num_acl_boards; + static std::string boards; + static bool success; + + if (!initialized) { + mem_type_info = (int)AOCL_MMD_PHYSICAL_MEMORY; + num_acl_boards = get_offline_num_acl_boards(I_DK_AFU_ID); + success = get_offline_board_names(boards, true); + initialized = true; + } + + switch (requested_info_id) { + case AOCL_MMD_VERSION: + RESULT_STR(AOCL_MMD_VERSION_STRING); + break; + case AOCL_MMD_NUM_BOARDS: { + RESULT_INT(num_acl_boards); + break; + } + case AOCL_MMD_VENDOR_NAME: + RESULT_STR("Intel Corp"); + break; + case AOCL_MMD_BOARD_NAMES: { + if (success) { + RESULT_STR(boards.c_str()); + } else { + return MMD_AOCL_ERR; + } + break; + } + case AOCL_MMD_VENDOR_ID: + RESULT_INT(0); + break; + case AOCL_MMD_USES_YIELD: + RESULT_INT(0); + break; + case AOCL_MMD_MEM_TYPES_SUPPORTED: + RESULT_INT(mem_type_info); + break; + } + + return 0; +} + +/** Get information about the board using the enum aocl_mmd_info_t for + * info specific to a certain board. 
+ * Arguments: + * + * requested_info_id - a value from the aocl_mmd_info_t enum + * + * param_value_size - size of the param_value field in bytes. This should + * match the size of the return type expected as indicated in the enum + * definition. For example, the AOCL_MMD_TEMPERATURE returns a float, so + * the param_value_size should be set to sizeof(float) and you should + * expect the same number of bytes returned in param_size_ret. + * + * param_value - pointer to the variable that will receive the returned info + * + * param_size_ret - receives the number of bytes of data actually returned + * + * Returns: a negative value to indicate error. + */ +int aocl_mmd_get_info( + int handle, aocl_mmd_info_t requested_info_id, size_t param_value_size, void *param_value, size_t *param_size_ret) { + MMD_DEBUG("DEBUG LOG : called aocl_mmd_get_info\n"); + Device *dev = device_manager.device_from_handle(handle); + if (dev == NULL) return 0; + + assert(param_value); + switch (requested_info_id) { + case AOCL_MMD_BOARD_NAME: { + std::ostringstream board_name; + board_name << "Intel OFS Platform" + << " (" << dev->get_dev_name() << ")"; + RESULT_STR(board_name.str().c_str()); + break; + } + case AOCL_MMD_NUM_KERNEL_INTERFACES: + RESULT_INT(1); + break; + case AOCL_MMD_KERNEL_INTERFACES: + RESULT_INT(AOCL_MMD_KERNEL); + break; +#ifdef SIM + case AOCL_MMD_PLL_INTERFACES: + RESULT_INT(-1); + break; +#else + case AOCL_MMD_PLL_INTERFACES: + RESULT_INT(-1); + break; +#endif + case AOCL_MMD_MEMORY_INTERFACE: + RESULT_INT(AOCL_MMD_MEMORY); + break; + case AOCL_MMD_PCIE_INFO: { + RESULT_STR(dev->get_bdf().c_str()); + break; + } + case AOCL_MMD_BOARD_UNIQUE_ID: + RESULT_INT(0); + break; + case AOCL_MMD_TEMPERATURE: { + if (param_value_size == sizeof(float)) { + float *ptr = static_cast<float *>(param_value); + *ptr = dev->get_temperature(); + if (param_size_ret) *param_size_ret = sizeof(float); + } + break; + } + case AOCL_MMD_CONCURRENT_READS: + RESULT_INT(1); + break; + case AOCL_MMD_CONCURRENT_WRITES: + RESULT_INT(1); + break; + case AOCL_MMD_CONCURRENT_READS_OR_WRITES: + RESULT_INT(2); + break; + + case AOCL_MMD_MIN_HOST_MEMORY_ALIGNMENT: + RESULT_SIZE_T(64); + break; + + case AOCL_MMD_HOST_MEM_CAPABILITIES: { + RESULT_INT(0); + break; + } + case AOCL_MMD_SHARED_MEM_CAPABILITIES: { + RESULT_INT(0); + break; + } + + case AOCL_MMD_DEVICE_MEM_CAPABILITIES: + RESULT_INT(0); + break; + case AOCL_MMD_HOST_MEM_CONCURRENT_GRANULARITY: + RESULT_SIZE_T(0); + break; + case AOCL_MMD_SHARED_MEM_CONCURRENT_GRANULARITY: + RESULT_SIZE_T(0); + break; + case AOCL_MMD_DEVICE_MEM_CONCURRENT_GRANULARITY: + RESULT_SIZE_T(0); + break; + } + return 0; +} + +#undef RESULT_INT +#undef RESULT_STR + +/** Set the interrupt handler for the opened device. + * The interrupt handler is called whenever the client needs to be notified + * of an asynchronous event signaled by the device internals. + * For example, the kernel has completed or is stalled. + * + * Important: Interrupts from the kernel must be ignored until this handler is + * set + * + * Arguments: + * fn - the callback function to invoke when a kernel interrupt occurs + * user_data - the data that should be passed to fn when it is called. 
+ * + * Returns: 0 if successful, negative on error + */ +int AOCL_MMD_CALL aocl_mmd_set_interrupt_handler(int handle, aocl_mmd_interrupt_handler_fn fn, void *user_data) { + Device *dev = device_manager.device_from_handle(handle); + if (dev) { + dev->set_kernel_interrupt(fn, user_data); + MMD_DEBUG("DEBUG LOG : Set kernel interrupt handler for device handle : %d\n", handle); + } else { + MMD_DEBUG("DEBUG LOG : Error setting kernel interrupt handler for device handle : %d\n", handle); + return MMD_AOCL_ERR; + } + return 0; +} + +/** Set the operation status handler for the opened device. + * The operation status handler is called with + * status 0 when the operation has completed successfully. + * status negative when the operation completed with errors. + * + * Arguments: + * fn - the callback function to invoke when a status update is to be + * performed. + * user_data - the data that should be passed to fn when it is called. + * + * Returns: 0 if successful, negative on error + */ + +int AOCL_MMD_CALL aocl_mmd_set_status_handler(int handle, aocl_mmd_status_handler_fn fn, void *user_data) { + Device *dev = device_manager.device_from_handle(handle); + if (dev) { + dev->set_status_handler(fn, user_data); + MMD_DEBUG("DEBUG LOG : Set status handler for device handle : %d\n", handle); + } + return 0; +} + +/** Host to device-global-memory write (HOST DDR -> FPGA DDR) + * If op is NULL + * - Then these calls must block until the operation is complete. + * - The status handler is not called for this operation. + * + * If op is non-NULL, then: + * - These may be non-blocking calls + * - The status handler must be called upon completion, with status 0 + * for success, and a negative value for failure. + * + * Arguments: + * op - the operation object used to track this operations progress + * + * len - the size in bytes to transfer + * + * src - the host buffer being read from + * + * dst - the host buffer being written to + * + * mmd_interface - the handle to the interface being accessed. E.g. To + * access global memory this handle will be whatever is returned by + * aocl_mmd_get_info when called with AOCL_MMD_MEMORY_INTERFACE. + * + * offset/src_offset/dst_offset - the byte offset within the interface that + * the transfer will begin at. + * + * The return value is 0 if the operation launch was successful, and + * negative otherwise. + */ +int AOCL_MMD_CALL +aocl_mmd_write(int handle, aocl_mmd_op_t op, size_t len, const void *src, int mmd_interface, size_t offset) { + MMD_DEBUG( + "DEBUG LOG : aocl_mmd_write: handle : %d\t operation : %p\t len : 0x%zx\t src : %p\t mmd_interface : %d\t offset " + ": 0x%zx\n", + handle, + op, + len, + src, + mmd_interface, + offset); + Device *dev = device_manager.device_from_handle(handle); + if (dev){ + return dev->write_block(op, mmd_interface, src, offset, len); + } + else { + MMD_DEBUG("DEBUG LOG : Error in aocl_mmd_write , device not found for handle : %d\n", handle); + return -1; + } +} + +/** Host reading from device-global-memory (FPGA DDR -> HOST DDR) + * If op is NULL + * - Then these calls must block until the operation is complete. + * - The status handler is not called for this operation. + * + * If op is non-NULL, then: + * - These may be non-blocking calls + * - The status handler must be called upon completion, with status 0 + * for success, and a negative value for failure. 
+ * + * Arguments: + * op - the operation object used to track this operations progress + * + * len - the size in bytes to transfer + * + * src - the host buffer being read from + * + * dst - the host buffer being written to + * + * mmd_interface - the handle to the interface being accessed. E.g. To + * access global memory this handle will be whatever is returned by + * aocl_mmd_get_info when called with AOCL_MMD_MEMORY_INTERFACE. + * + * offset/src_offset/dst_offset - the byte offset within the interface that + * the transfer will begin at. + * + * The return value is 0 if the operation launch was successful, and + * negative otherwise. + */ + +int AOCL_MMD_CALL aocl_mmd_read(int handle, aocl_mmd_op_t op, size_t len, void *dst, int mmd_interface, size_t offset) { + MMD_DEBUG( + "DEBUG LOG : aocl_mmd_read: handle : %d\t operation : %p\t len : 0x%zx\t dst : %p\t mmd_interface : %d\t offset " + ": 0x%zx\n", + handle, + op, + len, + dst, + mmd_interface, + offset); + Device *dev = device_manager.device_from_handle(handle); + if (dev){ + return dev->read_block(op, mmd_interface, dst, offset, len); + } + else { + MMD_DEBUG("DEBUG LOG : Error in aocl_mmd_read , device not found for handle : %d\n", handle); + return -1; + } +} + +/** Open and initialize the named device. + * + * The name is typically one specified by the AOCL_MMD_BOARD_NAMES offline + * info. + * + * Arguments: + * name - open the board with this name (provided as a C-style string, + * i.e. NUL terminated ASCII.) + * + * Returns: the non-negative integer handle for the board, otherwise a + * negative value to indicate error. Upon receiving the error, the OpenCL + * runtime will proceed to open other known devices, hence the MMD mustn't + * exit the application if an open call fails. + */ + +int AOCL_MMD_CALL aocl_mmd_open(const char *name) { + + MMD_DEBUG("DEBUG LOG : aocl_mmd_open, Opening device: %s\n", name); + + uint64_t obj_id = device_manager.id_from_name(name); + if (!obj_id) { + MMD_DEBUG("DEBUG LOG : Error while aocl_mmd_open, object id not found for board : %s\n", name); + return MMD_INVALID_PARAM; + } + + int handle; + Device *dev = nullptr; + if (device_manager.get_or_create_device(name, &handle, &dev) != DeviceMapManager::SUCCESS) { + if (std::getenv("MMD_PROGRAM_DEBUG") || std::getenv("MMD_DMA_DEBUG") || std::getenv("MMD_ENABLE_DEBUG")) { + MMD_DEBUG("DEBUG LOG : Error while aocl_mmd_open, device not found for board : %s\n", name); + } + return MMD_AOCL_ERR; + } + + assert(dev); + if (dev->asp_loaded()) { + if (!dev->initialize_asp()) { + MMD_DEBUG("DEBUG LOG : Error while aocl_mmd_open, Error initializing asp for board : %s\n", name); + return MMD_ASP_INIT_FAILED; + } + } else { + MMD_DEBUG("DEBUG LOG : Error while aocl_mmd_open, asp not loaded for board : %s\n", name); + return MMD_ASP_NOT_LOADED; + } + MMD_DEBUG("end of aocl_mmd_open \n"); + MMD_DEBUG("DEBUG LOG : Success aocl_mmd_open for board : %s, handle : %d \n", name, handle); + return handle; +} + +/** Close an opened device, by its handle. + * Returns: 0 on success, negative values on error. + */ +int AOCL_MMD_CALL aocl_mmd_close(int handle) { +#ifndef SIM + device_manager.close_device_if_exists(handle); +#else + std::cout << "# mmd.cpp: During simulation (ASE) we are not closing the device.\n"; +#endif + return 0; +} + +// CoreDLA modifications +// To support multiple different FPGA boards, anything board specific must be implemented in a +// board-specific MMD instead of the CoreDLA runtime layer. 
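Taken together, the entry points above (aocl_mmd_open, aocl_mmd_write, aocl_mmd_read, aocl_mmd_close) are what the CoreDLA runtime drives, before reaching the DLA-specific wrappers defined just below. A hedged usage sketch, not taken from the repository; the board name, offsets, and the blocking (op == NULL) transfers are illustrative assumptions:

```cpp
// Illustrative host-side use of the MMD entry points (board name string,
// offset, and transfer length are placeholders, not values from the diff).
#include <cstdint>
#include <cstdio>

#include "aocl_mmd.h"  // MMD API header from this repository

int main() {
  // "dla_agx7_ofs_board" is the name reported by AOCL_MMD_BOARD_NAMES above;
  // the runtime normally discovers it via aocl_mmd_get_offline_info().
  int handle = aocl_mmd_open("dla_agx7_ofs_board");
  if (handle < 0) {
    std::fprintf(stderr, "aocl_mmd_open failed: %d\n", handle);
    return 1;
  }

  // Blocking (op == NULL) write into device global memory, then read it back.
  uint32_t out[16] = {0xDEADBEEFu};
  uint32_t in[16] = {0};
  if (aocl_mmd_write(handle, NULL, sizeof(out), out, AOCL_MMD_MEMORY, 0) != 0 ||
      aocl_mmd_read(handle, NULL, sizeof(in), in, AOCL_MMD_MEMORY, 0) != 0) {
    std::fprintf(stderr, "transfer failed\n");
  } else {
    std::printf("readback word 0: 0x%08X\n", in[0]);
  }

  aocl_mmd_close(handle);
  return 0;
}
```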
+#ifdef DLA_MMD +// Query functions to get board-specific values +AOCL_MMD_CALL int dla_mmd_get_max_num_instances() { return 4; } + +// DLA can only uses 4GB DDR as of 2024.2 +AOCL_MMD_CALL uint64_t dla_mmd_get_ddr_size_per_instance() { return 1ULL << 32; } +AOCL_MMD_CALL double dla_mmd_get_ddr_clock_freq() { + #ifdef USE_N6001_BOARD + return 300.0; // MHz + #else + return 333.333333; // MHz + #endif +} + +// Helper functions for the wrapper functions around CSR and DDR +uint64_t dla_get_raw_csr_address(int instance, uint64_t addr) { return 0x10000 + (0x800 * instance) + addr; } +uint64_t dla_get_raw_ddr_address(int instance, uint64_t addr) { + #ifdef USE_N6001_BOARD + return (1ULL << 32) * instance + addr; + #else + return (1ULL << 33) * instance + addr; + #endif +} + +// Wrappers around CSR and DDR reads and writes to abstract away board-specific offsets +AOCL_MMD_CALL int dla_mmd_csr_write(int handle, int instance, uint64_t addr, const uint32_t *data) { + return aocl_mmd_write( + handle, NULL, sizeof(uint32_t), data, AOCL_MMD_DLA_CSR, dla_get_raw_csr_address(instance, addr)); +} + +AOCL_MMD_CALL int dla_mmd_csr_read(int handle, int instance, uint64_t addr, uint32_t *data) { + return aocl_mmd_read(handle, NULL, sizeof(uint32_t), data, AOCL_MMD_DLA_CSR, dla_get_raw_csr_address(instance, addr)); +} + +AOCL_MMD_CALL int dla_mmd_ddr_write(int handle, int instance, uint64_t addr, uint64_t length, const void *data) { + return aocl_mmd_write(handle, NULL, length, data, AOCL_MMD_MEMORY, dla_get_raw_ddr_address(instance, addr)); +} + +AOCL_MMD_CALL int dla_mmd_ddr_read(int handle, int instance, uint64_t addr, uint64_t length, void *data) { + return aocl_mmd_read(handle, NULL, length, data, AOCL_MMD_MEMORY, dla_get_raw_ddr_address(instance, addr)); +} + +AOCL_MMD_CALL double dla_mmd_get_coredla_clock_freq(int handle) { + constexpr uint64_t hw_timer_address = 0x37000; + const uint32_t start_bit = 1; + const uint32_t stop_bit = 2; + + // Send the start command to the hardware counter + std::chrono::high_resolution_clock::time_point time_before = std::chrono::high_resolution_clock::now(); + int status = aocl_mmd_write(handle, NULL, sizeof(uint32_t), &start_bit, AOCL_MMD_DLA_CSR, hw_timer_address); + assert(status == 0); + + // Unlikely to sleep for exactly 10 milliseconds, but it doesn't matter since we use a high resolution clock to + // determine the amount of time between the start and stop commands for the hardware counter + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + + // Send the stop command to the hardware counter + std::chrono::high_resolution_clock::time_point time_after = std::chrono::high_resolution_clock::now(); + status = aocl_mmd_write(handle, NULL, sizeof(uint32_t), &stop_bit, AOCL_MMD_DLA_CSR, hw_timer_address); + assert(status == 0); + + // Read back the value of the counter + uint32_t counter = 0; + status = aocl_mmd_read(handle, NULL, sizeof(uint32_t), &counter, AOCL_MMD_DLA_CSR, hw_timer_address); + assert(status == 0); + + // Calculate the clock frequency of the counter, which is running on clk_dla + double elapsed_seconds = std::chrono::duration_cast<std::chrono::duration<double>>(time_after - time_before).count(); + return 1.0e-6 * counter / elapsed_seconds; // 1.0e-6 is to convert to MHz +} +#endif diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_device.cpp b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_device.cpp new file mode 100644 index 0000000..dd4ca42 --- /dev/null +++ 
b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_device.cpp @@ -0,0 +1,448 @@ +// (c) 1992-2024 Intel Corporation. +// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words +// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S. +// and/or other countries. Other marks and brands may be claimed as the property +// of others. See Trademarks on intel.com for full list of Intel trademarks or +// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera) +// Your use of Intel Corporation's design tools, logic functions and other +// software and tools, and its AMPP partner logic functions, and any output +// files any of the foregoing (including device programming or simulation +// files), and any associated documentation or information are expressly subject +// to the terms and conditions of the Altera Program License Subscription +// Agreement, Intel MegaCore Function License Agreement, or other applicable +// license agreement, including, without limitation, that your use is for the +// sole purpose of programming logic devices manufactured by Intel and sold by +// Intel or its authorized distributors. Please refer to the applicable +// agreement for further details. + +#include <assert.h> +#include <numa.h> + +#include <inttypes.h> +#include <string.h> +#include <unistd.h> +#include <fstream> +#include <iomanip> +#include <iostream> +#include <limits> +#include <sstream> + +#include "mmd_device.h" +#include "mmd_helper.h" + +int Device::next_mmd_handle{1}; + +/** + * The Device object is created for each device/board opened and + * it has methods to interact with fpga device. + * The entry point for Device is in DeviceMapManager Class + * which maintains mapping between device names and handles. + * Device Object is foundation for interacting with device. + */ +Device::Device(uint64_t obj_id) + : fpga_obj_id(obj_id), + kernel_interrupt_thread(NULL), + event_update(NULL), + event_update_user_data(NULL), + enable_set_numa(false), + fme_sysfs_temp_initialized(false), + bus(0), + device(0), + function(0), + afu_initialized(false), + asp_initialized(false), + mmio_is_mapped(false), + filter(NULL), + mmio_token(NULL), + mmio_handle(NULL), + fme_token(NULL), + guid(), + mmd_dma(NULL) { + // Note that this constructor is not thread-safe because next_mmd_handle + // is shared between all class instances + MMD_DEBUG("DEBUG LOG : Constructing Device object\n"); + + mmd_handle = next_mmd_handle; + if (next_mmd_handle == std::numeric_limits<int>::max()) + next_mmd_handle = 1; + else + next_mmd_handle++; + + fpga_properties filter = NULL; + uint32_t num_matches; + fpga_result r; + + // Set up a filter that will search for an accelerator + fpgaGetProperties(NULL, &filter); + fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR); + + // Add the desired UUID to the filter + uuid_parse(I_DK_AFU_ID, guid); + fpgaPropertiesSetGUID(filter, guid); + + // Do the search across the available FPGA contexts + num_matches = 1; + fpgaEnumerate(&filter, 1, &mmio_token, 1, &num_matches); + + fpgaPropertiesGetParent(filter, &fme_token); + + // Not needed anymore so we destroy the filter + fpgaDestroyProperties(&filter); + + if (num_matches < 1) { + throw std::runtime_error(std::string("Cannot find accelerator")); + } + + // Open accelerator + r = fpgaOpen(mmio_token, &mmio_handle, 0); + assert(FPGA_OK == r); + + // While the token is available, check whether it is for HW + // or for ASE simulation. 
+ fpga_properties accel_props; + uint16_t vendor_id, dev_id; + fpgaGetProperties(mmio_token, &accel_props); + fpgaPropertiesGetVendorID(accel_props, &vendor_id); + fpgaPropertiesGetDeviceID(accel_props, &dev_id); + + afu_initialized = true; + MMD_DEBUG("DEBUG LOG : Done constructing Device object\n"); +} + +/** Return true if board name parses correctly, false if it does not + * Return the parsed object_id in obj_id as an [out] parameter + */ +bool Device::parse_board_name(const char *board_name_str, uint64_t &obj_id) { + MMD_DEBUG("DEBUG LOG : Parsing board name\n"); + std::string prefix(ASP_NAME); + std::string board_name(board_name_str); + + obj_id = 0; + if (board_name.length() <= prefix.length() && board_name.compare(0, prefix.length(), prefix)) { + MMD_DEBUG("DEBUG LOG : Error parsing device name '%s'\n", board_name_str); + return false; + } + + std::string device_num_str = board_name.substr(prefix.length()); + obj_id = std::stol(device_num_str, 0, 16); + + // Assume that OPAE does not use 0 as a valid object ID. This is true for now + // but relies somewhat on an implementaion dependent feature. + assert(obj_id > 0); + return true; +} + +/** initialize_asp() function is used in aocl_mmd_open() API + * It resets AFC and reinitializes DMA, Kernel Interrupts if in use + */ +bool Device::initialize_asp() { + MMD_DEBUG("DEBUG LOG : Initializing ASP ... \n"); + if (asp_initialized) { + MMD_DEBUG("DEBUG LOG : ASP already initialized \n"); + return true; + } + + fpga_result res = fpgaMapMMIO(mmio_handle, 0, NULL); + if (res != FPGA_OK) { + MMD_DEBUG("Error mapping MMIO space: %s\n", fpgaErrStr(res)); + return false; + } + mmio_is_mapped = true; + + // Trigger an user reset + uint64_t reset = 1; + fpgaWriteMMIO64(mmio_handle, 0, 0x40000, reset); + + AFU_RESET_DELAY(); + + // DMA performance is heavily dependent on the memcpy operation that transfers + // data from user allocated buffer to the pinned buffer that is used for + // DMA. On some machines with multiple NUMA nodes it is critical for + // performance that the pinned buffer is located on the NUMA node as the + // threads that performs the DMA operation. + // + // The performance also improves slighlty if the DMA threads are on the same + // NUMA node as the FPGA PCI device. + // + // This code pins memory allocation to occur from FPGA NUMA node prior to + // initializing the DMA buffers. It also pins all threads in the process + // to run on this same node. + struct bitmask *mask = NULL; + if (enable_set_numa) { + mask = numa_parse_nodestring(fpga_numa_node.c_str()); + numa_set_membind(mask); + int ret = numa_run_on_node_mask_all(mask); + if (ret < 0) { + fprintf(stderr, " Error setting NUMA node mask\n"); + } + } + + MMD_DEBUG("DEBUG LOG : Initializing HOST -> FPGA DMA channel \n"); + + mmd_dma = new intel_opae_mmd::mmd_dma(mmio_handle, mmd_handle); + if (!mmd_dma->initialized()) { + MMD_DEBUG("DEBUG LOG : Error initializing DMA channel \n"); + delete mmd_dma; + return false; + } + + // Turn off membind restriction in order to allow future allocation to + // occur on different NUMA nodes if needed. Hypothesis is that only + // the pinned buffers are performance critical for the memcpy. Other + // allocations in the process can occur on other NUMA nodes if needed. + if (enable_set_numa) { + numa_set_membind(numa_nodes_ptr); + numa_free_nodemask(mask); + } + +// Do not enable interrupt if polling mode is enabled in the DLA runtime. 
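The NUMA handling in initialize_asp() above follows a pin-then-relax pattern: memory allocation and all process threads are bound to the FPGA's NUMA node while the pinned DMA buffers are created, then the default policy is restored so later allocations may land on other nodes. A condensed sketch of that libnuma pattern (the node string is a placeholder; illustration only, not the MMD source):

```cpp
// Condensed libnuma pin-then-relax pattern (placeholder node string, e.g. "0").
#include <numa.h>
#include <cstdio>

void create_dma_buffers_on_fpga_node(const char *fpga_numa_node) {
  if (numa_available() < 0) return;  // libnuma unusable on this system

  struct bitmask *mask = numa_parse_nodestring(fpga_numa_node);
  if (!mask) return;

  numa_set_membind(mask);                     // allocations now come from this node
  if (numa_run_on_node_mask_all(mask) < 0) {  // move all threads of the process there
    std::fprintf(stderr, "Error setting NUMA node mask\n");
  }

  // ... allocate and pin the DMA buffers here ...

  numa_set_membind(numa_nodes_ptr);           // restore the "all nodes" default policy
  numa_free_nodemask(mask);
}
```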
+#ifndef COREDLA_RUNTIME_POLLING + try { + kernel_interrupt_thread = new intel_opae_mmd::KernelInterrupt(mmio_handle, mmd_handle); + } catch (const std::system_error &e) { + std::cerr << "Error initializing kernel interrupt thread: " << e.what() << e.code() << std::endl; + return false; + } catch (const std::exception &e) { + std::cerr << "Error initializing kernel interrupt thread: " << e.what() << std::endl; + return false; + } +#endif + + asp_initialized = true; + MMD_DEBUG("DEBUG LOG : ASP Initialized ! \n"); + return asp_initialized; +} + +/** Device Class Destructor implementation + * Properly releasing and free-ing memory + * part of best coding practices and help + * with stable system performance and + * helps reduce bugs + */ +Device::~Device() { + MMD_DEBUG("DEBUG LOG : Destructing Device object \n"); + int num_errors = 0; + + if (kernel_interrupt_thread != nullptr) { + delete kernel_interrupt_thread; + kernel_interrupt_thread = NULL; + } + + if (mmd_dma) { + delete mmd_dma; + mmd_dma = NULL; + } + + if (mmio_is_mapped) { + if (fpgaUnmapMMIO(mmio_handle, 0)) { + MMD_DEBUG("DEBUG LOG : fpgaUnmapMMIO failed\n"); + num_errors++; + } + } + + if (mmio_handle) { + if (fpgaClose(mmio_handle) != FPGA_OK) { + MMD_DEBUG("DEBUG LOG : fpgaClose mmio_handle failed\n"); + num_errors++; + } + } + + if (mmio_token) { + if (fpgaDestroyToken(&mmio_token) != FPGA_OK) { + MMD_DEBUG("DEBUG LOG : fpgaDestroyToken mmio_token failed\n"); + num_errors++; + } + } + + if (filter) { + if (fpgaDestroyProperties(&filter) != FPGA_OK) { + MMD_DEBUG("DEBUG LOG : fpgaDestroyProperties filter failed\n"); + num_errors++; + } + } + + if (num_errors > 0) { + MMD_DEBUG("DEBUG LOG : Error freeing resources in Device destructor\n"); + } +} + +/** asp_loaded() function which checks if asp is loaded on board + * it is used in aocl_mmd_open() API + */ +bool Device::asp_loaded() { + fpga_guid pci_guid; + fpga_guid afu_guid; + fpga_properties prop; + fpga_result res; + + if (uuid_parse(I_DK_AFU_ID, pci_guid) < 0) { + MMD_DEBUG("DEBUG LOG : Error parsing guid\n"); + return false; + } + + res = fpgaGetProperties(mmio_token, &prop); + if (res != FPGA_OK) { + MMD_DEBUG("DEBUG LOG : Error reading properties: %s \n", fpgaErrStr(res)); + fpgaDestroyProperties(&prop); + return false; + } + + if (!mmio_token) { + fpgaDestroyProperties(&prop); + MMD_DEBUG("DEBUG LOG : Error reading the mmio_token\n"); + return false; + } + + res = fpgaPropertiesGetGUID(prop, &afu_guid); + if (res != FPGA_OK) { + MMD_DEBUG("DEBUG LOG : Error reading GUID \n"); + fpgaDestroyProperties(&prop); + return false; + } + + fpgaDestroyProperties(&prop); + if (uuid_compare(pci_guid, afu_guid) == 0) { + MMD_DEBUG("DEBUG LOG : asp loaded : true \n"); + return true; + } else { + MMD_DEBUG("DEBUG LOG : asp loaded : false \n"); + return false; + } +} + +/** get_bdf() function is called + * in aocl_mmd_get_info() API + */ +std::string Device::get_bdf() { + std::ostringstream bdf; + bdf << std::setfill('0') << std::setw(2) << std::hex << unsigned(bus) << ":" << std::setfill('0') << std::setw(2) + << std::hex << unsigned(device) << "." << std::hex << unsigned(function); + + return bdf.str(); +} + +/** get_temperature() function is called + * in aocl_mmd_get_info() API + * We currently use hardcoded paths to retrieve temperature information + * We will replace with OPAE APIs in future + */ +float Device::get_temperature() { + if (std::getenv("MMD_ENABLE_DEBUG")) { + MMD_DEBUG("DEBUG LOG : Reading temperature ... 
\n"); + } + float temp = 0; + fpga_object obj; + const char *name; + name = "dfl_dev.*/spi_master/spi*/spi*.*/*-hwmon.*.auto/hwmon/hwmon*/temp1_input"; + fpga_result res; + res = fpgaTokenGetObject(fme_token, name, &obj, FPGA_OBJECT_GLOB); + if (res != FPGA_OK) { + MMD_DEBUG("DEBUG LOG : Error reading temperature monitor from BMC :"); + MMD_DEBUG(" %s \n", fpgaErrStr(res)); + temp = -999; + return temp; + } + + uint64_t value = 0; + fpgaObjectRead64(obj, &value, FPGA_OBJECT_SYNC); + fpgaDestroyObject(&obj); + temp = value / 1000; + return temp; +} + +/** set_kernel_interrupt() function is used in aocl_mmd_set_interrupt_handler() API + */ +void Device::set_kernel_interrupt(aocl_mmd_interrupt_handler_fn fn, void *user_data) { + MMD_DEBUG("DEBUG LOG : Device::set_kernel_interrupt() \n"); + if (kernel_interrupt_thread) { + kernel_interrupt_thread->set_kernel_interrupt(fn, user_data); + } +} + +/** set_kernel_interrupt() function is used in aocl_mmd_set_status_handler() API + */ +void Device::set_status_handler(aocl_mmd_status_handler_fn fn, void *user_data) { + MMD_DEBUG("DEBUG LOG : Device::set_status_handler() \n"); + event_update = fn; + event_update_user_data = user_data; +} + +/** event_update_fn() is used in read_block(), write_block(), copy_block() functions + * OPAE provides event API for handling asynchronous events sucj as errors and interrupts + * under the hood those are used + */ +void Device::event_update_fn(aocl_mmd_op_t op, int status) { + MMD_DEBUG("DEBUG LOG : Device::event_update_fn() \n"); + event_update(mmd_handle, event_update_user_data, op, status); +} + +/** read_block() is used in aocl_mmd_read() API + * as name suggests its used for fpga->host DMA and MMIO transfers + */ +int Device::read_block(aocl_mmd_op_t op, int mmd_interface, void *host_addr, size_t offset, size_t size) { + MMD_DEBUG("DEBUG LOG : Device::read_block()\n"); + int res; + + // The mmd_interface is defined as the base address of the MMIO write. Access + // to memory requires special functionality. Otherwise do direct MMIO read. + + if (mmd_interface == AOCL_MMD_MEMORY) { + std::unique_lock<std::mutex> dma_mutex_lock(m_dma_mutex); + MMD_DEBUG("DEBUG LOG : Using DMA to read block\n"); + res = mmd_dma->fpga_to_host(host_addr, (uint64_t)offset, size); + } else if (mmd_interface == AOCL_MMD_DLA_CSR) { + assert(size == 4); // DLA CSR read should be always size ==4 as of 2024.2 + MMD_DEBUG("DEBUG LOG : Using MMIO to read block in the DLA CSR space\n"); + res = read_mmio(host_addr, offset, size); + } else { + MMD_DEBUG("DEBUG LOG : Using MMIO to read block\n"); + res = read_mmio(host_addr, mmd_interface + offset, size); + + if (op) { + this->event_update_fn(op, res); + } + } + return res; +} + +/** write_block() is used in aocl_mmd_write() API + * as name suggests its used for DMA and MMIO transfers + */ +int Device::write_block(aocl_mmd_op_t op, int mmd_interface, const void *host_addr, size_t offset, size_t size) { + MMD_DEBUG("DEBUG LOG : Device::write_block()\n"); + int res; + + // The mmd_interface is defined as the base address of the MMIO write. Access + // to memory requires special functionality. 
Otherwise do direct MMIO write + if (mmd_interface == AOCL_MMD_MEMORY) { + std::unique_lock<std::mutex> dma_mutex_lock(m_dma_mutex); + MMD_DEBUG("DEBUG LOG : Using DMA to write block\n"); + res = mmd_dma->host_to_fpga(host_addr, (uint64_t)offset, size); + } else if (mmd_interface == AOCL_MMD_DLA_CSR) { + assert(size == 4); // DLA CSR read should be always size ==4 as of 2024.2 + MMD_DEBUG("DEBUG LOG : Using MMIO to read block in the DLA CSR space\n"); + res = write_mmio(host_addr, offset, size); + } else { + MMD_DEBUG("DEBUG LOG : Using MMIO to write block\n"); + res = write_mmio(host_addr, mmd_interface + offset, size); + if (op) { + this->event_update_fn(op, res); + } + } + + return res; +} + +/** read_mmio() is used in read_block() function + * it uses OPAE APIs fpgaReadMMIO64() and fpgaReadMMIO32() + */ +int Device::read_mmio(void *host_addr, size_t mmio_addr, size_t size) { + return mmd_helper::read_mmio(mmio_handle, host_addr, mmio_addr, size); +} + +/** write_mmio() is used in write_block() function + * it uses OPAE APIs fpgaWriteMMIO64() and fpgaWriteMMIO32() + */ +int Device::write_mmio(const void *host_addr, size_t mmio_addr, size_t size) { + return mmd_helper::write_mmio(mmio_handle, host_addr, mmio_addr, size); +} diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_device.h b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_device.h new file mode 100644 index 0000000..1cded83 --- /dev/null +++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_device.h @@ -0,0 +1,151 @@ +// (c) 1992-2024 Intel Corporation. +// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words +// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S. +// and/or other countries. Other marks and brands may be claimed as the property +// of others. See Trademarks on intel.com for full list of Intel trademarks or +// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera) +// Your use of Intel Corporation's design tools, logic functions and other +// software and tools, and its AMPP partner logic functions, and any output +// files any of the foregoing (including device programming or simulation +// files), and any associated documentation or information are expressly subject +// to the terms and conditions of the Altera Program License Subscription +// Agreement, Intel MegaCore Function License Agreement, or other applicable +// license agreement, including, without limitation, that your use is for the +// sole purpose of programming logic devices manufactured by Intel and sold by +// Intel or its authorized distributors. Please refer to the applicable +// agreement for further details. + +#ifndef MMD_DEVICE_H +#define MMD_DEVICE_H + +#include <limits.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <string> + +#include <opae/fpga.h> + +#include <uuid/uuid.h> + +#include "aocl_mmd.h" +#include "mmd_dma.h" +#include "mmd_helper.h" + +#include "kernel_interrupt.h" + +// Tune delay for simulation or HW. Eventually delay +// should be removed for HW, may still be needed for ASE simulation +#ifdef SIM +#define DELAY_MULTIPLIER 100 +#else +#define DELAY_MULTIPLIER 1 +#endif + +// Most AOCL_MMD_CALL functions return negative number in case of error, +// MMD_AOCL_ERR is used to indicate an error from the MMD that is being +// returned to the runtime. 
Simply set to -2 for now since neither interface +// defines a meaning to return codes for errors. +#define MMD_AOCL_ERR -1 + +// NOTE: some of the code relies on invalid handle returning -1 +// future TODO eliminate dependency on specific error values +#define MMD_INVALID_PARAM -1 + +// Our diagnostic script relies on handle values < -1 to determine when +// a valid device is present but a functioning ASP is not loaded. +#define MMD_ASP_NOT_LOADED -2 +#define MMD_ASP_INIT_FAILED -3 + +// Delay settings +#define MMIO_DELAY() +#define YIELD_DELAY() usleep(1 * DELAY_MULTIPLIER) +#define OPENCL_SW_RESET_DELAY() usleep(5000 * DELAY_MULTIPLIER) +#define AFU_RESET_DELAY() usleep(20000 * DELAY_MULTIPLIER) + +#define KERNEL_SW_RESET_BASE (AOCL_MMD_KERNEL + 0x30) + +#define ASP_NAME "ofs_" + +#define SVM_MMD_MPF 0x24000 + +#define SVM_DDR_OFFSET 0x1000000000000 +#define PCI_DDR_OFFSET 0 + +enum { + // IRQ offsets no longer exist in DLA hardware (removed from board.qsys) + AOCL_IRQ_POLLING_BASE = 0x0100, // CSR to polling interrupt status + AOCL_IRQ_MASKING_BASE = 0x0108, // CSR to set/unset interrupt mask + AOCL_MMD_KERNEL = 0, + AOCL_MMD_MEMORY = 1, + AOCL_MMD_DLA_CSR = 2, +}; + +enum AfuStatu { MMD_INVALID_ID = 0, MMD_ASP, MMD_AFU }; + +class Device final { + public: + Device(uint64_t); + Device(const Device &) = delete; + Device &operator=(const Device &) = delete; + ~Device(); + + static bool parse_board_name(const char *board_name, uint64_t &obj_id); + + int get_mmd_handle() { return mmd_handle; } + uint64_t get_fpga_obj_id() { return fpga_obj_id; } + std::string get_dev_name() { return mmd_dev_name; } + std::string get_bdf(); + float get_temperature(); + + bool initialize_asp(); + void set_kernel_interrupt(aocl_mmd_interrupt_handler_fn fn, void *user_data); + void set_status_handler(aocl_mmd_status_handler_fn fn, void *user_data); + void event_update_fn(aocl_mmd_op_t op, int status); + bool asp_loaded(); + + int read_block(aocl_mmd_op_t op, int mmd_interface, void *host_addr, size_t dev_addr, size_t size); + int write_block(aocl_mmd_op_t op, int mmd_interface, const void *host_addr, size_t dev_addr, size_t size); + + private: + static int next_mmd_handle; + + int mmd_handle; + uint64_t fpga_obj_id; + std::string mmd_dev_name; + intel_opae_mmd::KernelInterrupt *kernel_interrupt_thread; + aocl_mmd_status_handler_fn event_update; + void *event_update_user_data; + + std::string fpga_numa_node; + bool enable_set_numa; + bool fme_sysfs_temp_initialized; + void initialize_fme_sysfs(); + void initialize_local_cpus_sysfs(); + bool find_dma_dfh_offsets(); + + uint8_t bus; + uint8_t device; + uint8_t function; + + bool afu_initialized; + bool asp_initialized; + bool mmio_is_mapped; + + fpga_properties filter; + fpga_token mmio_token; + fpga_handle mmio_handle; + fpga_token fme_token; + fpga_guid guid; + intel_opae_mmd::mmd_dma *mmd_dma; + std::mutex m_dma_mutex; + + // Helper functions + int read_mmio(void *host_addr, size_t dev_addr, size_t size); + int write_mmio(const void *host_addr, size_t dev_addr, size_t size); +}; + +#endif // MMD_DEVICE_H diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_dma.cpp b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_dma.cpp new file mode 100644 index 0000000..6a4e13c --- /dev/null +++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_dma.cpp @@ -0,0 +1,573 @@ +// (c) 1992-2024 Intel Corporation. 
+// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words +// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S. +// and/or other countries. Other marks and brands may be claimed as the property +// of others. See Trademarks on intel.com for full list of Intel trademarks or +// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera) +// Your use of Intel Corporation's design tools, logic functions and other +// software and tools, and its AMPP partner logic functions, and any output +// files any of the foregoing (including device programming or simulation +// files), and any associated documentation or information are expressly subject +// to the terms and conditions of the Altera Program License Subscription +// Agreement, Intel MegaCore Function License Agreement, or other applicable +// license agreement, including, without limitation, that your use is for the +// sole purpose of programming logic devices manufactured by Intel and sold by +// Intel or its authorized distributors. Please refer to the applicable +// agreement for further details. + +#include <memory.h> +#include <sys/mman.h> +#include <cassert> +#include <chrono> +#include <cstdlib> +#include <cstring> +#include <iostream> +#include <unordered_map> + +#include <inttypes.h> +#include <sstream> + +#include "mmd_device.h" +#include "mmd_dma.h" +#include "mmd_helper.h" + +namespace intel_opae_mmd { + +/** mmd_dma class constructor + */ +mmd_dma::mmd_dma(fpga_handle fpga_handle_arg, int mmd_handle) : m_initialized(false), m_fpga_handle(fpga_handle_arg) { + MMD_DEBUG("DEBUG LOG : Constructing DMA \n"); + // Initialize shared buffer + auto res = fpgaPrepareBuffer(m_fpga_handle, DMA_BUFFER_SIZE, (void **)&dma_buf_ptr, &dma_buf_wsid, 0); + + assert(FPGA_OK == res && "Allocating DMA Buffer failed"); + + memset((void *)dma_buf_ptr, 0x0, DMA_BUFFER_SIZE); + + // Store virtual address of IO registers + res = fpgaGetIOAddress(m_fpga_handle, dma_buf_wsid, &dma_buf_iova); + assert(FPGA_OK == res && "getting dma DMA_BUF_IOVA failed"); + + m_initialized = true; +} + +/** mmd_dma destructor + * free-ing , releasing various resources created during object construction is a good idea + * it helps with system stability and reduces code bugs + */ +mmd_dma::~mmd_dma() { + MMD_DEBUG("DEBUG LOG : Destructing DMA \n"); + auto res = fpgaReleaseBuffer(m_fpga_handle, dma_buf_wsid); + assert(FPGA_OK == res && "Release DMA Buffer failed"); + m_initialized = false; +} + +// Called in dma_transfer() to send DMA descriptor +int mmd_dma::send_descriptor(uint64_t mmio_dst, dma_descriptor_t desc) { + // mmio requires 8 byte alignment + assert(mmio_dst % 8 == 0); + + fpgaWriteMMIO64(m_fpga_handle, 0, mmio_dst, desc.src_address); + MMD_DEBUG("Writing %lX to address %lX\n", desc.src_address, mmio_dst); + mmio_dst += 8; + fpgaWriteMMIO64(m_fpga_handle, 0, mmio_dst, desc.dest_address); + MMD_DEBUG("Writing %lX to address %lX\n", desc.dest_address, mmio_dst); + mmio_dst += 8; + fpgaWriteMMIO64(m_fpga_handle, 0, mmio_dst, desc.len); + MMD_DEBUG("Writing %X to address %lX\n", desc.len, mmio_dst); + mmio_dst += 8; + fpgaWriteMMIO64(m_fpga_handle, 0, mmio_dst, desc.control); + MMD_DEBUG("Writing %X to address %lX\n", desc.control, mmio_dst); + + return 0; +} + +// Use ASE to handle unaligned transfer and DMA to do aligned transfer. 
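+//
+// Illustrative example (exposition only, not driver logic): a 100-byte read from
+// device offset 0x1038 would be split into three pieces:
+//   1. ASE copies the 8 bytes from 0x1038 up to the 64-byte boundary at 0x1040,
+//   2. DMA moves the one fully aligned 64-byte line through the pinned buffer,
+//   3. ASE copies the remaining 28-byte tail.
+// Transfers larger than DMA_BUFFER_SIZE (16 KiB) are additionally processed in
+// DMA_BUFFER_SIZE chunks, since every DMA pass is staged through the single
+// pinned host buffer allocated in the constructor.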
+int mmd_dma::fpga_to_host(void *host_addr, uint64_t dev_src, size_t size) { + fpga_result res = FPGA_OK; + uint64_t count_left = size; + uint64_t aligned_addr = 0; + uint64_t align_bytes = 0; + uint64_t curr_dev_src = dev_src; + void *curr_host_addr = host_addr; + + if (dev_src % 64 != 0) { + // We use ASE to handle unaligned DMA transfer + MMD_DEBUG("DEBUG LOG : mmd_dma::fpga_to_host dev_src is non 64B aligned\n"); + if (count_left < 64) { + MMD_DEBUG("DEBUG LOG : mmd_dma::fpga_to_host dev_src count < 64\n"); + res = _ase_fpga_to_host(curr_dev_src, curr_host_addr, count_left); + assert(FPGA_OK == res && "_ase_fpga_to_host failed"); + return res; + } else { + aligned_addr = ((curr_dev_src / 64) + 1) * 64; + align_bytes = aligned_addr - curr_dev_src; + res = _ase_fpga_to_host(curr_dev_src, curr_host_addr, align_bytes); + assert(FPGA_OK == res && "_ase_fpga_to_host failed"); + + // Update the processed data + count_left -= align_bytes; + curr_dev_src += align_bytes; + curr_host_addr = (void *)(static_cast<char *>(curr_host_addr) + align_bytes); + } + } + + if (count_left) { + uint64_t dma_chunks = count_left / DMA_BUFFER_SIZE; + for (uint64_t i = 0; i < dma_chunks; i++) { + // constant size transfer + + uint64_t dev_dest = dma_buf_iova | DMA_HOST_MASK; + int len = ((DMA_BUFFER_SIZE - 1) / DMA_LINE_SIZE) + 1; // Ceiling of test_buffer_size / DMA_LINE_SIZE + + dma_transfer(curr_dev_src, dev_dest, len, ddr_to_host); + + // Copy data from shared buffer to host addr + memcpy(curr_host_addr, (void *)dma_buf_ptr, DMA_BUFFER_SIZE); + + memset((void *)dma_buf_ptr, 0x0, DMA_BUFFER_SIZE); + + // Update the curr source and dest + curr_host_addr = (void *)(static_cast<char *>(curr_host_addr) + DMA_BUFFER_SIZE); + curr_dev_src += DMA_BUFFER_SIZE; + } + + // Updated the count_left for the for loop + count_left -= (dma_chunks * DMA_BUFFER_SIZE); + + if (count_left) { + uint64_t dma_tx_bytes = (count_left / 64) * 64; + if (dma_tx_bytes != 0) { + assert(dma_tx_bytes <= DMA_BUFFER_SIZE && "Illegal transfer size\n"); + + uint64_t dev_dest = dma_buf_iova | DMA_HOST_MASK; + int len = ((dma_tx_bytes - 1) / DMA_LINE_SIZE) + 1; // Ceiling of test_buffer_size / DMA_LINE_SIZE + + dma_transfer(curr_dev_src, dev_dest, len, ddr_to_host); + + // Copy data from shared buffer to host addr + memcpy(curr_host_addr, (void *)dma_buf_ptr, dma_tx_bytes); + + memset((void *)dma_buf_ptr, 0x0, DMA_BUFFER_SIZE); + + // Update the address + curr_host_addr = (void *)(static_cast<char *>(curr_host_addr) + dma_tx_bytes); + curr_dev_src += dma_tx_bytes; + count_left -= dma_tx_bytes; + } + if (count_left) { + MMD_DEBUG("DEBUG LOG : mmd_dma::fpga_to_host count_left after DMA transfer is "); + MMD_DEBUG("%" PRIu64 "\n", count_left); + // Handle the rest unaligned transfer using ASE + res = _ase_fpga_to_host(curr_dev_src, curr_host_addr, count_left); + if (FPGA_OK != res) { + MMD_DEBUG("DEBUG LOG : mmd_dma::_ase_fpga_to_host failed\n"); + return -1; + } + count_left = 0; + + // No need to update address as the transaction is done. + } + } + } + assert(count_left==0 && "fpga_to_host failed"); + return 0; +} + +// Use ASE to handle unaligned transfer and DMA to do aligned transfer. 
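+//
+// Note on the write path: each aligned chunk is first memcpy'd from the caller's
+// buffer into the pinned buffer from fpgaPrepareBuffer(), and the descriptor's
+// source address is that buffer's IO virtual address (dma_buf_iova) with
+// DMA_HOST_MASK set (which, judging by the name, marks it as a host-side
+// address); unaligned head and tail bytes go through ASE as in fpga_to_host().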
+int mmd_dma::host_to_fpga(const void *host_addr, uint64_t dev_dest, size_t size) { + fpga_result res = FPGA_OK; + uint64_t count_left = size; + uint64_t aligned_addr = 0; + uint64_t align_bytes = 0; + uint64_t curr_dest = dev_dest; + const void *curr_host_addr = host_addr; + + if (dev_dest % 64 != 0) { + // We use ASE to handle unaligned DMA transfer + MMD_DEBUG("DEBUG LOG : mmd_dma::host_to_fpga dev_dest is non 64B aligned\n"); + if (count_left < 64) { + res = _ase_host_to_fpga(dev_dest, host_addr, count_left); + assert(FPGA_OK == res && "_ase_host_to_fpga failed"); + return res; + } else { + aligned_addr = ((dev_dest / 64) + 1) * 64; + align_bytes = aligned_addr - dev_dest; + res = _ase_host_to_fpga(dev_dest, host_addr, align_bytes); + assert(FPGA_OK == res && "_ase_host_to_fpga failed"); + + // Update the processed data + count_left -= align_bytes; + curr_dest += align_bytes; + curr_host_addr = (const void *)(static_cast<const char *>(curr_host_addr) + align_bytes); + } + } + + if (count_left) { + uint64_t dma_chunks = count_left / DMA_BUFFER_SIZE; + for (uint64_t i = 0; i < dma_chunks; i++) { + // constant size transfer + // Copy host_src value to the shared buffer + memcpy((void *)dma_buf_ptr, curr_host_addr, DMA_BUFFER_SIZE); + uint64_t dev_src = dma_buf_iova | DMA_HOST_MASK; + + int len = ((DMA_BUFFER_SIZE - 1) / DMA_LINE_SIZE) + 1; // Ceiling of test_buffer_size / DMA_LINE_SIZE + + dma_transfer(dev_src, curr_dest, len, host_to_ddr); + + memset((void *)dma_buf_ptr, 0x0, DMA_BUFFER_SIZE); + + // Update the curr source and dest + curr_host_addr = (const void *)(static_cast<const char *>(curr_host_addr) + DMA_BUFFER_SIZE); + curr_dest += DMA_BUFFER_SIZE; + } + + // Updated the count_left for the for loop + count_left -= (dma_chunks * DMA_BUFFER_SIZE); + + if (count_left) { + uint64_t dma_tx_bytes = (count_left / 64) * 64; + if (dma_tx_bytes != 0) { + assert(dma_tx_bytes <= DMA_BUFFER_SIZE && "Illegal transfer size\n"); + + // Copy host_src value to the shared buffer + memcpy((void *)dma_buf_ptr, curr_host_addr, dma_tx_bytes); + uint64_t dev_src = dma_buf_iova | DMA_HOST_MASK; + + int len = ((dma_tx_bytes - 1) / DMA_LINE_SIZE) + 1; // Ceiling of dma_tx_bytes / DMA_LINE_SIZE + dma_transfer(dev_src, curr_dest, len, host_to_ddr); + + memset((void *)dma_buf_ptr, 0x0, DMA_BUFFER_SIZE); + } + + // Update the address + curr_host_addr = (const void *)(static_cast<const char *>(curr_host_addr) + dma_tx_bytes); + curr_dest += dma_tx_bytes; + count_left -= dma_tx_bytes; + + if (count_left) { + MMD_DEBUG("DEBUG LOG : mmd_dma::host_to_fpga count_left after DMA transfer is "); + MMD_DEBUG("%" PRIu64 "\n", count_left); + // Handle the rest unaligned transfer using ASE + res = _ase_host_to_fpga(curr_dest, curr_host_addr, count_left); + assert(FPGA_OK == res && "_ase_host_to_fpga failed"); + count_left = 0; + } + } + } + assert(count_left==0 && "host_to_fpga failed"); + return 0; +} + +int mmd_dma::dma_transfer(uint64_t dev_src, uint64_t dev_dest, int len, dma_mode descriptor_mode) { + + // Get debug information for thread id + std::stringstream ss; + ss << std::this_thread::get_id(); + uint64_t id = std::stoull(ss.str()); + MMD_DEBUG("dma_transfer start current thread_id is %04lX\n", id); + + // Native DMA transfer requires 64 byte alignment + assert(dev_src % 64 == 0); + assert(dev_dest % 64 == 0); + + const uint64_t MASK_FOR_35BIT_ADDR = 0x7FFFFFFFF; + + dma_descriptor_t desc; + + MMD_DEBUG("DEBUG LOG : mmd_dma::dma_transfer starts\n"); + MMD_DEBUG("DEBUG LOG dev_dest = %04lX\n", dev_dest); + + 
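+  // Descriptor fields as used below: src/dest carry the low 35 bits of the
+  // source and destination addresses, len is the transfer length in
+  // DMA_LINE_SIZE (64-byte) lines, and control sets bit 31 (presumably the
+  // "go" bit) together with the direction encoded at MODE_SHIFT.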
desc.src_address = dev_src & MASK_FOR_35BIT_ADDR; + desc.dest_address = dev_dest & MASK_FOR_35BIT_ADDR; + desc.len = len; + desc.control = 0x80000000 | (descriptor_mode << MODE_SHIFT); + + const uint64_t DMA_DESC_BASE = 8 * DMA_CSR_IDX_SRC_ADDR; + const uint64_t DMA_STATUS_BASE = 8 * DMA_CSR_IDX_STATUS; + uint64_t mmio_data = 0; + + int desc_size = sizeof(desc); + + MMD_DEBUG("Descriptor size = %d\n", desc_size); + MMD_DEBUG("desc.src_address = %04lX\n", desc.src_address); + MMD_DEBUG("desc.dest_address = %04lX\n", desc.dest_address); + MMD_DEBUG("desc.len = %d\n", desc.len); + MMD_DEBUG("desc.control = %04X\n", desc.control); + MMD_DEBUG("descriptor_mode = %04X\n", descriptor_mode); + + // send descriptor + send_descriptor(DMA_DESC_BASE, desc); + + fpga_result r; + r = fpgaReadMMIO64(m_fpga_handle, 0, DMA_STATUS_BASE, &mmio_data); + MMD_DEBUG("DMA_STATUS_BASE before = %04lX\n", mmio_data); + if (FPGA_OK != r) return -1; + + // If the busy bit is empty, then we are done. + while ((mmio_data & 0x1) == 0x1) { + r = fpgaReadMMIO64(m_fpga_handle, 0, DMA_STATUS_BASE, &mmio_data); + assert(FPGA_OK == r); + } + MMD_DEBUG("dma_transfer end current thread_id is %04lX\n", id); + return 0; +} + +// Transfer "count" bytes from HOST to FPGA using Address span expander(ASE)- will internally make +// calls to handle unaligned and aligned MMIO writes. +fpga_result mmd_dma::_ase_host_to_fpga(uint64_t dev_dest, const void *src_ptr, uint64_t count) { + MMD_DEBUG("DEBUG LOG: _ase_host_to_fpga is being called\n "); + + MMD_DEBUG("DEBUG LOG : dev_dest is "); + MMD_DEBUG("%" PRIu64 "\n", dev_dest); + + assert(count < 64); // DLA only uses ASE transfer with less than 64 Byte transfer. + + fpga_result res = FPGA_OK; + uint64_t count_left = count; + uint64_t unaligned_size = 0; + + // For ASE window + uint64_t ase_window; + uint64_t ase_addr; + uint64_t dev_addr; + + const void *curr_src_ptr = src_ptr; + + if (count == 0) return res; + + if (dev_dest % 8 == 0) { + while (count > 0) { + ase_window = dev_dest & ~(0xfff); + ase_addr = (dev_dest & 0xfff); // only keep the lower 12 bits. + + uint64_t mmio_base_control = ASE_MMIO_BASE + ASE_MMIO_CTRL; + + MMD_DEBUG("DEBUG LOG : ase_window is "); + MMD_DEBUG("%" PRIu64 "\n", ase_window); + + // Write to ASE control + res = fpgaWriteMMIO64(m_fpga_handle, 0, mmio_base_control, ase_window); + assert(res == FPGA_OK && "Write to ASE control failed"); + + // Set final dev_addr + // dev_addr will be 8 byte aligned as long as dev_dest is 8 byte aligned. + dev_addr = ASE_MMIO_BASE + ASE_MMIO_WINDOW + ase_addr; + + assert(dev_addr % 8 == 0); + + MMD_DEBUG("DEBUG LOG : _ase_host_to_fpga count is "); + MMD_DEBUG("%" PRIu64 "\n", count); + + MMD_DEBUG("DEBUG LOG : dev addr is "); + MMD_DEBUG("%" PRIu64 "\n", dev_addr); + + size_t size = (count > 8) ? 8 : count; + mmd_helper::write_mmio(m_fpga_handle, curr_src_ptr, dev_addr, size); + + count -= size; + dev_dest += size; + curr_src_ptr = (const void *)(static_cast<const char *>(curr_src_ptr) + size); + } + + assert(count == 0); + + } else { + // First we need to handle the non byte aligned transfer + + MMD_DEBUG("DEBUG LOG : _ase_host_to_fpga count is "); + MMD_DEBUG("%" PRIu64 "\n", count); + + // Aligns address to 8 byte using dst masking method + unaligned_size = 8 - (dev_dest % 8); + if (unaligned_size > count_left) unaligned_size = count_left; + + // Write to the unaligned address + assert(unaligned_size < 8); + uint64_t shift = dev_dest % 8; + + // Write to ASE control to switch page. 
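+    // The address span expander exposes device memory through a 4 KiB MMIO
+    // window: the page (addr & ~0xfff) is written to ASE_MMIO_CTRL to select
+    // it, and the low 12 bits index into the window at
+    // ASE_MMIO_BASE + ASE_MMIO_WINDOW.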
+ ase_window = dev_dest & ~(0xfff); + + MMD_DEBUG("DEBUG LOG : ase_window in non-aligned is "); + MMD_DEBUG("%" PRIu64 "\n", ase_window); + + fpgaWriteMMIO64(m_fpga_handle, 0, ASE_MMIO_BASE + ASE_MMIO_CTRL, ase_window); + + // Get aligned dest address + uint64_t dev_aligned_addr = dev_dest - shift; + assert(dev_aligned_addr % 8 == 0); + + // read data from device memory with aligned dev dest + ase_addr = (dev_aligned_addr & 0xfff); + dev_addr = ASE_MMIO_BASE + ASE_MMIO_WINDOW + ase_addr; + uint64_t read_tmp = 0; + fpgaReadMMIO64(m_fpga_handle, 0, dev_addr, &read_tmp); + + // overlay our data, check if the shift is correct here + memcpy((reinterpret_cast<char *>(&read_tmp) + shift), src_ptr, unaligned_size); + + // Write back data to the device + fpgaWriteMMIO64(m_fpga_handle, 0, dev_addr, read_tmp); + + count_left -= unaligned_size; + + // Check if there is any byte left + if (count_left == 0) { + return res; + } + + // Now the dest address should be byte aligned now + // Start the regular ASE transfer + + const void *curr_src_ptr = (const void *)(static_cast<const char *>(src_ptr) + unaligned_size); + uint64_t next_dev_dest = dev_dest + unaligned_size; + + while (count_left > 0) { + ase_window = next_dev_dest & ~(0xfff); + ase_addr = (next_dev_dest & 0xfff); // only keep the lower 12 bits. + + MMD_DEBUG("DEBUG LOG : ase_window in non-aligned loop is "); + MMD_DEBUG("%" PRIu64 "\n", ase_window); + + // Write to ASE control + fpgaWriteMMIO64(m_fpga_handle, 0, ASE_MMIO_BASE + ASE_MMIO_CTRL, ase_window); + + // Set final dev_addr + dev_addr = ASE_MMIO_BASE + ASE_MMIO_WINDOW + ase_addr; + + assert(dev_addr % 8 == 0); + + size_t size = (count_left > 8) ? 8 : count_left; + mmd_helper::write_mmio(m_fpga_handle, + curr_src_ptr, + dev_addr, + size); + + count_left -= size; + next_dev_dest += size; + curr_src_ptr = (const void *)(static_cast<const char *>(curr_src_ptr) + size); + } + assert(count_left == 0); + } + + return FPGA_OK; +} + +// Transfer "count" bytes from FPGA to HOST using Address span expander(ASE)- will internally make +// calls to handle unaligned and aligned MMIO reads. +fpga_result mmd_dma::_ase_fpga_to_host(uint64_t dev_dest, void *host_ptr, uint64_t count) { + MMD_DEBUG("DEBUG LOG : _ase_fpga_to_host is being called\n "); + + assert(count < 64); + + fpga_result res = FPGA_OK; + uint64_t count_left = count; + uint64_t unaligned_size = 0; + + // For ASE window + + uint64_t ase_window; + uint64_t ase_addr; + uint64_t dev_addr; + + if (count == 0) return res; + + void *curr_host_ptr = host_ptr; + + if (dev_dest % 8 == 0) { + while (count > 0) { + ase_window = dev_dest & ~(0xfff); + ase_addr = (dev_dest & 0xfff); // only keep the lower 12 bits. + + MMD_DEBUG("DEBUG LOG : ase_window is "); + MMD_DEBUG("%" PRIu64 "\n", ase_window); + + // Write to ASE control to switch page. + fpgaWriteMMIO64(m_fpga_handle, 0, ASE_MMIO_BASE + ASE_MMIO_CTRL, ase_window); + + // Set final dev_addr + // dev_addr will be 8 byte aligned as long as dev_dest is 8 byte aligned. + dev_addr = ASE_MMIO_BASE + ASE_MMIO_WINDOW + ase_addr; + + assert(dev_addr % 8 == 0); + + size_t size = (count > 8) ? 
8 : count; + + mmd_helper::read_mmio(m_fpga_handle, curr_host_ptr, dev_addr, size); + + count -= size; + dev_dest += size; + curr_host_ptr = (void *)(static_cast<char *>(curr_host_ptr) + size); + } + + } else { + // First we need to handle the non byte aligned transfer + + // Aligns address to 8 byte using dst masking method + unaligned_size = 8 - (dev_dest % 8); + if (unaligned_size > count_left) unaligned_size = count_left; + + // Write to the unaligned address + assert(unaligned_size < 8); + uint64_t shift = dev_dest % 8; + + // Write to ASE control to switch page. + ase_window = dev_dest & ~(0xfff); + + MMD_DEBUG("DEBUG LOG : ase_window is "); + MMD_DEBUG("%" PRIu64 "\n", ase_window); + + fpgaWriteMMIO64(m_fpga_handle, 0, ASE_MMIO_BASE + ASE_MMIO_CTRL, ase_window); + + // Get aligned dest address + uint64_t dev_aligned_addr = dev_dest - shift; + assert(dev_aligned_addr % 8 == 0); + + // read data from device memory with aligned dev dest + ase_addr = (dev_aligned_addr & 0xfff); + dev_addr = ASE_MMIO_BASE + ASE_MMIO_WINDOW + ase_addr; + + uint64_t read_tmp = 0; + fpgaReadMMIO64(m_fpga_handle, 0, dev_addr, &read_tmp); + + // overlay our data + memcpy(host_ptr, (reinterpret_cast<char *>(&read_tmp) + shift), unaligned_size); + + count_left -= unaligned_size; + + // Check if there is any byte left + if (count_left == 0) { + return res; + } + + // Now the dest address should be byte aligned now + // Start the regular ASE transfer + curr_host_ptr = (void *)(static_cast<char *>(host_ptr) + unaligned_size); + uint64_t next_dev_dest = dev_dest + unaligned_size; + + while (count_left > 0) { + ase_window = next_dev_dest & ~(0xfff); + ase_addr = (next_dev_dest & 0xfff); // only keep the lower 12 bits. + + // Write to ASE control to switch page. + fpgaWriteMMIO64(m_fpga_handle, 0, ASE_MMIO_BASE + ASE_MMIO_CTRL, ase_window); + + // Set final dev_addr + dev_addr = ASE_MMIO_BASE + ASE_MMIO_WINDOW + ase_addr; + + assert(dev_addr % 8 == 0); + + size_t size = (count_left > 8) ? 8 : count_left; + mmd_helper::read_mmio(m_fpga_handle, curr_host_ptr, dev_addr, size); + + count_left -= size; + next_dev_dest += size; + curr_host_ptr = (void *)(static_cast<char *>(curr_host_ptr) + size); + } + + assert(count_left == 0); + } + return FPGA_OK; +} +} // namespace intel_opae_mmd diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_dma.h b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_dma.h new file mode 100644 index 0000000..a2841b1 --- /dev/null +++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_dma.h @@ -0,0 +1,89 @@ +// (c) 1992-2024 Intel Corporation. +// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words +// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S. +// and/or other countries. Other marks and brands may be claimed as the property +// of others. 
See Trademarks on intel.com for full list of Intel trademarks or +// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera) +// Your use of Intel Corporation's design tools, logic functions and other +// software and tools, and its AMPP partner logic functions, and any output +// files any of the foregoing (including device programming or simulation +// files), and any associated documentation or information are expressly subject +// to the terms and conditions of the Altera Program License Subscription +// Agreement, Intel MegaCore Function License Agreement, or other applicable +// license agreement, including, without limitation, that your use is for the +// sole purpose of programming logic devices manufactured by Intel and sold by +// Intel or its authorized distributors. Please refer to the applicable +// agreement for further details. +#ifndef MMD_DMA_H_ +#define MMD_DMA_H_ + +#include <opae/fpga.h> +#include <poll.h> + +#include <atomic> +#include <chrono> +#include <condition_variable> +#include <mutex> +#include <queue> +#include <thread> +#include <unordered_map> + +#include "aocl_mmd.h" +#include "mmd_helper.h" + +#define DMA_CSR_IDX_SRC_ADDR 0x5 +#define DMA_CSR_IDX_STATUS 0x9 +#define MODE_SHIFT 26 +// For now limits to 16K to avoid DMA transfer hang in hw, further testing required to increase the value. +#define DMA_BUFFER_SIZE (1024 * 16) +#define DMA_LINE_SIZE 64 +#define DMA_HOST_MASK 0x2000000000000 + +#define ASE_MMIO_BASE 0x20000 +#define ASE_MMIO_CTRL 0x200 +#define ASE_MMIO_WINDOW 0x1000 + +namespace intel_opae_mmd { + +enum dma_mode { stand_by = 0x0, host_to_ddr = 0x1, ddr_to_host = 0x2, ddr_to_ddr = 0x3 }; + +struct dma_descriptor_t { + uint64_t src_address; + uint64_t dest_address; + uint32_t len; + uint32_t control; +}; + +class mmd_dma final { + public: + mmd_dma(fpga_handle fpga_handle_arg, int mmd_handle); + ~mmd_dma(); + + bool initialized() { return m_initialized; } + + int fpga_to_host(void *host_addr, uint64_t dev_src, size_t size); + int host_to_fpga(const void *host_addr, uint64_t dev_dest, size_t size); + int dma_transfer(uint64_t dev_src, uint64_t dev_dest, int len, dma_mode descriptor_mode); + fpga_result _ase_host_to_fpga(uint64_t dev_dest, const void *src_ptr, uint64_t count); + fpga_result _ase_fpga_to_host(uint64_t dev_dest, void *host_ptr, uint64_t count); + mmd_dma(mmd_dma &other) = delete; + mmd_dma &operator=(const mmd_dma &other) = delete; + + private: + // Helper functions + int send_descriptor(uint64_t mmio_dst, dma_descriptor_t desc); + // Member variables + bool m_initialized; + fpga_handle m_fpga_handle; + + // Shared buffer in host memory + uint64_t *dma_buf_ptr = NULL; + // Workspace ID used by OPAE to identify buffer + uint64_t dma_buf_wsid; + // IO virtual address + uint64_t dma_buf_iova; +}; + +}; // namespace intel_opae_mmd + +#endif // MMD_DMA_H_ diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_helper.cpp b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_helper.cpp new file mode 100644 index 0000000..4af482a --- /dev/null +++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_helper.cpp @@ -0,0 +1,163 @@ +// (c) 1992-2024 Intel Corporation. +// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words +// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S. +// and/or other countries. Other marks and brands may be claimed as the property +// of others. 
See Trademarks on intel.com for full list of Intel trademarks or +// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera) +// Your use of Intel Corporation's design tools, logic functions and other +// software and tools, and its AMPP partner logic functions, and any output +// files any of the foregoing (including device programming or simulation +// files), and any associated documentation or information are expressly subject +// to the terms and conditions of the Altera Program License Subscription +// Agreement, Intel MegaCore Function License Agreement, or other applicable +// license agreement, including, without limitation, that your use is for the +// sole purpose of programming logic devices manufactured by Intel and sold by +// Intel or its authorized distributors. Please refer to the applicable +// agreement for further details. + +#include "mmd_helper.h" +#include <inttypes.h> + +namespace mmd_helper { + +int read_mmio(fpga_handle mmio_handle, void *host_addr, size_t mmio_addr, size_t size) { + fpga_result res = FPGA_OK; + + MMD_DEBUG("DEBUG LOG : Device::read_mmio start: host_addr : %p\t mmio_addr : 0x%zx\t size : 0x%zx\n", + host_addr, + mmio_addr, + size); + + if (mmio_addr % 4 != 0) { + MMD_DEBUG("DEBUG LOG : ead_mmio function doesn't support non 4 Byte aligned mmio_addr due to OPAE\n"); + return -1; + } + + uint64_t *host_addr64 = static_cast<uint64_t *>(host_addr); + + while (size >= 8) { + MMD_DEBUG("DEBUG LOG : Using fpgaReadMMIO64() host_addr : %p\t mmio_addr : 0x%zx\t size : 0x8\n", + host_addr64, + mmio_addr); + res = fpgaReadMMIO64(mmio_handle, 0, mmio_addr, host_addr64); + if (res != FPGA_OK) { + MMD_DEBUG( + "DEBUG LOG : Error in read_mmio() host_addr : %p\t mmio_addr : 0x%zx\t size : 0x8\n", host_addr64, mmio_addr); + return -1; + } + MMD_DEBUG("DEBUG LOG : the host_addr64 value is "); + MMD_DEBUG("%" PRIu64 "\n", *host_addr64); + host_addr64 += 1; + mmio_addr += 8; + size -= 8; + } + + uint32_t *host_addr32 = reinterpret_cast<uint32_t *>(host_addr64); + while (size >= 4) { + MMD_DEBUG("DEBUG LOG : Using fpgaReadMMIO32() host_addr : %p\t mmio_addr : 0x%zx\t size : 0x4\n", + host_addr32, + mmio_addr); + res = fpgaReadMMIO32(mmio_handle, 0, mmio_addr, host_addr32); + if (res != FPGA_OK) { + MMD_DEBUG( + "DEBUG LOG : Error in read_mmio() host_addr : %p\t mmio_addr : 0x%zx\t size : 0x4\n", host_addr32, mmio_addr); + return -1; + } + host_addr32 += 1; + mmio_addr += 4; + size -= 4; + } + + if (size > 0) { + uint32_t read_data; + MMD_DEBUG("DEBUG LOG : Using fpgaReadMMIO32() host_addr : %p\t mmio_addr : 0x%zx\t size : 0x%zx\n", + host_addr, + mmio_addr, + size); + res = fpgaReadMMIO32(mmio_handle, 0, mmio_addr, &read_data); + if (res != FPGA_OK) { + MMD_DEBUG("DEBUG LOG : Error in read_mmio() host_addr : %p\t mmio_addr : 0x%zx\t size : 0x%zx\n", + host_addr, + mmio_addr, + size); + MMD_DEBUG("result is %d \n", res); + return -1; + } + + memcpy(host_addr32, &read_data, size); + } + + return res; +} + +int write_mmio(fpga_handle mmio_handle, const void *host_addr, size_t mmio_addr, size_t size) { + fpga_result res = FPGA_OK; + + MMD_DEBUG("DEBUG LOG : Device::write_mmio start: host_addr : %p\t mmio_addr : 0x%zx\t size : 0x%zx\n", + host_addr, + mmio_addr, + size); + + const uint64_t *host_addr64 = static_cast<const uint64_t *>(host_addr); + while (size >= 8) { + MMD_DEBUG("DEBUG LOG : Using fpgaWriteMMIO64() host_addr : %p\t mmio_addr : 0x%zx\t \n", + host_addr64, + mmio_addr); + res = fpgaWriteMMIO64(mmio_handle, 0, mmio_addr, 
*host_addr64); + if (res != FPGA_OK) { + MMD_DEBUG("DEBUG LOG : Error in write_mmio() host_addr : %p\t mmio_addr : 0x%zx\t \n", + host_addr64, + mmio_addr); + return -1; + } + host_addr64 += 1; + mmio_addr += 8; + size -= 8; + } + + const uint32_t *host_addr32 = reinterpret_cast<const uint32_t *>(host_addr64); + + while (size >= 4) { + MMD_DEBUG("DEBUG LOG : Using fpgaWriteMMIO32() host_addr : %p\t mmio_addr : 0x%zx\t \n", + host_addr32, + mmio_addr); + res = fpgaWriteMMIO32(mmio_handle, 0, mmio_addr, *host_addr32); + if (res != FPGA_OK) { + MMD_DEBUG("DEBUG LOG : Error in write_mmio() host_addr : %p\t mmio_addr : 0x%zx\t\n", + host_addr32, + mmio_addr); + return -1; + } + host_addr32 += 1; + mmio_addr += 4; + size -= 4; + } + + while (size > 0) { + MMD_DEBUG("DEBUG LOG : Using fpgaWriteMMIO32() host_addr : %p\t mmio_addr : 0x%zx\t size : 0x%zx\n", + host_addr32, + mmio_addr, + size); + uint32_t tmp_data32 = 0; + fpgaReadMMIO32(mmio_handle, 0, mmio_addr, &tmp_data32); // First read the data back + size_t chunk_size = (size >= 4) ? 4 : size; + + memcpy(&tmp_data32, host_addr32, chunk_size); // Apply our data overlay + + res = fpgaWriteMMIO32(mmio_handle, 0, mmio_addr, tmp_data32); + if (res != FPGA_OK) { + MMD_DEBUG("DEBUG LOG : Error in write_mmio() host_addr : %p\t mmio_addr : 0x%zx\t size : 0x%zx\n", + host_addr32, + mmio_addr, + size); + return -1; + } + host_addr32 += 1; + mmio_addr += chunk_size; + size -= chunk_size; + } + + return 0; +} + +}; // namespace mmd_helper diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_helper.h b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_helper.h new file mode 100644 index 0000000..b7e2667 --- /dev/null +++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/host/mmd_helper.h @@ -0,0 +1,41 @@ +// (c) 1992-2024 Intel Corporation. +// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words +// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S. +// and/or other countries. Other marks and brands may be claimed as the property +// of others. See Trademarks on intel.com for full list of Intel trademarks or +// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera) +// Your use of Intel Corporation's design tools, logic functions and other +// software and tools, and its AMPP partner logic functions, and any output +// files any of the foregoing (including device programming or simulation +// files), and any associated documentation or information are expressly subject +// to the terms and conditions of the Altera Program License Subscription +// Agreement, Intel MegaCore Function License Agreement, or other applicable +// license agreement, including, without limitation, that your use is for the +// sole purpose of programming logic devices manufactured by Intel and sold by +// Intel or its authorized distributors. Please refer to the applicable +// agreement for further details. + +#ifndef MMD_HELPER_H +#define MMD_HELPER_H + +#include <opae/fpga.h> +#include <stdarg.h> + +inline void MMD_DEBUG(const char *format, ...) 
{ + if (std::getenv("MMD_ENABLE_DEBUG")) { + va_list arglist; + va_start(arglist, format); + vprintf(format, arglist); + va_end(arglist); + fflush(stdout); + } +} + +namespace mmd_helper { + +int read_mmio(fpga_handle mmio_handle, void *host_addr, size_t mmio_addr, size_t size); +int write_mmio(fpga_handle mmio_handle, const void *host_addr, size_t mmio_addr, size_t size); + +}; // namespace mmd_helper + +#endif // MMD_HELPER_H diff --git a/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/include/aocl_mmd.h b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/include/aocl_mmd.h new file mode 100644 index 0000000..16992da --- /dev/null +++ b/python/openvino/runtime/coredla_device/mmd/agx7_ofs_pcie/include/aocl_mmd.h @@ -0,0 +1,377 @@ +// Copyright 2022 Intel Corporation +// SPDX-License-Identifier: MIT + +#ifndef AOCL_MMD_H +#define AOCL_MMD_H + +/* TODO: this file comes from OpenCL SDK and should be formatted there first */ +/* clang-format off */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Support for memory mapped ACL devices. + * + * Typical API lifecycle, from the perspective of the caller. + * + * 1. aocl_mmd_open must be called first, to provide a handle for further + * operations. + * + * 2. The interrupt and status handlers must be set. + * + * 3. Read and write operations are performed. + * + * 4. aocl_mmd_close may be called to shut down the device. No further + * operations are permitted until a subsequent aocl_mmd_open call. + * + * aocl_mmd_get_offline_info can be called anytime including before + * open. aocl_mmd_get_info can be called anytime between open and close. + */ + +// #ifndef AOCL_MMD_CALL +// #if defined(_WIN32) +// #define AOCL_MMD_CALL __declspec(dllimport) +// #else +// #define AOCL_MMD_CALL +// #endif +// #endif + +#ifndef AOCL_MMD_CALL +#if defined(_WIN32) +#define AOCL_MMD_CALL __declspec(dllimport) +#else +#define AOCL_MMD_CALL __attribute__((visibility ("default"))) +#endif +#endif + +#ifndef WEAK +#if defined(_WIN32) +#define WEAK +#else +#define WEAK __attribute__((weak)) +#endif +#endif + +#ifdef __cplusplus +#include <cstddef> //size_t +#else +#include <stddef.h> //size_t +#endif + +/* The MMD API's version - the runtime expects this string when + * AOCL_MMD_VERSION is queried. This changes only if the API has changed */ +#define AOCL_MMD_VERSION_STRING "20.3" + +/* Memory types that can be supported - bitfield. Other than physical memory + * these types closely align with the OpenCL SVM types. + * + * AOCL_MMD_PHYSICAL_MEMORY - The vendor interface includes IP to communicate + * directly with physical memory such as DDR, QDR, etc. + * + * AOCL_MMD_SVM_COARSE_GRAIN_BUFFER - The vendor interface includes support for + * caching SVM pointer data and requires explicit function calls from the user + * to synchronize the cache between the host processor and the FPGA. This level + * of SVM is not currently supported by Altera except as a subset of + * SVM_FINE_GAIN_SYSTEM support. + * + * AOCL_MMD_SVM_FINE_GRAIN_BUFFER - The vendor interface includes support for + * caching SVM pointer data and requires additional information from the user + * and/or host runtime that can be collected during pointer allocation in order + * to synchronize the cache between the host processor and the FPGA. Once this + * additional data is provided for an SVM pointer, the vendor interface handles + * cache synchronization between the host processor & the FPGA automatically. 
+ * This level of SVM is not currently supported by Altera except as a subset + * of SVM_FINE_GRAIN_SYSTEM support. + * + * AOCL_MMD_SVM_FINE_GRAIN_SYSTEM - The vendor interface includes support for + * caching SVM pointer data and does not require any additional information to + * synchronize the cache between the host processor and the FPGA. The vendor + * interface handles cache synchronization between the host processor & the + * FPGA automatically for all SVM pointers. This level of SVM support is + * currently under development by Altera and some features may not be fully + * supported. + */ +#define AOCL_MMD_PHYSICAL_MEMORY (1 << 0) +#define AOCL_MMD_SVM_COARSE_GRAIN_BUFFER (1 << 1) +#define AOCL_MMD_SVM_FINE_GRAIN_BUFFER (1 << 2) +#define AOCL_MMD_SVM_FINE_GRAIN_SYSTEM (1 << 3) + +/* program modes - bitfield + * + * AOCL_MMD_PROGRAM_PRESERVE_GLOBAL_MEM - preserve contents of global memory + * when this bit is set to 1. If programming can't occur without preserving + * global memory contents, the program function must fail, in which case the + * runtime may re-invoke program with this bit set to 0, allowing programming + * to occur even if doing so destroys global memory contents. + * + * more modes are reserved for stacking on in the future + */ +#define AOCL_MMD_PROGRAM_PRESERVE_GLOBAL_MEM (1 << 0) +typedef int aocl_mmd_program_mode_t; + + +typedef void* aocl_mmd_op_t; + +typedef struct { + unsigned lo; /* 32 least significant bits of time value. */ + unsigned hi; /* 32 most significant bits of time value. */ +} aocl_mmd_timestamp_t; + + +/* Defines the set of characteristics that can be probed about the board before + * opening a device. The type of data returned by each is specified in + * parentheses in the adjacent comment. + * + * AOCL_MMD_NUM_BOARDS and AOCL_MMD_BOARD_NAMES + * These two fields can be used to implement multi-device support. The MMD + * layer may have a list of devices it is capable of interacting with, each + * identified with a unique name. The length of the list should be returned + * in AOCL_MMD_NUM_BOARDS, and the names of these devices returned in + * AOCL_MMD_BOARD_NAMES. The OpenCL runtime will try to call aocl_mmd_open + * for each board name returned in AOCL_MMD_BOARD_NAMES. + */ +typedef enum { + AOCL_MMD_VERSION = 0, /* Version of MMD (char*)*/ + AOCL_MMD_NUM_BOARDS = 1, /* Number of candidate boards (int)*/ + AOCL_MMD_BOARD_NAMES = 2, /* Names of boards available delimiter=; (char*)*/ + AOCL_MMD_VENDOR_NAME = 3, /* Name of vendor (char*) */ + AOCL_MMD_VENDOR_ID = 4, /* An integer ID for the vendor (int) */ + AOCL_MMD_USES_YIELD = 5, /* 1 if yield must be called to poll hw (int) */ + /* The following can be combined in a bit field: + * AOCL_MMD_PHYSICAL_MEMORY, AOCL_MMD_SVM_COARSE_GRAIN_BUFFER, AOCL_MMD_SVM_FINE_GRAIN_BUFFER, AOCL_MMD_SVM_FINE_GRAIN_SYSTEM. + * Prior to 14.1, all existing devices supported physical memory and no types of SVM memory, so this + * is the default when this operation returns '0' for board MMDs with a version prior to 14.1 + */ + AOCL_MMD_MEM_TYPES_SUPPORTED = 6, +} aocl_mmd_offline_info_t; + + +/** Possible capabilities to return from AOCL_MMD_*_MEM_CAPABILITIES query */ +/** + * If not set allocation function is not supported, even if other capabilities are set. + */ +#define AOCL_MMD_MEM_CAPABILITY_SUPPORTED (1 << 0) +/** + * Supports atomic access to the memory by either the host or device. 
+ */ +#define AOCL_MMD_MEM_CAPABILITY_ATOMIC (1 << 1) +/** + * Supports concurrent access to the memory either by host or device if the + * accesses are not on the same block. Block granularity is defined by + * AOCL_MMD_*_MEM_CONCURRENT_GRANULARITY., blocks are aligned to this + * granularity + */ +#define AOCL_MMD_MEM_CAPABILITY_CONCURRENT (1 << 2) +/** + * Memory can be accessed by multiple devices at the same time. + */ +#define AOCL_MMD_MEM_CAPABILITY_P2P (1 << 3) + + +/* Defines the set of characteristics that can be probed about the board after + * opening a device. This can involve communication to the device + * + * AOCL_MMD_NUM_KERNEL_INTERFACES - The number of kernel interfaces, usually 1 + * + * AOCL_MMD_KERNEL_INTERFACES - the handle for each kernel interface. + * param_value will have size AOCL_MMD_NUM_KERNEL_INTERFACES * sizeof int + * + * AOCL_MMD_PLL_INTERFACES - the handle for each pll associated with each + * kernel interface. If a kernel interface is not clocked by acl_kernel_clk + * then return -1 + * + * */ +typedef enum { + AOCL_MMD_NUM_KERNEL_INTERFACES = 1, /* Number of Kernel interfaces (int) */ + AOCL_MMD_KERNEL_INTERFACES = 2, /* Kernel interface (int*) */ + AOCL_MMD_PLL_INTERFACES = 3, /* Kernel clk handles (int*) */ + AOCL_MMD_MEMORY_INTERFACE = 4, /* Global memory handle (int) */ + AOCL_MMD_TEMPERATURE = 5, /* Temperature measurement (float) */ + AOCL_MMD_PCIE_INFO = 6, /* PCIe information (char*) */ + AOCL_MMD_BOARD_NAME = 7, /* Name of board (char*) */ + AOCL_MMD_BOARD_UNIQUE_ID = 8, /* Unique ID of board (int) */ + AOCL_MMD_CONCURRENT_READS = 9, /* # of parallel reads; 1 is serial*/ + AOCL_MMD_CONCURRENT_WRITES = 10, /* # of parallel writes; 1 is serial*/ + AOCL_MMD_CONCURRENT_READS_OR_WRITES = 11, /* total # of concurrent operations read + writes*/ + AOCL_MMD_MIN_HOST_MEMORY_ALIGNMENT = 12, /* Min alignment that the ASP supports for host allocations (size_t) */ + AOCL_MMD_HOST_MEM_CAPABILITIES = 13, /* Capabilities of aocl_mmd_host_alloc() (unsigned int)*/ + AOCL_MMD_SHARED_MEM_CAPABILITIES = 14, /* Capabilities of aocl_mmd_shared_alloc (unsigned int)*/ + AOCL_MMD_DEVICE_MEM_CAPABILITIES = 15, /* Capabilities of aocl_mmd_device_alloc (unsigned int)*/ + AOCL_MMD_HOST_MEM_CONCURRENT_GRANULARITY = 16, /*(size_t)*/ + AOCL_MMD_SHARED_MEM_CONCURRENT_GRANULARITY = 17, /*(size_t)*/ + AOCL_MMD_DEVICE_MEM_CONCURRENT_GRANULARITY = 18, /*(size_t)*/ +} aocl_mmd_info_t; + +typedef struct { + unsigned long long int exception_type; + void *user_private_info; + size_t user_cb; +}aocl_mmd_interrupt_info; + +typedef void (*aocl_mmd_interrupt_handler_fn)( int handle, void* user_data ); +typedef void (*aocl_mmd_device_interrupt_handler_fn)( int handle, aocl_mmd_interrupt_info* data_in, void* user_data ); +typedef void (*aocl_mmd_status_handler_fn)( int handle, void* user_data, aocl_mmd_op_t op, int status ); + + +/* Get information about the board using the enum aocl_mmd_offline_info_t for + * offline info (called without a handle), and the enum aocl_mmd_info_t for + * info specific to a certain board. + * Arguments: + * + * requested_info_id - a value from the aocl_mmd_offline_info_t enum + * + * param_value_size - size of the param_value field in bytes. This should + * match the size of the return type expected as indicated in the enum + * definition. For example, the AOCL_MMD_TEMPERATURE returns a float, so + * the param_value_size should be set to sizeof(float) and you should + * expect the same number of bytes returned in param_size_ret. 
+ * + * param_value - pointer to the variable that will receive the returned info + * + * param_size_ret - receives the number of bytes of data actually returned + * + * Returns: a negative value to indicate error. + */ +AOCL_MMD_CALL int aocl_mmd_get_offline_info( + aocl_mmd_offline_info_t requested_info_id, + size_t param_value_size, + void* param_value, + size_t* param_size_ret ) WEAK; + +AOCL_MMD_CALL int aocl_mmd_get_info( + int handle, + aocl_mmd_info_t requested_info_id, + size_t param_value_size, + void* param_value, + size_t* param_size_ret ) WEAK; + +/* Open and initialize the named device. + * + * The name is typically one specified by the AOCL_MMD_BOARD_NAMES offline + * info. + * + * Arguments: + * name - open the board with this name (provided as a C-style string, + * i.e. NUL terminated ASCII.) + * + * Returns: the non-negative integer handle for the board, otherwise a + * negative value to indicate error. Upon receiving the error, the OpenCL + * runtime will proceed to open other known devices, hence the MMD mustn't + * exit the application if an open call fails. + */ +AOCL_MMD_CALL int aocl_mmd_open(const char *name) WEAK; + +/* Close an opened device, by its handle. + * Returns: 0 on success, negative values on error. + */ +AOCL_MMD_CALL int aocl_mmd_close(int handle) WEAK; + +/* Set the interrupt handler for the opened device. + * The interrupt handler is called whenever the client needs to be notified + * of an asynchronous event signaled by the device internals. + * For example, the kernel has completed or is stalled. + * + * Important: Interrupts from the kernel must be ignored until this handler is + * set + * + * Arguments: + * fn - the callback function to invoke when a kernel interrupt occurs + * user_data - the data that should be passed to fn when it is called. + * + * Returns: 0 if successful, negative on error + */ +AOCL_MMD_CALL int aocl_mmd_set_interrupt_handler( int handle, aocl_mmd_interrupt_handler_fn fn, void* user_data ) WEAK; + +/* Set the operation status handler for the opened device. + * The operation status handler is called with + * status 0 when the operation has completed successfully. + * status negative when the operation completed with errors. + * + * Arguments: + * fn - the callback function to invoke when a status update is to be + * performed. + * user_data - the data that should be passed to fn when it is called. + * + * Returns: 0 if successful, negative on error + */ +AOCL_MMD_CALL int aocl_mmd_set_status_handler( int handle, aocl_mmd_status_handler_fn fn, void* user_data ) WEAK; + +/* Read, write and copy operations on a single interface. + * If op is NULL + * - Then these calls must block until the operation is complete. + * - The status handler is not called for this operation. + * + * If op is non-NULL, then: + * - These may be non-blocking calls + * - The status handler must be called upon completion, with status 0 + * for success, and a negative value for failure. + * + * Arguments: + * op - the operation object used to track this operations progress + * + * len - the size in bytes to transfer + * + * src - the host buffer being read from + * + * dst - the host buffer being written to + * + * mmd_interface - the handle to the interface being accessed. E.g. To + * access global memory this handle will be whatever is returned by + * aocl_mmd_get_info when called with AOCL_MMD_MEMORY_INTERFACE. + * + * offset/src_offset/dst_offset - the byte offset within the interface that + * the transfer will begin at. 
+ * + * The return value is 0 if the operation launch was successful, and + * negative otherwise. + */ +AOCL_MMD_CALL int aocl_mmd_read( + int handle, + aocl_mmd_op_t op, + size_t len, + void* dst, + int mmd_interface, size_t offset) WEAK; +AOCL_MMD_CALL int aocl_mmd_write( + int handle, + aocl_mmd_op_t op, + size_t len, + const void* src, + int mmd_interface, size_t offset ) WEAK; + +/** Error values*/ +#define AOCL_MMD_ERROR_SUCCESS 0 +#define AOCL_MMD_ERROR_INVALID_HANDLE -1 +#define AOCL_MMD_ERROR_OUT_OF_MEMORY -2 +#define AOCL_MMD_ERROR_UNSUPPORTED_ALIGNMENT -3 +#define AOCL_MMD_ERROR_UNSUPPORTED_PROPERTY -4 +#define AOCL_MMD_ERROR_INVALID_POINTER -5 +#define AOCL_MMD_ERROR_INVALID_MIGRATION_SIZE -6 + +// CoreDLA modifications +// To support multiple different FPGA boards, anything board specific must be implemented in a +// board-specific MMD instead of the CoreDLA runtime layer. +#ifdef DLA_MMD +#include <cstdint> +// Query functions to get board-specific values +AOCL_MMD_CALL int dla_mmd_get_max_num_instances() WEAK; +AOCL_MMD_CALL uint64_t dla_mmd_get_ddr_size_per_instance() WEAK; +AOCL_MMD_CALL double dla_mmd_get_ddr_clock_freq() WEAK; + +// Wrappers around CSR and DDR reads and writes to abstract away board-specific offsets +AOCL_MMD_CALL int dla_mmd_csr_write(int handle, int instance, uint64_t addr, const uint32_t* data) WEAK; +AOCL_MMD_CALL int dla_mmd_csr_read(int handle, int instance, uint64_t addr, uint32_t* data) WEAK; +AOCL_MMD_CALL int dla_mmd_ddr_write(int handle, int instance, uint64_t addr, uint64_t length, const void* data) WEAK; +AOCL_MMD_CALL int dla_mmd_ddr_read(int handle, int instance, uint64_t addr, uint64_t length, void* data) WEAK; + +// Get the clk_dla PLL clock frequency in MHz, returns a negative value if there is an error +AOCL_MMD_CALL double dla_mmd_get_coredla_clock_freq(int handle) WEAK; + +#endif + +#ifdef __cplusplus +} +#endif + +/* clang-format on */ +#endif |
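
A minimal sketch of the open / set-handlers / read-write / close lifecycle described in the header above, from a hypothetical caller's point of view. The board name string, buffer size, and offsets are illustrative assumptions; only the function signatures, the callback types, and the AOCL_MMD_MEMORY_INTERFACE query come from aocl_mmd.h.

#include <cstdio>
#include <vector>
#include "aocl_mmd.h"

// Kernel interrupt callback: notified when the device signals an asynchronous event.
static void on_irq(int handle, void *user_data) { (void)handle; (void)user_data; }

// Status callback: invoked with status 0 on success, negative on error
// (blocking calls with op == NULL never reach it).
static void on_status(int handle, void *user_data, aocl_mmd_op_t op, int status) {
  (void)user_data;
  std::printf("handle %d: op %p completed with status %d\n", handle, op, status);
}

int main() {
  // Board names follow the "ofs_<object_id>" pattern parsed by the MMD;
  // the id used here is a made-up placeholder.
  int handle = aocl_mmd_open("ofs_12345678");
  if (handle < 0) return 1;  // e.g. ASP not loaded or ASP init failed

  aocl_mmd_set_interrupt_handler(handle, on_irq, nullptr);
  aocl_mmd_set_status_handler(handle, on_status, nullptr);

  // Ask the MMD which interface handle addresses global (device) memory.
  int mem_interface = 0;
  size_t ret_size = 0;
  aocl_mmd_get_info(handle, AOCL_MMD_MEMORY_INTERFACE, sizeof(mem_interface),
                    &mem_interface, &ret_size);

  // Blocking transfers: passing op == NULL makes the call block until completion.
  std::vector<char> buf(4096, 0x5a);
  aocl_mmd_write(handle, NULL, buf.size(), buf.data(), mem_interface, 0x0);
  aocl_mmd_read(handle, NULL, buf.size(), buf.data(), mem_interface, 0x0);

  return aocl_mmd_close(handle);
}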
