summaryrefslogtreecommitdiff
path: root/python/openvino/demo/ip/intel_ai_ip/verilog/dla_dma.sv
diff options
context:
space:
mode:
Diffstat (limited to 'python/openvino/demo/ip/intel_ai_ip/verilog/dla_dma.sv')
-rw-r--r--python/openvino/demo/ip/intel_ai_ip/verilog/dla_dma.sv668
1 files changed, 668 insertions, 0 deletions
diff --git a/python/openvino/demo/ip/intel_ai_ip/verilog/dla_dma.sv b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_dma.sv
new file mode 100644
index 0000000..a3a133e
--- /dev/null
+++ b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_dma.sv
@@ -0,0 +1,668 @@
+// Copyright 2020-2022 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+
+// This is the top level module for coreDLA DMA.
+//
+// This module includes the following:
+// - CSR (which contains interrupt generating logic as well as descriptor queue)
+// - config reader (which provides input data to config network)
+// - filter reader (which provides input data to filter caches)
+// - feature reader (which provides input data to stream buffers)
+// - DDR arbitration of the readers
+// - feature writer (which accepts output data to write to DDR)
+//
+// The FD and a highly illustrative design review are available at:
+// https://sharepoint.amr.ith.intel.com/sites/DLA/Shared%20Documents/Forms/AllItems.aspx?RootFolder=%2Fsites%2FDLA%2FShared%20Documents%2FcoreDLA%2FFDs%5FTest%5FPlans%2FDMA%5FFD
+//
+// Outside of coreDLA DMA, one still needs to arbitrate DDR between PCIe and DLA.
+// All interfaces of coreDLA DMA are AXI, which means if the outside world favors
+// AvalonMM then the conversion of interfaces must be done outside of coreDLA.
+// The testbench uses AXI BFMs for DDR and PCIe. Inside these bus functional models
+// is synthesizable code that converts between AXI and Avalon and then uses an
+// Avalon BFM.
+//
+// DMA IP depends on other IP blocks, which are listed below:
+// - dla/fpga/hld_lsu/rtl
+// - dla/fpga/hld_fifo/rtl
+// - dla/fpga/acl_dcfifo/rtl
+// - dla/fpga/acl_reset_handler/rtl
+//
+// Not all AXI ports have been implemented. The following assumptions have been made:
+// - id can be assumed to be 0 if not provided (only arid on the ddr interface is provided because there are 3 readers)
+// - burst length is limited to 2**DDR_BURST_WIDTH words in a burst even though the signal width is 8 (as per the axi4 spec)
+// - lock is not used (no atomic access), encoded as 2'b00
+// - cache is not used, device non-bufferable is encoded as 4'b0000
+// - prot is not used, no need to protect against illegal transactions, encode as 3'b000
+// - qos is not used, encode as 4'b0000 to indicate no participation in any quality of service scheme
+// - region is not used, no need for multiple regions, encode as 4'b0000
+// - user sideband signals are not used
+// - if wstrb (byte enable) is not provided, assume all bytes are updated during a write, encode as all ones
+// - response value is ignored for both reads and writes, status assumed to be okay, encoded as 2'h00
+// - read last (last word in a burst) value is ignored since number of words in each burst is tracked internally
+// - low power interface signals are not used
+//
+// Unrelated to the AXI spec itself, beware of the following restrictions on the AXI interfaces:
+// - all addresses are word aligned (required by AvalonMM, not required by AXI)
+// - csr is a slave, it expects the master to produce such transactions
+// - ddr is a master, this simplification can be exploited by the slave e.g. an AXI to AvalonMM converter
+// - bursts will never cross a burst boundary, e.g. if max burst size is 1024 bytes, no burst will ever cross a 1024 byte boundary
+//
+// This module will internally bias in favour of writes to DDR rather than reads. This is a performance optimization
+// for the IP core. The readers in the core will happily continue reading even if the compute engine is stalled due
+// to a full output fifo, so it is believed that keeping the (expensive) output fifo empty is more important than
+// pre-loading the input buffers (which tend to be large anyways in order to avoid DDR reads entirely). (Limited
+// anecdotal evidence supports this hypothesis, as well).
+//
+// Note that the CoreDLA IP has never been tested without the write-over-read priority. It is not impossible that
+// there is a subtle hidden functional requirement for this prioritization (but it seems unlikely except in the
+// case of an extreme imbalance against writes).
+//
+
+`resetall
+`undefineall
+`default_nettype none
+`include "dla_acl_parameter_assert.svh"
+
+module dla_dma import dla_dma_pkg::*; #(
+ parameter int CSR_ADDR_WIDTH, //width of the byte address signal, determines CSR address space size, e.g. 11 bit address = 2048 bytes, the largest size that uses only 1 M20K
+ parameter int CSR_DATA_BYTES, //width of the CSR data path, typically 4 bytes
+ parameter int CONFIG_DATA_BYTES, //data width of the config network output port, typically 4 bytes
+ parameter int CONFIG_READER_DATA_BYTES, //data width of the config network input port, typically 8 bytes
+ parameter int FILTER_READER_DATA_BYTES, //data width of the filter reader, typically a whole DDR word (assuming block floating point, C_VECTOR=16 so 4 filter words packed into 1 DDR word)
+ parameter int FEATURE_READER_DATA_BYTES, //data width of the feature reader, typically half of a DDR word for C_VECTOR=16 (assuming FP16 or smaller)
+ parameter int FEATURE_WRITER_DATA_BYTES, //data width of the feature writer, typically half of a DDR word for C_VECTOR=16 (assuming FP16 or smaller)
+ parameter int DDR_ADDR_WIDTH, //width of all byte address signals to global memory, 32 would allow 4 GB of addressable memory
+ parameter int DDR_BURST_WIDTH, //internal width of the axi burst length signal, typically 4, max number of words in a burst = 2**DDR_BURST_WIDTH
+ parameter int DDR_DATA_BYTES, //width of the global memory data path, must be a power of 2, typically 64 bytes
+ parameter int DDR_READ_ID_WIDTH, //width of the AXI ID signal for DDR reads, must be 2 since there are 3 read masters
+
+ parameter bit ENABLE_INPUT_STREAMING, //forwarded to the CSR; when set, DO_LAYOUT_TRANSFORM of the feature reader is forced to 0
+ parameter bit ENABLE_OUTPUT_STREAMING, //forwarded to the CSR; when set, i_stream_done also counts as a token-done event for the CSR
+
+ parameter dla_common_pkg::device_family_t DEVICE, //enumerated device value, required for dma writer
+ parameter dla_lt_pkg::lt_arch_t LT_ARCH = '{default:0}, // the arch for the dedicated layout transform (if it exists)
+ parameter bit ENABLE_ON_CHIP_PARAMETERS, // Whether configs and filters are on-chip, meaning config reader and filter reader can be disabled
+
+ //derived parameters and constants
+ localparam int AXI_BURST_LENGTH_WIDTH = 8, //width of the axi burst length signal as per the axi4 spec
+ localparam int AXI_BURST_SIZE_WIDTH = 3, //width of the axi burst size signal as per the axi4 spec
+ localparam int AXI_BURST_TYPE_WIDTH = 2 //width of the axi burst type signal as per the axi4 spec
+) (
+ input wire clk_ddr,
+ input wire clk_dla,
+ input wire clk_pcie,
+ input wire i_resetn_async, //active low reset that has NOT been synchronized to any clock
+
+ //interrupt request, AXI4 stream master without data, runs on pcie clock
+ output logic o_interrupt_level,
+
+ //dla can report an error to dma csr by asserting this for one clock cycle, runs on ddr clock
+ input wire i_token_error,
+
+ //CSR can request a reset of the DLA IP by asserting this signal (held until reset), runs on ddr clock
+ output logic o_request_ip_reset,
+
+ input wire i_stream_started, //indicates that the first word of the input stream is being read this cycle
+ input wire i_stream_done, //indicates that the output streamer is done writing the output feature
+
+ // Indicates when input feeder received the first word (aka input streamer sent the first word)
+ // and xbar sent the last word (aka output streamer received the last word)
+ input wire i_stream_received_first_word,
+ input wire i_stream_sent_last_word,
+
+ //CSR, AXI4 lite slave, runs on ddr clock
+ input wire i_csr_arvalid,
+ input wire [CSR_ADDR_WIDTH-1:0] i_csr_araddr,
+ output logic o_csr_arready,
+ output logic o_csr_rvalid,
+ output logic [8*CSR_DATA_BYTES-1:0] o_csr_rdata,
+ input wire i_csr_rready,
+ input wire i_csr_awvalid,
+ input wire [CSR_ADDR_WIDTH-1:0] i_csr_awaddr,
+ output logic o_csr_awready,
+ input wire i_csr_wvalid,
+ input wire [8*CSR_DATA_BYTES-1:0] i_csr_wdata,
+ output logic o_csr_wready,
+ output logic o_csr_bvalid,
+ input wire i_csr_bready,
+
+ //config reader data, AXI4 stream master, runs on dla clock
+ output logic o_config_reader_valid,
+ output logic [8*CONFIG_READER_DATA_BYTES-1:0] o_config_reader_data,
+ input wire i_config_reader_ready,
+
+ //config for filter reader, AXI4 stream slave, runs on ddr clock
+ input wire i_config_filter_reader_valid,
+ input wire [8*CONFIG_DATA_BYTES-1:0] i_config_filter_reader_data,
+ output logic o_config_filter_reader_ready,
+
+ //filter reader data, AXI4 stream master, runs on dla clock
+ output logic o_filter_reader_valid,
+ output logic [8*FILTER_READER_DATA_BYTES-1:0] o_filter_reader_data,
+ input wire i_filter_reader_ready,
+
+ //config for feature reader, AXI4 stream slave, runs on ddr clock
+ input wire i_config_feature_reader_valid,
+ input wire [8*CONFIG_DATA_BYTES-1:0] i_config_feature_reader_data,
+ output logic o_config_feature_reader_ready,
+
+ //config for layout transform, consumed by the feature reader
+ input wire i_config_lt_reader_valid,
+ input wire [8*CONFIG_DATA_BYTES-1:0] i_config_lt_reader_data,
+ output logic o_config_lt_reader_ready,
+
+ //feature reader data, AXI4 stream master, runs on dla clock
+ output logic o_feature_reader_valid,
+ output logic [8*FEATURE_READER_DATA_BYTES-1:0] o_feature_reader_data,
+ input wire i_feature_reader_ready,
+
+ //config for feature writer, AXI4 stream slave, runs on ddr clock
+ input wire i_config_feature_writer_valid,
+ input wire [8*CONFIG_DATA_BYTES-1:0] i_config_feature_writer_data,
+ output logic o_config_feature_writer_ready,
+
+ //feature writer data, AXI4 stream slave, runs on ddr clock
+ input wire i_feature_writer_valid,
+ input wire [8*FEATURE_WRITER_DATA_BYTES-1:0] i_feature_writer_data,
+ output logic o_feature_writer_ready,
+
+ //debug network AXI-4 lite interface, read request and read response channels, runs on dla clock; all six signals connect straight to the CSR
+ output logic o_debug_network_arvalid,
+ output logic [8*CSR_DATA_BYTES-1:0] o_debug_network_araddr,
+ input wire i_debug_network_arready,
+ input wire i_debug_network_rvalid,
+ input wire [8*CSR_DATA_BYTES-1:0] i_debug_network_rdata,
+ output logic o_debug_network_rready,
+
+ //global memory, AXI4 master, runs on ddr clock
+ output logic o_ddr_arvalid,
+ output logic [DDR_ADDR_WIDTH-1:0] o_ddr_araddr,
+ output logic [AXI_BURST_LENGTH_WIDTH-1:0] o_ddr_arlen,
+ output logic [AXI_BURST_SIZE_WIDTH-1:0] o_ddr_arsize,
+ output logic [AXI_BURST_TYPE_WIDTH-1:0] o_ddr_arburst,
+ output logic [DDR_READ_ID_WIDTH-1:0] o_ddr_arid,
+ input wire i_ddr_arready,
+ input wire i_ddr_rvalid,
+ input wire [8*DDR_DATA_BYTES-1:0] i_ddr_rdata,
+ input wire [DDR_READ_ID_WIDTH-1:0] i_ddr_rid,
+ output logic o_ddr_rready,
+ output logic o_ddr_awvalid,
+ output logic [DDR_ADDR_WIDTH-1:0] o_ddr_awaddr,
+ output logic [AXI_BURST_LENGTH_WIDTH-1:0] o_ddr_awlen,
+ output logic [AXI_BURST_SIZE_WIDTH-1:0] o_ddr_awsize,
+ output logic [AXI_BURST_TYPE_WIDTH-1:0] o_ddr_awburst,
+ input wire i_ddr_awready,
+ output logic o_ddr_wvalid,
+ output logic [8*DDR_DATA_BYTES-1:0] o_ddr_wdata,
+ output logic [DDR_DATA_BYTES-1:0] o_ddr_wstrb,
+ output logic o_ddr_wlast,
+ input wire i_ddr_wready,
+ input wire i_ddr_bvalid,
+ output logic o_ddr_bready,
+ output logic o_streaming_active //driven directly by the CSR block
+);
+
+ /////////////////////////////////
+ // Parameter legality checks //
+ /////////////////////////////////
+ //do not allow number of words per burst to exceed the axi spec (even though the LSU will behave just fine); the zero-extension of arlen/awlen in this module also relies on this
+ `DLA_ACL_PARAMETER_ASSERT(DDR_BURST_WIDTH <= AXI_BURST_LENGTH_WIDTH)
+
+ //id width on the ddr interface is a parameter instead of localparam only so that if the value changes,
+ //then it can be changed in one place instead of everywhere the signal width is used
+ //3 readers require a 2 bit id
+ `DLA_ACL_PARAMETER_ASSERT(DDR_READ_ID_WIDTH == 2)
+
+ //data width is limited by the axi spec
+ `DLA_ACL_PARAMETER_ASSERT(DDR_DATA_BYTES >= 1 && DDR_DATA_BYTES <= 128)
+
+ //load-store units require a power of 2 width for the global memory interface
+ `DLA_ACL_PARAMETER_ASSERT(DDR_DATA_BYTES == 2**$clog2(DDR_DATA_BYTES))
+
+
+ ///////////////
+ // Signals //
+ ///////////////
+
+ //reset, active low, produced by ddr_reset_synchronizer on clk_ddr
+ logic ddr_sclrn;
+
+ //feature writer reports it is done, goes to csr and feature reader
+ logic token_done_csr, token_done_reader;
+
+ logic license_flag; //from feature writer to csr
+ logic writer_error; //from feature writer, connected to the csr as i_token_out_of_inferences
+
+ //csr to config reader or to ddrfree config network (langsu: latter is not implemented yet)
+ logic [8*CONFIG_DATA_BYTES-1:0] csr_config_data;
+ logic csr_config_valid, csr_config_for_intercept, csr_config_ready;
+
+ //lsu to read arb, one entry per reader, indexed by CONFIG_READER_ID / FILTER_READER_ID / FEATURE_READER_ID
+ logic lsu_ddr_arvalid [NUM_READERS-1:0];
+ logic [DDR_ADDR_WIDTH-1:0] lsu_ddr_araddr [NUM_READERS-1:0];
+ logic [DDR_BURST_WIDTH-1:0] lsu_ddr_arlen [NUM_READERS-1:0];
+ logic lsu_ddr_arready [NUM_READERS-1:0];
+ logic lsu_ddr_rvalid [NUM_READERS-1:0];
+ logic [8*DDR_DATA_BYTES-1:0] lsu_ddr_rdata [NUM_READERS-1:0];
+ logic lsu_ddr_rready [NUM_READERS-1:0];
+
+ //favor writes over reads for ddr; see the comment block by the
+ //combinational logic associated with these signals for the explanation
+ //of their naming.
+ logic dma_prevcycle_read_not_acknowledged;
+ logic write_overrides_read;
+ logic rawp_ddr_awvalid, rawp_ddr_wvalid;
+ logic rawf_ddr_awready, rawf_ddr_wready;
+ logic rawp_ddr_arvalid;
+ logic rawf_ddr_arready;
+
+ //internal burst lengths are only DDR_BURST_WIDTH bits wide, they are zero-extended to the 8 bits required by the axi spec
+ logic [DDR_BURST_WIDTH-1:0] raw_ddr_arlen;
+ logic [DDR_BURST_WIDTH-1:0] raw_ddr_awlen;
+
+ //used to backpressure ddrfree config network read
+ logic streaming_reload; //NOTE(review): declared but never driven or read anywhere in this module - confirm whether it can be removed
+ logic lt_param_error; //from feature reader, OR-ed with i_token_error before going to the csr
+
+ /////////////////////////////
+ // Reset Synchronization //
+ /////////////////////////////
+ //turns the unsynchronized, active low i_resetn_async into ddr_sclrn, the active low
+ //reset used by all clk_ddr logic in this module; the handler is configured with the
+ //RESET_* constants which are not declared in this file
+ dla_reset_handler_simple #(
+ .NUM_COPIES (RESET_NUM_COPIES),
+ .PIPE_DEPTH (RESET_PIPE_DEPTH),
+ .USE_SYNCHRONIZER (RESET_USE_SYNCHRONIZER)
+ ) ddr_reset_synchronizer (
+ .i_resetn (i_resetn_async),
+ .clk (clk_ddr),
+ .o_sclrn (ddr_sclrn)
+ );
+
+
+
+ ///////////
+ // CSR //
+ ///////////
+ //host-visible register interface and interrupt source; it also contains the descriptor
+ //queue that provides work to the config reader over the csr_config_* handshake
+ dla_dma_csr #(
+ .ENABLE_ON_CHIP_PARAMETERS (ENABLE_ON_CHIP_PARAMETERS),
+ .ENABLE_OUTPUT_STREAMING (ENABLE_OUTPUT_STREAMING),
+ .ENABLE_INPUT_STREAMING (ENABLE_INPUT_STREAMING),
+ .CONFIG_READER_DATA_BYTES (CONFIG_READER_DATA_BYTES),
+ .CONFIG_DATA_BYTES (CONFIG_DATA_BYTES),
+ .CSR_DATA_BYTES (CSR_DATA_BYTES),
+ .CSR_ADDR_WIDTH (CSR_ADDR_WIDTH)
+ ) csr (
+ //clocks and resets
+ .clk_pcie (clk_pcie),
+ .clk_dla (clk_dla),
+ .clk_ddr (clk_ddr),
+ .i_resetn_async (i_resetn_async),
+ .i_sclrn_ddr (ddr_sclrn),
+ //host register access (AXI4 lite slave) and the outputs the csr drives straight to the module ports
+ .i_csr_awvalid (i_csr_awvalid),
+ .i_csr_awaddr (i_csr_awaddr),
+ .o_csr_awready (o_csr_awready),
+ .i_csr_wvalid (i_csr_wvalid),
+ .i_csr_wdata (i_csr_wdata),
+ .o_csr_wready (o_csr_wready),
+ .o_csr_bvalid (o_csr_bvalid),
+ .i_csr_bready (i_csr_bready),
+ .i_csr_arvalid (i_csr_arvalid),
+ .i_csr_araddr (i_csr_araddr),
+ .o_csr_arready (o_csr_arready),
+ .o_csr_rvalid (o_csr_rvalid),
+ .o_csr_rdata (o_csr_rdata),
+ .i_csr_rready (i_csr_rready),
+ .o_interrupt_level (o_interrupt_level),
+ .o_request_ip_reset (o_request_ip_reset),
+ .o_streaming_active (o_streaming_active),
+ //descriptor queue output towards the config reader, then the debug network read channels
+ .o_config_valid (csr_config_valid),
+ .o_config_data (csr_config_data),
+ .o_config_for_intercept (csr_config_for_intercept),
+ .i_config_ready (csr_config_ready),
+ .o_debug_network_arvalid (o_debug_network_arvalid),
+ .o_debug_network_araddr (o_debug_network_araddr),
+ .i_debug_network_arready (i_debug_network_arready),
+ .i_debug_network_rvalid (i_debug_network_rvalid),
+ .i_debug_network_rdata (i_debug_network_rdata),
+ .o_debug_network_rready (o_debug_network_rready),
+ //token, error and streaming events, followed by the DDR handshakes that the csr observes
+ .i_token_done (token_done_csr | (i_stream_done & ENABLE_OUTPUT_STREAMING)), //feature writer done, or output streamer done when output streaming is enabled
+ .i_token_stream_started (i_stream_started),
+ .i_token_error (i_token_error | lt_param_error), //external error or feature reader parameter error
+ .i_token_out_of_inferences (writer_error),
+ .i_license_flag (license_flag),
+ .i_stream_received_first_word (i_stream_received_first_word),
+ .i_stream_sent_last_word (i_stream_sent_last_word),
+ .i_input_filter_rvalid (lsu_ddr_rvalid[FILTER_READER_ID]),
+ .i_input_filter_rready (lsu_ddr_rready[FILTER_READER_ID]),
+ .i_input_feature_rvalid (lsu_ddr_rvalid[FEATURE_READER_ID]),
+ .i_input_feature_rready (lsu_ddr_rready[FEATURE_READER_ID]),
+ .i_output_feature_wvalid (rawp_ddr_wvalid), //valid as produced by the feature writer, before the write-over-read forcing
+ .i_output_feature_wready (rawf_ddr_wready) //ready as seen by the feature writer, after the write-over-read forcing
+ );
+
+
+
+ /////////////////////
+ // Config reader //
+ /////////////////////
+
+ //the config interface of the generic dma reader comes from the descriptor queue inside the csr
+ //output data interface of the generic dma reader serves as the input for the config network
+ //when ENABLE_ON_CHIP_PARAMETERS is set no config reader is built and everything it would drive is tied off in the else branch
+ if (~ENABLE_ON_CHIP_PARAMETERS) begin
+ dla_dma_reader #(
+ .READER_WRITER_SEL (CONFIG_READER_ID),
+ .IS_CONFIG_READER (1),
+ .NUM_DIMENSIONS (CONFIG_READER_NUM_DIMENSIONS),
+ .CONFIG_DATA_BYTES (CONFIG_DATA_BYTES),
+ .READER_DATA_BYTES (CONFIG_READER_DATA_BYTES),
+ .DDR_ADDR_WIDTH (DDR_ADDR_WIDTH),
+ .DDR_DATA_BYTES (DDR_DATA_BYTES),
+ .DDR_BURST_WIDTH (DDR_BURST_WIDTH),
+ .LT_ARCH (LT_ARCH)
+ )
+ config_reader
+ (
+ .clk_ddr (clk_ddr),
+ .clk_dla (clk_dla),
+ .i_sclrn_ddr (ddr_sclrn),
+ .i_resetn_async (i_resetn_async),
+ .i_config_valid (csr_config_valid),
+ .i_config_data (csr_config_data),
+ .i_config_for_intercept (csr_config_for_intercept),
+ .o_config_ready (csr_config_ready), // config reader is ready to receive data from csr
+ .i_token_can_start (1'b0), //config data is read only, there is no data dependency that would prevent the config reader from starting
+ .o_reader_valid (o_config_reader_valid), // config data read is valid
+ .o_reader_data (o_config_reader_data), // config data read from reader
+ .i_reader_ready (i_config_reader_ready), // config network is ready to receive config data
+ .o_ddr_arvalid (lsu_ddr_arvalid[CONFIG_READER_ID]),
+ .o_ddr_araddr (lsu_ddr_araddr [CONFIG_READER_ID]),
+ .o_ddr_arlen (lsu_ddr_arlen [CONFIG_READER_ID]),
+ .i_ddr_arready (lsu_ddr_arready[CONFIG_READER_ID]),
+ .i_ddr_rvalid (lsu_ddr_rvalid [CONFIG_READER_ID]),
+ .i_ddr_rdata (lsu_ddr_rdata [CONFIG_READER_ID]),
+ .o_ddr_rready (lsu_ddr_rready [CONFIG_READER_ID])
+ );
+ end else begin
+ //no config reader exists: tie off every signal it would have driven so that nothing is left undriven (X in simulation)
+ assign lsu_ddr_arvalid[CONFIG_READER_ID] = 1'b0; //this port never requests a DDR read from the arbiter
+ assign lsu_ddr_araddr [CONFIG_READER_ID] = '0; //presumably ignored by the arbiter while arvalid is low, tied off so the arbiter input has a driver
+ assign lsu_ddr_arlen [CONFIG_READER_ID] = '0; //presumably ignored by the arbiter while arvalid is low, tied off so the arbiter input has a driver
+ assign lsu_ddr_rready [CONFIG_READER_ID] = 1'b0; //held low; since no read is ever requested for this port, no read data should come back for it
+ assign o_config_reader_valid = 1'b0; //Don't care. config_network doesn't check valid to start ddrfree config read
+ assign o_config_reader_data = '0; //never valid, tied off so the output port has a driver
+ assign csr_config_ready = 1'b1; //Set to 1 to not stall descriptor_queue
+ end
+
+
+ /////////////////////
+ // Filter reader //
+ /////////////////////
+ //when ENABLE_ON_CHIP_PARAMETERS is set no filter reader is built and everything it would drive is tied off in the else branch
+ if (~ENABLE_ON_CHIP_PARAMETERS) begin
+ dla_dma_reader #(
+ .READER_WRITER_SEL (FILTER_READER_ID),
+ .IS_CONFIG_READER (0),
+ .NUM_DIMENSIONS (FILTER_READER_NUM_DIMENSIONS),
+ .CONFIG_DATA_BYTES (CONFIG_DATA_BYTES),
+ .READER_DATA_BYTES (FILTER_READER_DATA_BYTES),
+ .DDR_ADDR_WIDTH (DDR_ADDR_WIDTH),
+ .DDR_DATA_BYTES (DDR_DATA_BYTES),
+ .DDR_BURST_WIDTH (DDR_BURST_WIDTH),
+ .LT_ARCH (LT_ARCH)
+ )
+ filter_reader
+ (
+ .clk_ddr (clk_ddr),
+ .clk_dla (clk_dla),
+ .i_sclrn_ddr (ddr_sclrn),
+ .i_resetn_async (i_resetn_async),
+ .i_config_valid (i_config_filter_reader_valid),
+ .i_config_data (i_config_filter_reader_data),
+ .i_config_for_intercept (1'b0), //since this is not the config reader, all config data goes to the address generator
+ .o_config_ready (o_config_filter_reader_ready), // filter reader module is ready to receive config data from config network
+ .i_token_can_start (1'b0), //filter data is read only, there is no data dependency that would prevent the filter reader from starting
+ .o_reader_valid (o_filter_reader_valid),
+ .o_reader_data (o_filter_reader_data),
+ .i_reader_ready (i_filter_reader_ready),
+ .o_ddr_arvalid (lsu_ddr_arvalid[FILTER_READER_ID]),
+ .o_ddr_araddr (lsu_ddr_araddr [FILTER_READER_ID]),
+ .o_ddr_arlen (lsu_ddr_arlen [FILTER_READER_ID]),
+ .i_ddr_arready (lsu_ddr_arready[FILTER_READER_ID]),
+ .i_ddr_rvalid (lsu_ddr_rvalid [FILTER_READER_ID]),
+ .i_ddr_rdata (lsu_ddr_rdata [FILTER_READER_ID]),
+ .o_ddr_rready (lsu_ddr_rready [FILTER_READER_ID])
+ );
+ end else begin
+ //no filter reader exists: tie off every signal it would have driven so that nothing is left undriven (X in simulation)
+ assign lsu_ddr_arvalid[FILTER_READER_ID] = 1'b0; //this port never requests a DDR read from the arbiter
+ assign lsu_ddr_araddr [FILTER_READER_ID] = '0; //presumably ignored by the arbiter while arvalid is low, tied off so the arbiter input has a driver
+ assign lsu_ddr_arlen [FILTER_READER_ID] = '0; //presumably ignored by the arbiter while arvalid is low, tied off so the arbiter input has a driver
+ assign lsu_ddr_rready [FILTER_READER_ID] = 1'b0; //held low; since no read is ever requested for this port, no read data should come back for it
+ assign o_filter_reader_valid = 1'b0; //Don't care. Sequencer ignores this
+ assign o_filter_reader_data = '0; //never valid, tied off so the output port has a driver
+ //Indicate filter reader is ready to receive configs, so that config network fifo pops filter reader configs out.
+ //We don't really use them because we don't have any filter_reader
+ assign o_config_filter_reader_ready = 1'b1;
+ end
+
+
+ //////////////////////
+ // Feature reader //
+ //////////////////////
+ //the only reader whose i_token_can_start is driven by a real signal (token_done_reader from the
+ //feature writer) and the only one connected to the layout transform config stream;
+ //DO_LAYOUT_TRANSFORM is forced to 0 whenever input streaming is enabled
+ dla_dma_reader #(
+ .LT_ARCH (LT_ARCH),
+ .DO_LAYOUT_TRANSFORM (LT_ARCH.ENABLE_LT & ~ENABLE_INPUT_STREAMING),
+ .IS_CONFIG_READER (0),
+ .READER_WRITER_SEL (FEATURE_READER_ID),
+ .NUM_DIMENSIONS (FEATURE_READER_NUM_DIMENSIONS),
+ .READER_DATA_BYTES (FEATURE_READER_DATA_BYTES),
+ .CONFIG_DATA_BYTES (CONFIG_DATA_BYTES),
+ .DDR_DATA_BYTES (DDR_DATA_BYTES),
+ .DDR_BURST_WIDTH (DDR_BURST_WIDTH),
+ .DDR_ADDR_WIDTH (DDR_ADDR_WIDTH)
+ ) feature_reader (
+ .clk_dla (clk_dla),
+ .clk_ddr (clk_ddr),
+ .i_resetn_async (i_resetn_async),
+ .i_sclrn_ddr (ddr_sclrn),
+ .i_token_can_start (token_done_reader), //from the feature writer
+ .o_param_error (lt_param_error), //OR-ed into the error input of the csr
+ .i_config_for_intercept (1'b0), //since this is not the config reader, all config data goes to the address generator
+ .i_config_valid (i_config_feature_reader_valid),
+ .i_config_data (i_config_feature_reader_data),
+ .o_config_ready (o_config_feature_reader_ready),
+ .i_config_lt_valid (i_config_lt_reader_valid),
+ .i_config_lt_data (i_config_lt_reader_data),
+ .o_config_lt_ready (o_config_lt_reader_ready),
+ .o_ddr_arvalid (lsu_ddr_arvalid[FEATURE_READER_ID]),
+ .o_ddr_araddr (lsu_ddr_araddr [FEATURE_READER_ID]),
+ .o_ddr_arlen (lsu_ddr_arlen [FEATURE_READER_ID]),
+ .i_ddr_arready (lsu_ddr_arready[FEATURE_READER_ID]),
+ .i_ddr_rvalid (lsu_ddr_rvalid [FEATURE_READER_ID]),
+ .i_ddr_rdata (lsu_ddr_rdata [FEATURE_READER_ID]),
+ .o_ddr_rready (lsu_ddr_rready [FEATURE_READER_ID]),
+ .o_reader_valid (o_feature_reader_valid),
+ .o_reader_data (o_feature_reader_data),
+ .i_reader_ready (i_feature_reader_ready)
+ );
+
+
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Arbitrate read requests and steer read data for all DLA DMA readers //
+ ///////////////////////////////////////////////////////////////////////////
+
+ //one arbiter port per reader (NUM_READERS in total), all on clk_ddr
+ //the read address handshake does not go straight to the module ports: arvalid leaves here as
+ //rawp_ddr_arvalid and arready comes back as rawf_ddr_arready, with the write-over-read forcing in between
+ //note there is another DDR arbiter between PCIe and DLA that lives outside of DLA
+ dla_dma_read_arb #(
+ .DDR_DATA_BYTES (DDR_DATA_BYTES),
+ .DDR_BURST_WIDTH (DDR_BURST_WIDTH),
+ .DDR_ADDR_WIDTH (DDR_ADDR_WIDTH),
+ .NUM_PORTS (NUM_READERS)
+ ) read_arb (
+ .i_sclrn (ddr_sclrn),
+ .clk (clk_ddr),
+
+ // Reader side: read requests from config, filter, and feature readers
+ .o_lsu_arready (lsu_ddr_arready),
+ .i_lsu_arvalid (lsu_ddr_arvalid),
+ .i_lsu_araddr (lsu_ddr_araddr),
+ .i_lsu_arlen (lsu_ddr_arlen),
+
+ // Reader side: read data to config, filter, and feature readers
+ .i_lsu_rready (lsu_ddr_rready),
+ .o_lsu_rvalid (lsu_ddr_rvalid),
+ .o_lsu_rdata (lsu_ddr_rdata),
+
+ // DDR side: read address to the external world; tagged with an arid
+ .i_ddr_arready (rawf_ddr_arready),
+ .o_ddr_arvalid (rawp_ddr_arvalid),
+ .o_ddr_arid (o_ddr_arid),
+ .o_ddr_araddr (o_ddr_araddr),
+ .o_ddr_arlen (raw_ddr_arlen),
+
+ // DDR side: read data from external world
+ .o_ddr_rready (o_ddr_rready),
+ .i_ddr_rvalid (i_ddr_rvalid),
+ .i_ddr_rid (i_ddr_rid),
+ .i_ddr_rdata (i_ddr_rdata)
+ );
+
+ // Prioritize writes over reads; believed to improve performance.
+ //
+ // We do a couple things here:
+ // 1) If the prev cycle was an unacknowledged read (arvalid HIGH *and* arready LOW), then
+ // we must keep the read request outstanding. In this case, block any writes (by
+ // forcing awvalid, wvalid, awready, and wready to LOW).
+ // 2) Otherwise, if there is a write request, then block a read request if a new request
+ // has been made (do this by forcing arready and arvalid to LOW). (Note that we
+ // do not touch rready or rvalid, since if data happens to arrive, then there is
+ // no reason not to accept it if we can).
+ // 3) Lastly, allow a read request through (if any).
+ //
+ // rawp_ -- these are the internal signals prior to our forcing logic (driven by the read_arb / feature writer).
+ // rawf_ -- these are the internal signals after our forcing logic (consumed by the read_arb / feature writer).
+ //
+ // Where the forcing logic drives directly to the external world, we simply use the
+ // external signal name.
+ //
+
+ // 1: Block writes, if necessary.
+ //
+ // If the previous cycle was a read (whether an acknowledged read or an unacknowledged read),
+ // then neither o_ddr_awvalid nor o_ddr_wvalid were HIGH.
+ // Therefore it is okay to keep them forced to LOW, regardless of rawp_ddr_awvalid/rawp_ddr_wvalid.
+ assign o_ddr_awvalid = rawp_ddr_awvalid & ~dma_prevcycle_read_not_acknowledged;
+ assign o_ddr_wvalid = rawp_ddr_wvalid & ~dma_prevcycle_read_not_acknowledged;
+ assign rawf_ddr_awready = i_ddr_awready & ~dma_prevcycle_read_not_acknowledged;
+ assign rawf_ddr_wready = i_ddr_wready & ~dma_prevcycle_read_not_acknowledged;
+
+ // 2: Is a write going to over-ride today's read request?
+ //
+ // Note that if dma_prevcycle_read_not_acknowledged is HIGH, then both o_ddr_awvalid
+ // and o_ddr_wvalid will already be forced LOW by the logic above, so
+ // "write_overrides_read" can never happen if the previous cycle was an unacknowledged
+ // read.
+ assign write_overrides_read = o_ddr_awvalid | o_ddr_wvalid;
+
+ // 3: Otherwise the read request (if any) passes through unchanged in both directions.
+ assign o_ddr_arvalid = rawp_ddr_arvalid & ~write_overrides_read;
+ assign rawf_ddr_arready = i_ddr_arready & ~write_overrides_read;
+
+ //axi spec requires a signal width of 8 for burst length, so zero-extend the internal DDR_BURST_WIDTH-bit value
+ assign o_ddr_arlen = { {(AXI_BURST_LENGTH_WIDTH-DDR_BURST_WIDTH){1'h0}}, raw_ddr_arlen };
+
+ //tie off constant axi signals
+ assign o_ddr_arsize = $clog2(DDR_DATA_BYTES); //burst size is always maximal, e.g. all bytes within a word should be transferred
+ assign o_ddr_arburst = 2'h1; //burst type is incrementing, this value comes from the axi spec
+
+ // If the previous cycle was an unacknowledged read, then we must continue to assert arvalid
+ // until it is acknowledged by arready. We are not allowed to randomly deassert arvalid. Use
+ // dma_prevcycle_read_not_acknowledged to track this condition.
+ always_ff @(posedge clk_ddr) begin
+ dma_prevcycle_read_not_acknowledged <= o_ddr_arvalid & ~i_ddr_arready; //default update, overridden by the reset branch below (the last nonblocking assignment wins)
+
+ if (~ddr_sclrn) begin
+ // If we are in reset, then o_ddr_arvalid will be LOW since the read_arb
+ // is in reset as well (forcing rawp_ddr_arvalid to LOW). The read_arb
+ // shares our reset, namely ddr_sclrn. This means that we can safely
+ // reset dma_prevcycle_read_not_acknowledged to LOW.
+ //
+ // We can make no assumption about i_ddr_arready while in reset.
+ dma_prevcycle_read_not_acknowledged <= 1'b0;
+ end
+ end
+
+ //////////////////////
+ // Feature writer //
+ //////////////////////
+ //the only DDR write master in this module; its address and data handshakes go through the
+ //write-over-read forcing: valids leave here as rawp_* and readys come back as rawf_*
+ //it also produces the done tokens for the csr and for the feature reader
+ dla_dma_writer #(
+ .DEVICE (DEVICE),
+ .READER_WRITER_SEL (FEATURE_WRITER_ID),
+ .NUM_DIMENSIONS (FEATURE_WRITER_NUM_DIMENSIONS),
+ .WRITER_DATA_BYTES (FEATURE_WRITER_DATA_BYTES),
+ .CONFIG_DATA_BYTES (CONFIG_DATA_BYTES),
+ .DDR_DATA_BYTES (DDR_DATA_BYTES),
+ .DDR_BURST_WIDTH (DDR_BURST_WIDTH),
+ .DDR_ADDR_WIDTH (DDR_ADDR_WIDTH)
+ ) feature_writer (
+ .clk_ddr (clk_ddr),
+ .i_resetn_async (i_resetn_async),
+ .i_sclrn_ddr (ddr_sclrn),
+ .o_token_done_reader (token_done_reader),
+ .o_token_done_csr (token_done_csr),
+ .o_writer_err (writer_error),
+ .o_license_flag (license_flag),
+ .o_config_ready (o_config_feature_writer_ready),
+ .i_config_valid (i_config_feature_writer_valid),
+ .i_config_data (i_config_feature_writer_data),
+ .o_writer_ready (o_feature_writer_ready),
+ .i_writer_valid (i_feature_writer_valid),
+ .i_writer_data (i_feature_writer_data),
+ .i_ddr_awready (rawf_ddr_awready),
+ .o_ddr_awvalid (rawp_ddr_awvalid),
+ .o_ddr_awaddr (o_ddr_awaddr),
+ .o_ddr_awlen (raw_ddr_awlen),
+ .i_ddr_wready (rawf_ddr_wready),
+ .o_ddr_wvalid (rawp_ddr_wvalid),
+ .o_ddr_wlast (o_ddr_wlast),
+ .o_ddr_wstrb (o_ddr_wstrb),
+ .o_ddr_wdata (o_ddr_wdata),
+ .o_ddr_bready (o_ddr_bready),
+ .i_ddr_bvalid (i_ddr_bvalid)
+ );
+
+ //constant axi signals of the write address channel simply mirror the read address channel
+ assign o_ddr_awburst = o_ddr_arburst;
+ assign o_ddr_awsize = o_ddr_arsize;
+
+ //the axi spec wants an 8 bit burst length, so the internal value is padded with zeros on top
+ assign o_ddr_awlen = { {(AXI_BURST_LENGTH_WIDTH-DDR_BURST_WIDTH){1'h0}}, raw_ddr_awlen };
+
+endmodule