summaryrefslogtreecommitdiff
path: root/python/openvino/demo/ip/intel_ai_ip/verilog/dla_input_streamer.sv
diff options
context:
space:
mode:
Diffstat (limited to 'python/openvino/demo/ip/intel_ai_ip/verilog/dla_input_streamer.sv')
-rw-r--r--python/openvino/demo/ip/intel_ai_ip/verilog/dla_input_streamer.sv218
1 files changed, 218 insertions, 0 deletions
diff --git a/python/openvino/demo/ip/intel_ai_ip/verilog/dla_input_streamer.sv b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_input_streamer.sv
new file mode 100644
index 0000000..2f77c53
--- /dev/null
+++ b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_input_streamer.sv
@@ -0,0 +1,218 @@
+// Copyright 2024 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/**
+ * dla_input_streamer.sv
+ *
+ * FPGA AI Suite input streaming is handled here. This provides an AXI interface to the top-level entity,
+ * and is responsible for clock-crossing from AXI to DLA clock domains. This module is also responsible for
+ * applying the layout transform to incoming data.
+ *
+ * For now, enabling streaming implies enabling the hardware layout transform module.
+ * The layout transform assumes input tensors in HWC format. The input bus width is arbitrary: width
+ * conversion is done in the layout transform, whose output width is always CVEC*sizeof(fp16).
+ *
+ * Flow control is handled by the configuration of the stream-buffer writer. Backpressuring from the SB
+ * writer will be propagated to this AXI connection to avoid the need to configure the transfer frame sizes
+ * in this module.
+ *
+ */
+
+/**
+TODO (arooney):
+ - Consider the behaviour when a frame is done and gets consumed by the SB: the
+ LT can accept a few frames before backpressuring, but by then the producer is a few packets into
+ the transmission. Maybe it's best to only accept data when the layout transform is done AND the
+ SB is ready.
+ - Implement strobe signal handling.
+ - Remove unused AXI signals.
+*/
+
+`resetall
+`undefineall
+`default_nettype none
+
+module dla_input_streamer // AXI-Stream input -> layout transform (clk_axi) -> dual-clock FIFO -> o_istream_* (clk_dla)
+#(
+ parameter int TDATA_WIDTH, // width in bits of i_tdata; TDATA_WIDTH/8 sizes i_tstrb/i_tkeep and is the layout transform's DDR_BYTES
+ parameter int FIFO_DEPTH, // DEPTH of the clock-crossing dla_acl_dcfifo
+ parameter int TID_WIDTH, // width of i_tid (input is not used)
+ parameter int TDEST_WIDTH, // width of i_tdest (input is not used)
+ parameter int TUSER_WIDTH, // width of i_tuser (input is not used)
+ parameter dla_lt_pkg::lt_arch_t LT_ARCH, // layout transform architecture; its fields are forwarded to dla_layout_transform
+ parameter int OUTPUT_WIDTH // width of the layout transform output, the FIFO data and o_istream_data
+) (
+ input wire clk_dla, // read-side clock of the clock-crossing FIFO; also passed to the streamer FSM
+ input wire clk_ddr, // destination clock of the o_param_error clock crossing
+ // AMBA AXI-Stream signals
+ input wire clk_axi, // clock of the layout transform and of the FIFO write side
+ input wire i_resetn_async, // asynchronous reset (active-low, going by the naming); synchronized locally per clock domain
+ input wire [LT_ARCH.CONFIG_BYTES*8-1:0] i_config_data, // forwarded unchanged to the layout transform
+ input wire i_config_valid, // forwarded unchanged to the layout transform
+ output logic o_config_ready, // driven directly by the layout transform
+ input wire i_streaming_enable, // forwarded to dla_streamer_fsm
+ input wire i_tvalid, // indicates the transmitter is driving a valid transfer
+ output logic o_tready, // indicates that the receiver can accept a transfer
+ input wire [TDATA_WIDTH-1:0] i_tdata, // the primary payload of the interface
+ input wire [TDATA_WIDTH/8-1:0] i_tstrb, // (NOT USED) byte qualifier indicating whether the
+ // associated byte in tdata should be processed
+ // as a data byte or as a position byte
+ input wire [TDATA_WIDTH/8-1:0] i_tkeep, // (NOT USED) byte qualifier indicating whether the
+ // contents of tdata is processed as part of the data stream
+ input wire i_tlast, // (NOT USED) indicates the boundary of a packet
+ input wire [TID_WIDTH-1:0] i_tid, // (NOT USED) a data stream identifier
+ input wire [TDEST_WIDTH-1:0] i_tdest, // (NOT USED) provides routing information for the data stream
+ input wire [TUSER_WIDTH-1:0] i_tuser, // (NOT USED) user-defined sideband information
+ input wire i_twakeup, // (AXI5-S ONLY, NOT USED) identifies any activity associated with the AXI-s interface
+ // output stream towards the input feeder, taken from the read side of the clock-crossing FIFO
+ output logic [OUTPUT_WIDTH-1:0] o_istream_data,
+ output logic o_istream_valid,
+ input wire i_istream_ready, // from input feeder; drives the FIFO rd_ack and the FSM's i_dla_ready
+
+ output logic o_reading_first_word, // for CSR active-jobs counter; driven by dla_streamer_fsm
+ output logic o_param_error // layout transform's o_param_error after crossing from clk_axi to clk_ddr
+);
+ // Internal signals.
+ logic resetn; // i_resetn_async after the clk_axi reset handler
+ logic resetn_clk_dla; // i_resetn_async after the clk_dla reset handler; NOTE(review): never read in this module
+ logic reader_empty; // rd_empty of the clock-crossing FIFO
+ logic [OUTPUT_WIDTH-1:0] dcfifo_data; // layout transform output data, written into the FIFO
+ logic dcfifo_valid, dcfifo_stall; // FIFO write request / FIFO wr_almost_full fed back to the layout transform as i_stall
+ logic lt_done; // layout transform o_last; the FSM receives it as i_lt_done_frame
+ logic ready_input_state; // state-based input ready signal that accounts for inter-frame back-pressure
+ logic lt_ready; // ready signal from layout transform, accounts for intra-frame back-pressure
+ logic axi_param_error; // layout transform o_param_error, before the crossing to clk_ddr
+
+ // Reset parameterization, shared by both reset handler instances below.
+ localparam int RESET_USE_SYNCHRONIZER = 1;
+ localparam int RESET_PIPE_DEPTH = 3;
+ localparam int RESET_NUM_COPIES = 1;
+ dla_reset_handler_simple #(
+ .USE_SYNCHRONIZER (RESET_USE_SYNCHRONIZER),
+ .PIPE_DEPTH (RESET_PIPE_DEPTH),
+ .NUM_COPIES (RESET_NUM_COPIES)
+ )
+ istream_reset_synchronizer // produces resetn for the clk_axi logic (FSM and layout transform)
+ (
+ .clk (clk_axi),
+ .i_resetn (i_resetn_async),
+ .o_sclrn (resetn)
+ );
+ dla_reset_handler_simple #(
+ .USE_SYNCHRONIZER (RESET_USE_SYNCHRONIZER),
+ .PIPE_DEPTH (RESET_PIPE_DEPTH),
+ .NUM_COPIES (RESET_NUM_COPIES)
+ )
+ istream_clk_dla_reset_synchronizer // produces resetn_clk_dla; NOTE(review): that net has no reader here - confirm this instance is still needed
+ (
+ .clk (clk_dla),
+ .i_resetn (i_resetn_async),
+ .o_sclrn (resetn_clk_dla)
+ );
+ // Stream-control FSM: produces ready_input_state (the inter-frame input gate) and o_reading_first_word.
+ dla_streamer_fsm streamer_fsm (
+ .clk_dla (clk_dla),
+ .clk_axi (clk_axi),
+ .i_resetn_axi (resetn),
+ .i_resetn_async (i_resetn_async), // the raw asynchronous reset is passed as well, next to the clk_axi-synchronized one
+ .i_dla_ready (i_istream_ready),
+ .i_lt_ready (lt_ready),
+ .i_streaming_enable (i_streaming_enable),
+ .i_lt_done_frame (lt_done),
+ .i_tvalid (i_tvalid),
+ .o_stream_ready (ready_input_state),
+ .o_reading_first_word (o_reading_first_word)
+ );
+
+ // accept new data when LT and input feeder are both ready. This should translate to
+ // only accepting data when we're prepared to accept a whole image (as opposed to accepting
+ // a couple transfers until LT is full, then waiting for previous inference, then accepting the rest,
+ // since this would probably complicate frame dropping).
+ dla_layout_transform #(
+ .CNT_BITS(20), // NOTE(review): the only LT parameter not derived from LT_ARCH or TDATA_WIDTH - confirm 20 is intended
+ .DDR_BYTES(TDATA_WIDTH/8), // AXI-Stream data width expressed in bytes
+ .CONFIG_DATA_BYTES(LT_ARCH.CONFIG_BYTES),
+ .DATA_ELEMENT_WIDTH(LT_ARCH.DATA_ELEMENT_WIDTH),
+ .MAX_CHANNELS(LT_ARCH.MAX_CHANNELS),
+ .MAX_FEATURE_HEIGHT(LT_ARCH.MAX_FEATURE_HEIGHT),
+ .MAX_FEATURE_WIDTH(LT_ARCH.MAX_FEATURE_WIDTH),
+ .MAX_FEATURE_DEPTH(LT_ARCH.MAX_FEATURE_DEPTH),
+ .MAX_STRIDE_HEIGHT(LT_ARCH.MAX_STRIDE_HEIGHT),
+ .MAX_STRIDE_WIDTH(LT_ARCH.MAX_STRIDE_WIDTH),
+ .MAX_STRIDE_DEPTH(LT_ARCH.MAX_STRIDE_DEPTH),
+ .CVEC(LT_ARCH.CVEC),
+ .MAX_PAD_FRONT(LT_ARCH.MAX_PAD_FRONT),
+ .MAX_PAD_LEFT(LT_ARCH.MAX_PAD_LEFT),
+ .MAX_PAD_TOP(LT_ARCH.MAX_PAD_TOP),
+ .MAX_FILTER_WIDTH(LT_ARCH.MAX_FILTER_WIDTH),
+ .MAX_FILTER_HEIGHT(LT_ARCH.MAX_FILTER_HEIGHT),
+ .MAX_FILTER_DEPTH(LT_ARCH.MAX_FILTER_DEPTH),
+ .MAX_DILATION_WIDTH(LT_ARCH.MAX_DILATION_WIDTH),
+ .MAX_DILATION_HEIGHT(LT_ARCH.MAX_DILATION_HEIGHT),
+ .MAX_DILATION_DEPTH(LT_ARCH.MAX_DILATION_DEPTH),
+ .DO_U8_CONV(LT_ARCH.DO_U8_CONV),
+ .DEVICE(LT_ARCH.DEVICE)
+ ) reader_layout_transform (
+ .clk(clk_axi),
+ .i_rstn(resetn),
+ .i_config_data(i_config_data),
+ .i_config_valid(i_config_valid),
+ .o_config_ready(o_config_ready),
+ .i_data(i_tdata),
+ .i_valid(i_tvalid & ready_input_state), // incoming data is only marked valid while the FSM gate is open
+ .o_ready(lt_ready),
+ .o_data(dcfifo_data),
+ .o_valid(dcfifo_valid),
+ .i_stall(dcfifo_stall), // back-pressure from the FIFO's wr_almost_full
+ .o_last(lt_done),
+ .o_param_error(axi_param_error)
+ );
+ // Clock-crossing FIFO: written on clk_axi by the layout transform, read on clk_dla by the input feeder.
+ localparam int DCFIFO_ALMOST_FULL_CUTOFF = 0;
+ dla_acl_dcfifo #(
+ .WIDTH (OUTPUT_WIDTH),
+ .DEPTH (FIFO_DEPTH),
+ .ALMOST_FULL_CUTOFF (DCFIFO_ALMOST_FULL_CUTOFF)
+ )
+ clock_crosser
+ (
+ .async_resetn (i_resetn_async), //reset synchronization is handled internally
+
+ //write side
+ .wr_clock (clk_axi),
+ .wr_req (dcfifo_valid),
+ .wr_data (dcfifo_data),
+ .wr_almost_full (dcfifo_stall),
+
+ //read side
+ .rd_clock (clk_dla),
+ .rd_empty (reader_empty),
+ .rd_data (o_istream_data),
+ .rd_ack (i_istream_ready) // NOTE(review): not qualified by ~reader_empty - confirm the FIFO tolerates rd_ack while empty
+ );
+ // Cross the layout transform's parameter-error flag from clk_axi to clk_ddr.
+ dla_clock_cross_full_sync cc_param_error (
+ .clk_src(clk_axi),
+ .i_src_async_resetn(1'b1), // tied high, i.e. this reset is never asserted (given the active-low naming)
+ .i_src_data(axi_param_error),
+ .o_src_data(), // intentionally left unconnected
+
+ .clk_dst(clk_ddr),
+ .i_dst_async_resetn(1'b1), // tied high, same as the source-side reset
+ .o_dst_data(o_param_error)
+ );
+ // Output handshakes.
+ assign o_istream_valid = ~reader_empty; // output is valid whenever the FIFO read side is not empty
+ assign o_tready = lt_ready & ready_input_state; // accept AXI data only when both the layout transform and the FSM gate allow it
+
+endmodule