diff options
| author | Eric Dao <eric@erickhangdao.com> | 2025-03-10 17:54:31 -0400 |
|---|---|---|
| committer | Eric Dao <eric@erickhangdao.com> | 2025-03-10 17:54:31 -0400 |
| commit | ab224e2e6ba65f5a369ec392f99cd8845ad06c98 (patch) | |
| tree | a1e757e9341863ed52b8ad4c5a1c45933aab9da4 /python/openvino/demo/ip/intel_ai_ip/verilog/dla_input_streamer.sv | |
| parent | 40da1752f2c8639186b72f6838aa415e854d0b1d (diff) | |
| download | thesis-master.tar.gz thesis-master.tar.bz2 thesis-master.zip | |
Diffstat (limited to 'python/openvino/demo/ip/intel_ai_ip/verilog/dla_input_streamer.sv')
| -rw-r--r-- | python/openvino/demo/ip/intel_ai_ip/verilog/dla_input_streamer.sv | 218 |
1 files changed, 218 insertions, 0 deletions
diff --git a/python/openvino/demo/ip/intel_ai_ip/verilog/dla_input_streamer.sv b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_input_streamer.sv new file mode 100644 index 0000000..2f77c53 --- /dev/null +++ b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_input_streamer.sv @@ -0,0 +1,218 @@ +// Copyright 2024 Intel Corporation. +// +// This software and the related documents are Intel copyrighted materials, +// and your use of them is governed by the express license under which they +// were provided to you ("License"). Unless the License provides otherwise, +// you may not use, modify, copy, publish, distribute, disclose or transmit +// this software or the related documents without Intel's prior written +// permission. +// +// This software and the related documents are provided as is, with no express +// or implied warranties, other than those that are expressly stated in the +// License. + +/** + * dla_input_streamer.sv + * + * FPGA AI Suite input streaming is handled here. This provides an AXI interface to the top-level entity, + * and is responsible for clock-crossing from AXI to DLA clock domains. This module is also responsible for + * applying the layout transform to incoming data. + * + * For now, enabling streaming implies enabling the hardware layout transform module. + * The layout transform assumes input tensors in HWC format; and the input bus width is arbitrary, width + * conversion is done in the layout transform where the output is always CVEC*sizeof(fp16). + * + * Flow control is handled by the configuration of the stream-buffer writer. Backpressuring from the SB + * writer will be propagated to this AXI connection to avoid the need to configure the transfer frame sizes + * in this module. + * + */ + +/** +TODO (arooney): + - Consider behaviour when a frame is done, it gets consumed by the SB, and the + LT can accept a few frames before backpressuring. But then the producer is a few packets into + the transmission. 
Maybe it's best to only accept data when the layout transform is done AND the
+    SB is ready.
+  - Implement strobe signal handling.
+  - Remove unused AXI signals.
+*/
+
+`resetall
+`undefineall
+`default_nettype none
+
+module dla_input_streamer
+#(
+  parameter int TDATA_WIDTH, // bit width of i_tdata; TDATA_WIDTH/8 sizes i_tstrb/i_tkeep and the layout transform's DDR_BYTES
+  parameter int FIFO_DEPTH, // DEPTH of the clk_axi -> clk_dla clock-crossing FIFO
+  parameter int TID_WIDTH, // bit width of i_tid
+  parameter int TDEST_WIDTH, // bit width of i_tdest
+  parameter int TUSER_WIDTH, // bit width of i_tuser
+  parameter dla_lt_pkg::lt_arch_t LT_ARCH, // layout transform settings; fields are forwarded one-to-one to dla_layout_transform
+  parameter int OUTPUT_WIDTH // bit width of the layout transform output, the FIFO and o_istream_data
+) (
+  input wire clk_dla, // clocks the FIFO read side, the streamer FSM and one reset synchronizer
+  input wire clk_ddr, // destination clock of the o_param_error clock crossing
+  // AMBA AXI-Stream signals
+  input wire clk_axi, // clocks the layout transform, the streamer FSM, the FIFO write side and one reset synchronizer
+  input wire i_resetn_async, // asynchronous reset (presumably active-low, per the name); synchronized locally per clock
+  input wire [LT_ARCH.CONFIG_BYTES*8-1:0] i_config_data, // forwarded unchanged to the layout transform config port
+  input wire i_config_valid, // forwarded unchanged to the layout transform
+  output logic o_config_ready, // driven directly by the layout transform
+  input wire i_streaming_enable, // forwarded to the streamer FSM
+  input wire i_tvalid, // indicates the transmitter is driving a valid transfer
+  output logic o_tready, // indicates that the receiver can accept a transfer
+  input wire [TDATA_WIDTH-1:0] i_tdata, // the primary payload of the interface
+  input wire [TDATA_WIDTH/8-1:0] i_tstrb, // (NOT USED) byte qualifier indicating whether the
+  // associated byte in tdata should be processed
+  // as a data, or position byte
+  input wire [TDATA_WIDTH/8-1:0] i_tkeep, // (NOT USED) byte qualifier indicating whether the
+  // contents of tdata is processed as part of the data stream
+  input wire i_tlast, // (NOT USED) indicates the boundary of a packet
+  input wire [TID_WIDTH-1:0] i_tid, // (NOT USED) a data stream identifier
+  input wire [TDEST_WIDTH-1:0] i_tdest, // (NOT USED) provides routing information for the data stream
+  input wire [TUSER_WIDTH-1:0] i_tuser, // (NOT USED) user-defined sideband information
+  input wire i_twakeup, // (AXI5-S ONLY, NOT USED) identifies any activity associated with the AXI-s interface
+  // output
+  output logic [OUTPUT_WIDTH-1:0] o_istream_data, // read data of the clock-crossing FIFO (clocked by clk_dla)
+  output logic o_istream_valid, // high whenever the clock-crossing FIFO is not empty
+  input wire i_istream_ready, // from input feeder; also drives the FIFO rd_ack and the FSM i_dla_ready
+
+  output logic o_reading_first_word, // for CSR active-jobs counter
+  output logic o_param_error // layout transform o_param_error after crossing from clk_axi to clk_ddr
+);
+
+  logic resetn; // o_sclrn of the clk_axi reset handler; resets the FSM (i_resetn_axi) and the layout transform
+  logic resetn_clk_dla; // o_sclrn of the clk_dla reset handler; NOTE(review): not read anywhere in this module - confirm it is needed
+  logic reader_empty; // rd_empty of the clock-crossing FIFO
+  logic [OUTPUT_WIDTH-1:0] dcfifo_data; // layout transform output data, written into the FIFO
+  logic dcfifo_valid, dcfifo_stall; // FIFO wr_req (from LT o_valid) and FIFO wr_almost_full (to LT i_stall)
+  logic lt_done; // layout transform o_last, reported to the FSM as i_lt_done_frame
+  logic ready_input_state; // state-based input ready signal that accounts for inter-frame back-pressure
+  logic lt_ready; // ready signal from layout transform, accounts for intra-frame back-pressure
+  logic axi_param_error; // layout transform o_param_error, in the clk_axi domain
+
+  //reset parameterization (shared by both reset handler instances below)
+  localparam int RESET_USE_SYNCHRONIZER = 1;
+  localparam int RESET_PIPE_DEPTH = 3;
+  localparam int RESET_NUM_COPIES = 1;
+  dla_reset_handler_simple #(
+    .USE_SYNCHRONIZER (RESET_USE_SYNCHRONIZER),
+    .PIPE_DEPTH (RESET_PIPE_DEPTH),
+    .NUM_COPIES (RESET_NUM_COPIES)
+  )
+  istream_reset_synchronizer
+  (
+    .clk (clk_axi), // this instance produces the clk_axi copy of the reset
+    .i_resetn (i_resetn_async),
+    .o_sclrn (resetn)
+  );
+  dla_reset_handler_simple #(
+    .USE_SYNCHRONIZER (RESET_USE_SYNCHRONIZER),
+    .PIPE_DEPTH (RESET_PIPE_DEPTH),
+    .NUM_COPIES (RESET_NUM_COPIES)
+  )
+  istream_clk_dla_reset_synchronizer
+  (
+    .clk (clk_dla), // this instance produces the clk_dla copy of the reset
+    .i_resetn (i_resetn_async),
+    .o_sclrn (resetn_clk_dla)
+  );
+
+  dla_streamer_fsm streamer_fsm (
+    .clk_dla (clk_dla),
+    .clk_axi (clk_axi),
+    .i_resetn_axi (resetn), // clk_axi reset copy from istream_reset_synchronizer
+    .i_resetn_async (i_resetn_async), // raw async reset is passed as well; presumably synchronized to clk_dla inside the FSM - confirm
+    .i_dla_ready (i_istream_ready),
+    .i_lt_ready (lt_ready),
+    .i_streaming_enable (i_streaming_enable),
+    .i_lt_done_frame (lt_done),
+    .i_tvalid (i_tvalid),
+    .o_stream_ready (ready_input_state), // gates both o_tready and the layout transform i_valid
+    .o_reading_first_word (o_reading_first_word)
+  );
+
+  // accept new data when LT and input feeder are both ready. This should translate to
+  // only accepting data when we're prepared to accept a whole image (as opposed to accepting
+  // a couple transfers until LT is full, then waiting for previous inference, then accepting the rest,
+  // since this would probably complicate frame dropping).
+  dla_layout_transform #(
+    .CNT_BITS(20),
+    .DDR_BYTES(TDATA_WIDTH/8), // input width expressed in bytes
+    .CONFIG_DATA_BYTES(LT_ARCH.CONFIG_BYTES),
+    .DATA_ELEMENT_WIDTH(LT_ARCH.DATA_ELEMENT_WIDTH),
+    .MAX_CHANNELS(LT_ARCH.MAX_CHANNELS),
+    .MAX_FEATURE_HEIGHT(LT_ARCH.MAX_FEATURE_HEIGHT),
+    .MAX_FEATURE_WIDTH(LT_ARCH.MAX_FEATURE_WIDTH),
+    .MAX_FEATURE_DEPTH(LT_ARCH.MAX_FEATURE_DEPTH),
+    .MAX_STRIDE_HEIGHT(LT_ARCH.MAX_STRIDE_HEIGHT),
+    .MAX_STRIDE_WIDTH(LT_ARCH.MAX_STRIDE_WIDTH),
+    .MAX_STRIDE_DEPTH(LT_ARCH.MAX_STRIDE_DEPTH),
+    .CVEC(LT_ARCH.CVEC),
+    .MAX_PAD_FRONT(LT_ARCH.MAX_PAD_FRONT),
+    .MAX_PAD_LEFT(LT_ARCH.MAX_PAD_LEFT),
+    .MAX_PAD_TOP(LT_ARCH.MAX_PAD_TOP),
+    .MAX_FILTER_WIDTH(LT_ARCH.MAX_FILTER_WIDTH),
+    .MAX_FILTER_HEIGHT(LT_ARCH.MAX_FILTER_HEIGHT),
+    .MAX_FILTER_DEPTH(LT_ARCH.MAX_FILTER_DEPTH),
+    .MAX_DILATION_WIDTH(LT_ARCH.MAX_DILATION_WIDTH),
+    .MAX_DILATION_HEIGHT(LT_ARCH.MAX_DILATION_HEIGHT),
+    .MAX_DILATION_DEPTH(LT_ARCH.MAX_DILATION_DEPTH),
+    .DO_U8_CONV(LT_ARCH.DO_U8_CONV),
+    .DEVICE(LT_ARCH.DEVICE)
+  ) reader_layout_transform (
+    .clk(clk_axi), // the layout transform runs entirely in the AXI clock domain
+    .i_rstn(resetn),
+    .i_config_data(i_config_data),
+    .i_config_valid(i_config_valid),
+    .o_config_ready(o_config_ready),
+    .i_data(i_tdata),
+    .i_valid(i_tvalid & ready_input_state), // a transfer is only presented while the FSM reports ready
+    .o_ready(lt_ready),
+    .o_data(dcfifo_data),
+    .o_valid(dcfifo_valid),
+    .i_stall(dcfifo_stall), // back-pressure from the clock-crossing FIFO
+    .o_last(lt_done),
+    .o_param_error(axi_param_error)
+  );
+
+  localparam int DCFIFO_ALMOST_FULL_CUTOFF = 0; // presumably 0 = no early almost-full margin; confirm against dla_acl_dcfifo
+  dla_acl_dcfifo #(
+    .WIDTH (OUTPUT_WIDTH),
+    .DEPTH (FIFO_DEPTH),
+    .ALMOST_FULL_CUTOFF (DCFIFO_ALMOST_FULL_CUTOFF)
+  )
+  clock_crosser
+  (
+    .async_resetn (i_resetn_async), //reset synchronization is handled internally
+
+    //write side (clk_axi domain, fed by the layout transform)
+    .wr_clock (clk_axi),
+    .wr_req (dcfifo_valid),
+    .wr_data (dcfifo_data),
+    .wr_almost_full (dcfifo_stall),
+
+    //read side (clk_dla domain, drained by the input feeder)
+    .rd_clock (clk_dla),
+    .rd_empty (reader_empty),
+    .rd_data (o_istream_data),
+    .rd_ack (i_istream_ready) // NOTE(review): not gated by ~reader_empty; presumably the FIFO ignores rd_ack while empty - confirm
+  );
+
+  dla_clock_cross_full_sync cc_param_error (
+    .clk_src(clk_axi),
+    .i_src_async_resetn(1'b1), // tied to constant 1; presumably "never reset" for an active-low input - confirm
+    .i_src_data(axi_param_error),
+    .o_src_data(), // intentionally left unconnected
+
+    .clk_dst(clk_ddr), // NOTE(review): error flag is delivered in the clk_ddr domain, not clk_dla
+    .i_dst_async_resetn(1'b1), // tied to constant 1, same as the source-side reset
+    .o_dst_data(o_param_error)
+  );
+
+  assign o_istream_valid = ~reader_empty; // downstream data is valid whenever the FIFO holds at least one word
+  assign o_tready = lt_ready & ready_input_state; // AXI-S ready requires both the layout transform and the FSM to be ready
+
+endmodule |
