summaryrefslogtreecommitdiff
path: root/python/openvino/demo/ip/intel_ai_ip/verilog/dla_acl_burst_splitter.sv
diff options
context:
space:
mode:
Diffstat (limited to 'python/openvino/demo/ip/intel_ai_ip/verilog/dla_acl_burst_splitter.sv')
-rw-r--r--python/openvino/demo/ip/intel_ai_ip/verilog/dla_acl_burst_splitter.sv242
1 files changed, 242 insertions, 0 deletions
diff --git a/python/openvino/demo/ip/intel_ai_ip/verilog/dla_acl_burst_splitter.sv b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_acl_burst_splitter.sv
new file mode 100644
index 0000000..903424d
--- /dev/null
+++ b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_acl_burst_splitter.sv
@@ -0,0 +1,242 @@
+// Copyright 2020 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+// AvalonMM Burst Splitter
+//
+// This module splits AvalonMM bursts. One can configure it to split only write bursts, split only read bursts, or split both. It is legal to configure this module to not
+// split any bursts, Quaurtus should sweep away all the unused logic and this module should become passthrough wires.
+//
+// The interfaces are only the command portion of the AvalonMM interface, it is assumed that the timing of the response path is independent of the command. This is certainly
+// the case when the response path includes readdatavalid, as the read response can happen an arbitraty number of clock cycles later than when the read request was accepted.
+//
+// This module has no capacity, it splits bursts on-the-fly. There is no change in control flow when splitting write bursts since a write burst of length N already takes N
+// clock cycles to transfer (it contains N words of data). Splitting a write burst basically involves calculating the address for the words inside the burst. Conversely, if
+// splitting a read burst, one read request of length N (which can be transferred in 1 clock cycle) will result in N read requests of length 1, and those N read requests
+// need N clock cycles to be transferred. Therefore the burst splitter must stall the upstream interface while these read requests are provided to the downstream interface.
+//
+// If this module is configured to split read bursts, there must be a zero-cycle handshake with the upstream interface. If only splitting write bursts, then one may
+// optionally use stall latency handshaking with upstream. Any style of handshaking (stall/valid or stall latency) can be used with the downstream interface, if using stall
+// latency then downstream should provide an almost full signal as backpressure.
+//
+// By default, the adder used to calculate the address for words inside a burst is the full width of the address. To improve fmax, we can reduce the adder width, however it
+// has to be known that bursts will not cross some boundary. For example, if it is known that bursts cannot cross a 4096 byte boundary, then adder only needs to span the lower
+// 12 bits of the address. Shortening a long carry chain helps to improve fmax, and saves area.
+//
+// Required files:
+// - dla_acl_burst_splitter.sv
+// - dla_acl_reset_handler.sv
+// - dla_acl_parameter_assert.svh
+
+`default_nettype none
+`include "dla_acl_parameter_assert.svh"
+
+module dla_acl_burst_splitter #(
+ //signal width -- all must be at least 1
+ parameter int unsigned ADDRESS_WIDTH, // byte address must be word aligned, e.g. if BYTEENABLE_WIDTH = 4, then address must be 4-byte aligned, bottom 2 bits must be 0
+ parameter int unsigned BURSTCOUNT_WIDTH,
+ parameter int unsigned BYTEENABLE_WIDTH, // must be a power of 2, specifies word size
+
+ //burst splitting configuration
+ parameter bit SPLIT_WRITE_BURSTS = 1, // 0 means leave writes bursts untouched, 1 means split write bursts
+ parameter bit SPLIT_READ_BURSTS = 1, // likewise for read bursts
+
+ //special configuration
+ parameter int unsigned BURST_BOUNDARY = 0, // set to nonzero to specify what address size a burst will not cross, e.g. 12 means bursts cannot cross a 4K boundary
+ parameter bit USE_STALL_LATENCY = 0, // for write burst splitting only where there is no change in control flow (address inside the burst is computed on-the-fly),
+ // 0 means stall/valid (up_write means we MAY accept it), 1 means stall/latency (up_write means we MUST accept it)
+ //reset configuration
+ parameter bit ASYNC_RESET = 0, // how do we use reset: 1 means registers are reset asynchronously, 0 means registers are reset synchronously
+ parameter bit SYNCHRONIZE_RESET = 1, // based on how reset gets to us, what do we need to do: 1 means synchronize reset before consumption (if reset arrives asynchronously), 0 means passthrough (managed externally)
+ parameter bit BACKPRESSURE_DURING_RESET = 1,// determine whether up_waitrequest will backpressure during reset, safer to do so but adds combinational logic
+
+ //derived parameters
+ localparam int unsigned DATA_WIDTH = 8*BYTEENABLE_WIDTH,
+ localparam int unsigned ADDRESS_BITS_PER_WORD = $clog2(BYTEENABLE_WIDTH) // how many lower bits of the byte address are stuck at 0 to ensure it is word aligned
+) (
+ input wire clock,
+ input wire resetn,
+
+ //upstream interface - avalon slave
+ output logic up_waitrequest,
+ input wire up_read,
+ input wire up_write,
+ input wire [ADDRESS_WIDTH-1:0] up_address,
+ input wire [DATA_WIDTH-1:0] up_writedata,
+ input wire [BYTEENABLE_WIDTH-1:0] up_byteenable,
+ input wire [BURSTCOUNT_WIDTH-1:0] up_burstcount,
+
+ //downstream interface - avalon master
+ input wire down_waitrequest,
+ output logic down_read,
+ output logic down_write,
+ output logic [ADDRESS_WIDTH-1:0] down_address,
+ output logic [DATA_WIDTH-1:0] down_writedata,
+ output logic [BYTEENABLE_WIDTH-1:0] down_byteenable,
+ output logic [BURSTCOUNT_WIDTH-1:0] down_burstcount
+);
+
+
+
+ //////////////////////////////////////
+ // //
+ // Sanity check on the parameters //
+ // //
+ //////////////////////////////////////
+
+ generate
+ `DLA_ACL_PARAMETER_ASSERT(ADDRESS_WIDTH >= 1)
+ `DLA_ACL_PARAMETER_ASSERT(BURSTCOUNT_WIDTH >= 1)
+ `DLA_ACL_PARAMETER_ASSERT(BYTEENABLE_WIDTH >= 1)
+ `DLA_ACL_PARAMETER_ASSERT(BYTEENABLE_WIDTH == 2**ADDRESS_BITS_PER_WORD)
+ `DLA_ACL_PARAMETER_ASSERT(BURST_BOUNDARY < ADDRESS_WIDTH)
+ `DLA_ACL_PARAMETER_ASSERT(BURST_BOUNDARY == 0 || BURST_BOUNDARY >= ADDRESS_BITS_PER_WORD)
+ `DLA_ACL_PARAMETER_ASSERT(USE_STALL_LATENCY == 0 || SPLIT_READ_BURSTS == 0)
+ endgenerate
+
+
+
+ /////////////
+ // //
+ // Reset //
+ // //
+ /////////////
+
+ logic aclrn, sclrn;
+ dla_acl_reset_handler
+ #(
+ .ASYNC_RESET (ASYNC_RESET),
+ .USE_SYNCHRONIZER (SYNCHRONIZE_RESET),
+ .SYNCHRONIZE_ACLRN (SYNCHRONIZE_RESET),
+ .PIPE_DEPTH (2),
+ .NUM_COPIES (1)
+ )
+ dla_acl_reset_handler_inst
+ (
+ .clk (clock),
+ .i_resetn (resetn),
+ .o_aclrn (aclrn),
+ .o_resetn_synchronized (),
+ .o_sclrn (sclrn)
+ );
+
+
+
+ //////////////////////
+ // //
+ // Burst splitter //
+ // //
+ //////////////////////
+
+ logic inside_read_burst; //inside a read burst
+ logic inside_burst; //inside some burst -- we are inside a write burst if inside_burst & ~inside_read_burst
+ logic [ADDRESS_WIDTH-1:0] internal_address; //address for words inside a burst
+ logic [BURSTCOUNT_WIDTH-1:0] internal_burstcount; //keep track of how many remaining words are in a burst
+ logic internal_burstcount_eq_two; //register the check for internal_burstcount == 2 by looking at how we get into that condition
+ logic backpressure_during_reset; //helper signal which sets up_waitrequest = 1 during reset under various reset configurations
+ logic backpressure_during_read_burst; //stall upstream while we split read bursts
+ logic [ADDRESS_WIDTH-1:0] up_address_plus_byteenable_width; //manually split the bits of the adder in the case where the bursts are known to not cross some boundary
+ logic [ADDRESS_WIDTH-1:0] internal_address_plus_byteenable_width; //same idea as above
+ logic down_burstcount_mask; //under which conditions should we override down_burstcount to 1
+ logic [ADDRESS_WIDTH-1:0] down_address_raw; //before outputting down_address, set the bottom ADDRESS_BITS_PER_WORD bits to 0, Quartus will prune any logic that drove these bits
+
+ generate
+ if (BURST_BOUNDARY) begin : GEN_SHORT_ADDRESS_ADDER //burst will not cross a boundary of 2**BURST_BOUNDARY
+ assign up_address_plus_byteenable_width[BURST_BOUNDARY-1:0] = up_address[BURST_BOUNDARY-1:0] + BYTEENABLE_WIDTH; //only the lower address bits within a burst need the adder
+ assign up_address_plus_byteenable_width[ADDRESS_WIDTH-1:BURST_BOUNDARY] = up_address[ADDRESS_WIDTH-1:BURST_BOUNDARY]; //upper bits come directly from the input address
+ assign internal_address_plus_byteenable_width[BURST_BOUNDARY-1:0] = internal_address[BURST_BOUNDARY-1:0] + BYTEENABLE_WIDTH;
+ assign internal_address_plus_byteenable_width[ADDRESS_WIDTH-1:BURST_BOUNDARY] = internal_address[ADDRESS_WIDTH-1:BURST_BOUNDARY]; //reg holds its value
+ end
+ else begin : GEN_FULL_ADDRESS_ADDER
+ assign up_address_plus_byteenable_width = up_address + BYTEENABLE_WIDTH;
+ assign internal_address_plus_byteenable_width = internal_address + BYTEENABLE_WIDTH;
+ end
+ endgenerate
+
+
+ always_ff @(posedge clock or negedge aclrn) begin
+ if (~aclrn) begin
+ inside_read_burst <= 1'b0;
+ inside_burst <= 1'b0;
+ internal_address <= '0;
+ internal_burstcount <= '0;
+ internal_burstcount_eq_two <= 1'b0;
+ end
+ else begin
+ if (~inside_burst) begin
+ internal_address <= up_address_plus_byteenable_width;
+ internal_burstcount <= up_burstcount;
+ internal_burstcount_eq_two <= (up_burstcount == 2);
+
+ //whether or not we enter inside a burst for splitting depends on the burst splitting configuration
+ if (SPLIT_WRITE_BURSTS && SPLIT_READ_BURSTS) begin //split both read and write bursts
+ if ((up_read | up_write) & ~down_waitrequest & (up_burstcount != 1)) inside_burst <= 1'b1;
+ if ( up_read & ~down_waitrequest & (up_burstcount != 1)) inside_read_burst <= 1'b1;
+ end
+ else if (SPLIT_WRITE_BURSTS) begin //split write bursts only
+ if (up_write & (~down_waitrequest | USE_STALL_LATENCY) & (up_burstcount != 1)) inside_burst <= 1'b1;
+ //inside_read_burst will be stuck at 0
+ end
+ else if (SPLIT_READ_BURSTS) begin //split read bursts only
+ if (up_read & ~down_waitrequest & (up_burstcount != 1)) begin
+ inside_burst <= 1'b1;
+ inside_read_burst <= 1'b1;
+ end
+ end
+ //else no burst splitting, inside_burst and inside_read_burst will both be stuck at 0
+ end
+ else begin
+ //note that USE_STALL_LATENCY applies to the upstream interface, but one can only set USE_STALL_LATENCY = 1 when splitting only write bursts
+ //in which case the control flow does not change, i.e. up_waitrequest = down_waitrequest
+ if ((~down_waitrequest | USE_STALL_LATENCY) & (backpressure_during_read_burst | up_write)) begin
+ internal_address <= internal_address_plus_byteenable_width;
+ internal_burstcount <= internal_burstcount - 1;
+ internal_burstcount_eq_two <= (internal_burstcount == 3);
+ if (internal_burstcount_eq_two) begin
+ inside_read_burst <= 1'b0;
+ inside_burst <= 1'b0;
+ end
+ end
+ end
+ if (~sclrn) begin
+ inside_read_burst <= 1'b0;
+ inside_burst <= 1'b0;
+ end
+ end
+ end
+
+ //backpressure
+ assign backpressure_during_reset = (!BACKPRESSURE_DURING_RESET) ? 1'b0 : (~aclrn) ? 1'b1 : (~sclrn) ? 1'b1 : 1'b0;
+ assign backpressure_during_read_burst = (SPLIT_READ_BURSTS) ? inside_read_burst : 1'b0;
+ assign up_waitrequest = down_waitrequest | backpressure_during_read_burst | backpressure_during_reset;
+
+ //write only data path can simply pass through
+ assign down_writedata = up_writedata;
+ assign down_byteenable = up_byteenable;
+
+ //if we are inside a burst, the read ack has already gone to upstream so the next transaction is being presented
+ assign down_read = (backpressure_during_read_burst) ? 1'b1 : up_read;
+ assign down_write = (backpressure_during_read_burst) ? 1'b0 : up_write;
+
+ //original address is used at the beginning of a burst (or if burst was not split), we computed the address inside a burst
+ assign down_address_raw = (inside_burst) ? internal_address : up_address;
+
+ //set lower ADDRESS_BITS_PER_WORD bits to 0, the lower bits will be pruned away from all logic related to address
+ assign down_address = down_address_raw[ADDRESS_WIDTH-1:ADDRESS_BITS_PER_WORD] << ADDRESS_BITS_PER_WORD;
+
+ //under what conditions are we splitting a burst, and therefore down_burstcount should be set to 1
+ assign down_burstcount_mask = (SPLIT_WRITE_BURSTS && SPLIT_READ_BURSTS) ? 1'b1 : (SPLIT_WRITE_BURSTS) ? down_write : (SPLIT_READ_BURSTS) ? down_read : 1'b0;
+ assign down_burstcount = (down_burstcount_mask) ? 1'h1 : up_burstcount;
+
+endmodule
+
+`default_nettype wire