// Copyright 2020 Intel Corporation.
//
// This software and the related documents are Intel copyrighted materials,
// and your use of them is governed by the express license under which they
// were provided to you ("License"). Unless the License provides otherwise,
// you may not use, modify, copy, publish, distribute, disclose or transmit
// this software or the related documents without Intel's prior written
// permission.
//
// This software and the related documents are provided as is, with no express
// or implied warranties, other than those that are expressly stated in the
// License.

// AvalonMM Burst Splitter
//
// This module splits AvalonMM bursts. One can configure it to split only write bursts, split only read bursts, or split both. It is legal to configure this module to not
// split any bursts; Quartus should then sweep away all the unused logic and this module should become passthrough wires.
//
// The interfaces are only the command portion of the AvalonMM interface, it is assumed that the timing of the response path is independent of the command. This is certainly
// the case when the response path includes readdatavalid, as the read response can happen an arbitrary number of clock cycles later than when the read request was accepted.
//
// This module has no capacity, it splits bursts on-the-fly. There is no change in control flow when splitting write bursts since a write burst of length N already takes N
// clock cycles to transfer (it contains N words of data). Splitting a write burst basically involves calculating the address for the words inside the burst. Conversely, if
// splitting a read burst, one read request of length N (which can be transferred in 1 clock cycle) will result in N read requests of length 1, and those N read requests
// need N clock cycles to be transferred. Therefore the burst splitter must stall the upstream interface while these read requests are provided to the downstream interface.
//
// If this module is configured to split read bursts, there must be a zero-cycle handshake with the upstream interface. If only splitting write bursts, then one may
// optionally use stall latency handshaking with upstream. Any style of handshaking (stall/valid or stall latency) can be used with the downstream interface; if using stall
// latency then downstream should provide an almost full signal as backpressure.
//
// By default, the adder used to calculate the address for words inside a burst is the full width of the address. To improve fmax, we can reduce the adder width, however it
// has to be known that bursts will not cross some boundary. For example, if it is known that bursts cannot cross a 4096 byte boundary, then the adder only needs to span the
// lower 12 bits of the address. Shortening a long carry chain helps to improve fmax, and saves area.
//
// Required files:
// - dla_acl_burst_splitter.sv
// - dla_acl_reset_handler.sv
// - dla_acl_parameter_assert.svh

`default_nettype none

`include "dla_acl_parameter_assert.svh"

module dla_acl_burst_splitter #(
    //signal width -- all must be at least 1
    parameter int unsigned ADDRESS_WIDTH,               // byte address must be word aligned, e.g. if BYTEENABLE_WIDTH = 4, then address must be 4-byte aligned, bottom 2 bits must be 0
    parameter int unsigned BURSTCOUNT_WIDTH,
    parameter int unsigned BYTEENABLE_WIDTH,            // must be a power of 2, specifies word size

    //burst splitting configuration
    parameter bit SPLIT_WRITE_BURSTS = 1,               // 0 means leave write bursts untouched, 1 means split write bursts
    parameter bit SPLIT_READ_BURSTS = 1,                // likewise for read bursts

    //special configuration
    parameter int unsigned BURST_BOUNDARY = 0,          // set to nonzero to specify what address size a burst will not cross, e.g. 12 means bursts cannot cross a 4K boundary
    parameter bit USE_STALL_LATENCY = 0,                // for write burst splitting only where there is no change in control flow (address inside the burst is computed on-the-fly),
                                                        // 0 means stall/valid (up_write means we MAY accept it), 1 means stall/latency (up_write means we MUST accept it)

    //reset configuration
    parameter bit ASYNC_RESET = 0,                      // how do we use reset: 1 means registers are reset asynchronously, 0 means registers are reset synchronously
    parameter bit SYNCHRONIZE_RESET = 1,                // based on how reset gets to us, what do we need to do: 1 means synchronize reset before consumption (if reset arrives asynchronously), 0 means passthrough (managed externally)
    parameter bit BACKPRESSURE_DURING_RESET = 1,// determine whether up_waitrequest will backpressure during reset, safer to do so but adds combinational logic

    //derived parameters
    localparam int unsigned DATA_WIDTH = 8*BYTEENABLE_WIDTH,
    localparam int unsigned ADDRESS_BITS_PER_WORD = $clog2(BYTEENABLE_WIDTH)    // how many lower bits of the byte address are stuck at 0 to ensure it is word aligned
) (
    input  wire                         clock,
    input  wire                         resetn,

    //upstream interface - avalon slave
    output logic                        up_waitrequest,
    input  wire                         up_read,
    input  wire                         up_write,
    input  wire     [ADDRESS_WIDTH-1:0] up_address,
    input  wire        [DATA_WIDTH-1:0] up_writedata,
    input  wire  [BYTEENABLE_WIDTH-1:0] up_byteenable,
    input  wire  [BURSTCOUNT_WIDTH-1:0] up_burstcount,

    //downstream interface - avalon master
    input  wire                         down_waitrequest,
    output logic                        down_read,
    output logic                        down_write,
    output logic    [ADDRESS_WIDTH-1:0] down_address,
    output logic       [DATA_WIDTH-1:0] down_writedata,
    output logic [BYTEENABLE_WIDTH-1:0] down_byteenable,
    output logic [BURSTCOUNT_WIDTH-1:0] down_burstcount
);


    //--------------------------------------------------------------------------
    // Parameter legality checks
    //--------------------------------------------------------------------------
    generate
    `DLA_ACL_PARAMETER_ASSERT(ADDRESS_WIDTH >= 1)
    `DLA_ACL_PARAMETER_ASSERT(BURSTCOUNT_WIDTH >= 1)
    `DLA_ACL_PARAMETER_ASSERT(BYTEENABLE_WIDTH >= 1)
    `DLA_ACL_PARAMETER_ASSERT(BYTEENABLE_WIDTH == 2**ADDRESS_BITS_PER_WORD)
    `DLA_ACL_PARAMETER_ASSERT(BURST_BOUNDARY < ADDRESS_WIDTH)
    `DLA_ACL_PARAMETER_ASSERT(BURST_BOUNDARY == 0 || BURST_BOUNDARY >= ADDRESS_BITS_PER_WORD)
    `DLA_ACL_PARAMETER_ASSERT(USE_STALL_LATENCY == 0 || SPLIT_READ_BURSTS == 0)
    endgenerate


    //--------------------------------------------------------------------------
    // Reset
    //
    // aclrn is used below as the asynchronous (negedge) reset of the state
    // registers and sclrn as their synchronous clear; both are produced from
    // resetn by the shared reset handler.
    //--------------------------------------------------------------------------
    logic aclrn, sclrn;

    dla_acl_reset_handler
    #(
        .ASYNC_RESET            (ASYNC_RESET),
        .USE_SYNCHRONIZER       (SYNCHRONIZE_RESET),
        .SYNCHRONIZE_ACLRN      (SYNCHRONIZE_RESET),
        .PIPE_DEPTH             (2),
        .NUM_COPIES             (1)
    )
    dla_acl_reset_handler_inst
    (
        .clk                    (clock),
        .i_resetn               (resetn),
        .o_aclrn                (aclrn),
        .o_resetn_synchronized  (),
        .o_sclrn                (sclrn)
    );


    //--------------------------------------------------------------------------
    // Burst splitter
    //--------------------------------------------------------------------------

    // state registers
    logic                        inside_read_burst;             // currently splitting a read burst
    logic                        inside_burst;                  // currently inside some split burst; it is a write burst when inside_burst & ~inside_read_burst
    logic    [ADDRESS_WIDTH-1:0] internal_address;              // address of the next word inside the burst
    logic [BURSTCOUNT_WIDTH-1:0] internal_burstcount;           // number of words still outstanding in the burst
    logic                        internal_burstcount_eq_two;    // registered version of internal_burstcount == 2, derived from how that value is reached

    // combinational helpers
    logic                        backpressure_during_reset;     // forces up_waitrequest = 1 while in reset (only if BACKPRESSURE_DURING_RESET)
    logic                        backpressure_during_read_burst;// stalls upstream while a read burst is being split
    logic    [ADDRESS_WIDTH-1:0] up_address_plus_byteenable_width;          // up_address advanced by one word
    logic    [ADDRESS_WIDTH-1:0] internal_address_plus_byteenable_width;    // internal_address advanced by one word
    logic                        down_burstcount_mask;          // 1 when down_burstcount must be overridden to 1
    logic    [ADDRESS_WIDTH-1:0] down_address_raw;              // selected address before the lower ADDRESS_BITS_PER_WORD bits are forced to 0
    logic                        up_burst_is_multiword;         // upstream burstcount is anything other than 1
    logic                        start_split_burst;             // first word of a burst that will be split is being transferred
    logic                        start_split_read_burst;        // as above, and the burst is a read
    logic                        advance_within_burst;          // a subsequent word of the split burst is being transferred

    // Address increment by one word. When BURST_BOUNDARY is nonzero the burst is known not to cross a
    // 2**BURST_BOUNDARY byte boundary, so only the low BURST_BOUNDARY bits go through the adder and the
    // upper bits are passed through unchanged.
    generate
    if (BURST_BOUNDARY) begin : GEN_SHORT_ADDRESS_ADDER
        // explicitly sized sums so the carry out of the low bits is discarded
        logic [BURST_BOUNDARY-1:0] up_address_low_sum;
        logic [BURST_BOUNDARY-1:0] internal_address_low_sum;

        assign up_address_low_sum       = up_address[BURST_BOUNDARY-1:0] + BYTEENABLE_WIDTH;
        assign internal_address_low_sum = internal_address[BURST_BOUNDARY-1:0] + BYTEENABLE_WIDTH;

        assign up_address_plus_byteenable_width       = {up_address[ADDRESS_WIDTH-1:BURST_BOUNDARY], up_address_low_sum};
        assign internal_address_plus_byteenable_width = {internal_address[ADDRESS_WIDTH-1:BURST_BOUNDARY], internal_address_low_sum};
    end
    else begin : GEN_FULL_ADDRESS_ADDER
        assign up_address_plus_byteenable_width       = up_address + BYTEENABLE_WIDTH;
        assign internal_address_plus_byteenable_width = internal_address + BYTEENABLE_WIDTH;
    end
    endgenerate

    // Conditions under which we start tracking a burst. Which commands qualify depends on the
    // splitting configuration; with no splitting enabled both signals are stuck at 0.
    assign up_burst_is_multiword = (up_burstcount != 1);

    always_comb begin
        start_split_burst      = 1'b0;
        start_split_read_burst = 1'b0;
        if (SPLIT_WRITE_BURSTS && SPLIT_READ_BURSTS) begin  //split both read and write bursts
            start_split_burst      = (up_read | up_write) & ~down_waitrequest & up_burst_is_multiword;
            start_split_read_burst =  up_read             & ~down_waitrequest & up_burst_is_multiword;
        end
        else if (SPLIT_WRITE_BURSTS) begin                  //split write bursts only, the read flag is never raised
            start_split_burst      = up_write & (~down_waitrequest | USE_STALL_LATENCY) & up_burst_is_multiword;
        end
        else if (SPLIT_READ_BURSTS) begin                   //split read bursts only
            start_split_burst      = up_read & ~down_waitrequest & up_burst_is_multiword;
            start_split_read_burst = up_read & ~down_waitrequest & up_burst_is_multiword;
        end
    end

    // Condition under which a word inside an already-started burst is transferred.
    // USE_STALL_LATENCY describes the upstream handshake and may only be 1 when splitting write bursts
    // alone, in which case control flow is unchanged (up_waitrequest = down_waitrequest).
    assign advance_within_burst = (~down_waitrequest | USE_STALL_LATENCY) & (backpressure_during_read_burst | up_write);

    always_ff @(posedge clock or negedge aclrn) begin
        if (~aclrn) begin
            inside_read_burst          <= 1'b0;
            inside_burst               <= 1'b0;
            internal_address           <= '0;
            internal_burstcount        <= '0;
            internal_burstcount_eq_two <= 1'b0;
        end
        else begin
            if (~inside_burst) begin
                // idle: continuously capture what the second word of a burst would need
                internal_address           <= up_address_plus_byteenable_width;
                internal_burstcount        <= up_burstcount;
                internal_burstcount_eq_two <= (up_burstcount == 2);

                if (start_split_burst)      inside_burst      <= 1'b1;
                if (start_split_read_burst) inside_read_burst <= 1'b1;
            end
            else begin
                if (advance_within_burst) begin
                    internal_address           <= internal_address_plus_byteenable_width;
                    internal_burstcount        <= internal_burstcount - 1;
                    internal_burstcount_eq_two <= (internal_burstcount == 3);

                    // the word going out now is the last one of the burst
                    if (internal_burstcount_eq_two) begin
                        inside_read_burst <= 1'b0;
                        inside_burst      <= 1'b0;
                    end
                end
            end

            // synchronous clear has the final say over the burst tracking flags
            if (~sclrn) begin
                inside_read_burst <= 1'b0;
                inside_burst      <= 1'b0;
            end
        end
    end

    // backpressure
    always_comb begin
        backpressure_during_reset = 1'b0;
        if (BACKPRESSURE_DURING_RESET) begin
            backpressure_during_reset = ~aclrn | ~sclrn;
        end
    end

    always_comb begin
        if (SPLIT_READ_BURSTS) backpressure_during_read_burst = inside_read_burst;
        else                   backpressure_during_read_burst = 1'b0;
    end

    assign up_waitrequest = backpressure_during_reset | backpressure_during_read_burst | down_waitrequest;

    // downstream command
    always_comb begin
        // write-only data path simply passes through
        down_writedata  = up_writedata;
        down_byteenable = up_byteenable;

        // while splitting a read burst the upstream read has already been acknowledged and upstream is
        // presenting its next transaction, so the read request is generated here and writes are held off
        down_read  = (backpressure_during_read_burst) ? 1'b1 : up_read;
        down_write = (backpressure_during_read_burst) ? 1'b0 : up_write;

        // the upstream address is used for the first word of a burst (or an unsplit burst),
        // the computed address is used for every later word
        down_address_raw = (inside_burst) ? internal_address : up_address;

        // force the lower ADDRESS_BITS_PER_WORD bits to 0 so that logic driving them can be pruned
        down_address = down_address_raw[ADDRESS_WIDTH-1:ADDRESS_BITS_PER_WORD] << ADDRESS_BITS_PER_WORD;

        // decide whether this command belongs to a kind of burst that is being split
        if (SPLIT_WRITE_BURSTS && SPLIT_READ_BURSTS) down_burstcount_mask = 1'b1;
        else if (SPLIT_WRITE_BURSTS)                 down_burstcount_mask = down_write;
        else if (SPLIT_READ_BURSTS)                  down_burstcount_mask = down_read;
        else                                         down_burstcount_mask = 1'b0;

        down_burstcount = (down_burstcount_mask) ? 1'h1 : up_burstcount;
    end

endmodule

`default_nettype wire