1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
|
// Copyright 2020 Intel Corporation.
//
// This software and the related documents are Intel copyrighted materials,
// and your use of them is governed by the express license under which they
// were provided to you ("License"). Unless the License provides otherwise,
// you may not use, modify, copy, publish, distribute, disclose or transmit
// this software or the related documents without Intel's prior written
// permission.
//
// This software and the related documents are provided as is, with no express
// or implied warranties, other than those that are expressly stated in the
// License.
// AvalonMM Burst Splitter
//
// This module splits AvalonMM bursts. One can configure it to split only write bursts, split only read bursts, or split both. It is also legal to configure this module to not
// split any bursts; in that case Quartus should sweep away all the unused logic and this module should become passthrough wires.
//
// The interfaces carry only the command portion of the AvalonMM interface; it is assumed that the timing of the response path is independent of the command. This is certainly
// the case when the response path includes readdatavalid, as the read response can happen an arbitrary number of clock cycles later than when the read request was accepted.
//
// This module has no capacity, it splits bursts on-the-fly. There is no change in control flow when splitting write bursts since a write burst of length N already takes N
// clock cycles to transfer (it contains N words of data). Splitting a write burst basically involves calculating the address for the words inside the burst. Conversely, if
// splitting a read burst, one read request of length N (which can be transferred in 1 clock cycle) will result in N read requests of length 1, and those N read requests
// need N clock cycles to be transferred. Therefore the burst splitter must stall the upstream interface while these read requests are provided to the downstream interface.
//
// If this module is configured to split read bursts, there must be a zero-cycle handshake with the upstream interface. If only splitting write bursts, then one may
// optionally use stall latency handshaking with upstream. Any style of handshaking (stall/valid or stall latency) can be used with the downstream interface, if using stall
// latency then downstream should provide an almost full signal as backpressure.
//
// By default, the adder used to calculate the address for words inside a burst is the full width of the address. To improve fmax, we can reduce the adder width, however it
// has to be known that bursts will not cross some boundary. For example, if it is known that bursts cannot cross a 4096 byte boundary, then adder only needs to span the lower
// 12 bits of the address. Shortening a long carry chain helps to improve fmax, and saves area.
//
// Required files:
// - dla_acl_burst_splitter.sv
// - dla_acl_reset_handler.sv
// - dla_acl_parameter_assert.svh
`default_nettype none
`include "dla_acl_parameter_assert.svh"
// dla_acl_burst_splitter: converts multi-word AvalonMM command bursts into single-word commands (burstcount = 1).
// Only the command channel passes through this module. The first word of a burst is forwarded with the upstream
// address; the address of every following word comes from a register that advances by BYTEENABLE_WIDTH bytes per
// accepted word. While a read burst is being split, upstream is stalled through up_waitrequest and down_read is
// driven by this module. Write bursts keep the upstream handshake untouched (one data word per accepted cycle).
module dla_acl_burst_splitter #(
//signal width -- all must be at least 1
parameter int unsigned ADDRESS_WIDTH, // byte address must be word aligned, e.g. if BYTEENABLE_WIDTH = 4, then address must be 4-byte aligned, bottom 2 bits must be 0
parameter int unsigned BURSTCOUNT_WIDTH,
parameter int unsigned BYTEENABLE_WIDTH, // must be a power of 2, specifies word size
//burst splitting configuration
parameter bit SPLIT_WRITE_BURSTS = 1, // 0 means leave write bursts untouched, 1 means split write bursts
parameter bit SPLIT_READ_BURSTS = 1, // likewise for read bursts
//special configuration
parameter int unsigned BURST_BOUNDARY = 0, // set to nonzero to specify what address size a burst will not cross, e.g. 12 means bursts cannot cross a 4K boundary
parameter bit USE_STALL_LATENCY = 0, // for write burst splitting only where there is no change in control flow (address inside the burst is computed on-the-fly),
// 0 means stall/valid (up_write means we MAY accept it), 1 means stall/latency (up_write means we MUST accept it)
//reset configuration
parameter bit ASYNC_RESET = 0, // how do we use reset: 1 means registers are reset asynchronously, 0 means registers are reset synchronously
parameter bit SYNCHRONIZE_RESET = 1, // based on how reset gets to us, what do we need to do: 1 means synchronize reset before consumption (if reset arrives asynchronously), 0 means passthrough (managed externally)
parameter bit BACKPRESSURE_DURING_RESET = 1,// determine whether up_waitrequest will backpressure during reset, safer to do so but adds combinational logic
//derived parameters
localparam int unsigned DATA_WIDTH = 8*BYTEENABLE_WIDTH,
localparam int unsigned ADDRESS_BITS_PER_WORD = $clog2(BYTEENABLE_WIDTH) // how many lower bits of the byte address are stuck at 0 to ensure it is word aligned
) (
input wire clock,
input wire resetn, // active-low reset, conditioned by dla_acl_reset_handler before use
//upstream interface - avalon slave
output logic up_waitrequest, // down_waitrequest, additionally forced high while splitting a read burst and (optionally) during reset
input wire up_read,
input wire up_write,
input wire [ADDRESS_WIDTH-1:0] up_address,
input wire [DATA_WIDTH-1:0] up_writedata,
input wire [BYTEENABLE_WIDTH-1:0] up_byteenable,
input wire [BURSTCOUNT_WIDTH-1:0] up_burstcount,
//downstream interface - avalon master
input wire down_waitrequest,
output logic down_read,
output logic down_write,
output logic [ADDRESS_WIDTH-1:0] down_address, // lower ADDRESS_BITS_PER_WORD bits are always driven to 0
output logic [DATA_WIDTH-1:0] down_writedata, // combinational copy of up_writedata
output logic [BYTEENABLE_WIDTH-1:0] down_byteenable, // combinational copy of up_byteenable
output logic [BURSTCOUNT_WIDTH-1:0] down_burstcount // 1 for transactions subject to splitting, otherwise up_burstcount
);
//////////////////////////////////////
// //
// Sanity check on the parameters //
// //
//////////////////////////////////////
// NOTE(review): DLA_ACL_PARAMETER_ASSERT comes from dla_acl_parameter_assert.svh (not visible here); presumably it
// rejects the configuration at elaboration when the condition is false -- confirm in that header.
generate
`DLA_ACL_PARAMETER_ASSERT(ADDRESS_WIDTH >= 1)
`DLA_ACL_PARAMETER_ASSERT(BURSTCOUNT_WIDTH >= 1)
`DLA_ACL_PARAMETER_ASSERT(BYTEENABLE_WIDTH >= 1)
//2**$clog2(x) == x holds only when x is a power of 2
`DLA_ACL_PARAMETER_ASSERT(BYTEENABLE_WIDTH == 2**ADDRESS_BITS_PER_WORD)
//the short adder leaves address bits [ADDRESS_WIDTH-1:BURST_BOUNDARY] untouched, so that slice must not be empty
`DLA_ACL_PARAMETER_ASSERT(BURST_BOUNDARY < ADDRESS_WIDTH)
//a nonzero boundary must be at least one word in size
`DLA_ACL_PARAMETER_ASSERT(BURST_BOUNDARY == 0 || BURST_BOUNDARY >= ADDRESS_BITS_PER_WORD)
//stall latency on the upstream interface is only allowed when read bursts are not split
`DLA_ACL_PARAMETER_ASSERT(USE_STALL_LATENCY == 0 || SPLIT_READ_BURSTS == 0)
endgenerate
/////////////
// //
// Reset //
// //
/////////////
// aclrn feeds the asynchronous reset of the register block below, sclrn feeds its synchronous reset.
// NOTE(review): how dla_acl_reset_handler drives these two outputs for each ASYNC_RESET / SYNCHRONIZE_RESET setting
// (and what PIPE_DEPTH / NUM_COPIES mean) is defined in dla_acl_reset_handler.sv -- presumably only one of the two
// is ever active; confirm there.
logic aclrn, sclrn;
dla_acl_reset_handler
#(
.ASYNC_RESET (ASYNC_RESET),
.USE_SYNCHRONIZER (SYNCHRONIZE_RESET),
.SYNCHRONIZE_ACLRN (SYNCHRONIZE_RESET),
.PIPE_DEPTH (2),
.NUM_COPIES (1)
)
dla_acl_reset_handler_inst
(
.clk (clock),
.i_resetn (resetn),
.o_aclrn (aclrn),
.o_resetn_synchronized (),
.o_sclrn (sclrn)
);
//////////////////////
// //
// Burst splitter //
// //
//////////////////////
logic inside_read_burst; //inside a read burst
logic inside_burst; //inside some burst -- we are inside a write burst if inside_burst & ~inside_read_burst
logic [ADDRESS_WIDTH-1:0] internal_address; //address for words inside a burst
logic [BURSTCOUNT_WIDTH-1:0] internal_burstcount; //word counter for the burst being split: loaded from up_burstcount, decremented per accepted word, equals 2 while the last word is presented
logic internal_burstcount_eq_two; //register the check for internal_burstcount == 2 by looking at how we get into that condition
logic backpressure_during_reset; //helper signal which sets up_waitrequest = 1 during reset under various reset configurations
logic backpressure_during_read_burst; //stall upstream while we split read bursts
logic [ADDRESS_WIDTH-1:0] up_address_plus_byteenable_width; //manually split the bits of the adder in the case where the bursts are known to not cross some boundary
logic [ADDRESS_WIDTH-1:0] internal_address_plus_byteenable_width; //same idea as above
logic down_burstcount_mask; //under which conditions should we override down_burstcount to 1
logic [ADDRESS_WIDTH-1:0] down_address_raw; //before outputting down_address, set the bottom ADDRESS_BITS_PER_WORD bits to 0, Quartus will prune any logic that drove these bits
//next-word address = current address + one word (BYTEENABLE_WIDTH bytes), computed for both the upstream address
//(used when a burst starts) and the registered address (used while inside a burst)
generate
if (BURST_BOUNDARY) begin : GEN_SHORT_ADDRESS_ADDER //burst will not cross a boundary of 2**BURST_BOUNDARY
assign up_address_plus_byteenable_width[BURST_BOUNDARY-1:0] = up_address[BURST_BOUNDARY-1:0] + BYTEENABLE_WIDTH; //only the lower address bits within a burst need the adder
assign up_address_plus_byteenable_width[ADDRESS_WIDTH-1:BURST_BOUNDARY] = up_address[ADDRESS_WIDTH-1:BURST_BOUNDARY]; //upper bits come directly from the input address
assign internal_address_plus_byteenable_width[BURST_BOUNDARY-1:0] = internal_address[BURST_BOUNDARY-1:0] + BYTEENABLE_WIDTH;
assign internal_address_plus_byteenable_width[ADDRESS_WIDTH-1:BURST_BOUNDARY] = internal_address[ADDRESS_WIDTH-1:BURST_BOUNDARY]; //reg holds its value
end
else begin : GEN_FULL_ADDRESS_ADDER
assign up_address_plus_byteenable_width = up_address + BYTEENABLE_WIDTH;
assign internal_address_plus_byteenable_width = internal_address + BYTEENABLE_WIDTH;
end
endgenerate
//burst tracking state: the asynchronous reset clears every register, the synchronous reset (at the bottom, so it
//overrides any assignment made earlier in the same clock edge) clears only the two inside_* flags
always_ff @(posedge clock or negedge aclrn) begin
if (~aclrn) begin
inside_read_burst <= 1'b0;
inside_burst <= 1'b0;
internal_address <= '0;
internal_burstcount <= '0;
internal_burstcount_eq_two <= 1'b0;
end
else begin
if (~inside_burst) begin
//not inside a burst: these three registers are loaded every clock cycle from the upstream signals, whether or
//not a transaction is accepted; they only take effect once inside_burst gets set
internal_address <= up_address_plus_byteenable_width;
internal_burstcount <= up_burstcount;
internal_burstcount_eq_two <= (up_burstcount == 2);
//whether or not we enter inside a burst for splitting depends on the burst splitting configuration
//in every configuration a burst is entered only when the first word is accepted and up_burstcount != 1
if (SPLIT_WRITE_BURSTS && SPLIT_READ_BURSTS) begin //split both read and write bursts
if ((up_read | up_write) & ~down_waitrequest & (up_burstcount != 1)) inside_burst <= 1'b1;
if ( up_read & ~down_waitrequest & (up_burstcount != 1)) inside_read_burst <= 1'b1;
end
else if (SPLIT_WRITE_BURSTS) begin //split write bursts only
//with USE_STALL_LATENCY = 1 an asserted up_write is treated as accepted regardless of down_waitrequest
if (up_write & (~down_waitrequest | USE_STALL_LATENCY) & (up_burstcount != 1)) inside_burst <= 1'b1;
//inside_read_burst will be stuck at 0
end
else if (SPLIT_READ_BURSTS) begin //split read bursts only
if (up_read & ~down_waitrequest & (up_burstcount != 1)) begin
inside_burst <= 1'b1;
inside_read_burst <= 1'b1;
end
end
//else no burst splitting, inside_burst and inside_read_burst will both be stuck at 0
end
else begin
//note that USE_STALL_LATENCY applies to the upstream interface, but one can only set USE_STALL_LATENCY = 1 when splitting only write bursts
//in which case the control flow does not change, i.e. up_waitrequest = down_waitrequest
//a word inside the burst is accepted: for a read burst this module drives the request itself, for a write
//burst upstream must present the next data word via up_write
if ((~down_waitrequest | USE_STALL_LATENCY) & (backpressure_during_read_burst | up_write)) begin
internal_address <= internal_address_plus_byteenable_width;
internal_burstcount <= internal_burstcount - 1;
//the counter is about to become 2 exactly when it currently holds 3
internal_burstcount_eq_two <= (internal_burstcount == 3);
//the word just accepted was the last one of the burst, leave the burst
if (internal_burstcount_eq_two) begin
inside_read_burst <= 1'b0;
inside_burst <= 1'b0;
end
end
end
//synchronous reset, must stay after the logic above so that it takes priority
if (~sclrn) begin
inside_read_burst <= 1'b0;
inside_burst <= 1'b0;
end
end
end
//backpressure
//backpressure_during_reset is constant 0 when BACKPRESSURE_DURING_RESET = 0, otherwise it is 1 whenever aclrn or sclrn is low
assign backpressure_during_reset = (!BACKPRESSURE_DURING_RESET) ? 1'b0 : (~aclrn) ? 1'b1 : (~sclrn) ? 1'b1 : 1'b0;
assign backpressure_during_read_burst = (SPLIT_READ_BURSTS) ? inside_read_burst : 1'b0;
assign up_waitrequest = down_waitrequest | backpressure_during_read_burst | backpressure_during_reset;
//write only data path can simply pass through
assign down_writedata = up_writedata;
assign down_byteenable = up_byteenable;
//if we are inside a burst, the read ack has already gone to upstream so the next transaction is being presented
//so while splitting a read burst down_read is forced to 1 and down_write to 0, ignoring up_read / up_write
assign down_read = (backpressure_during_read_burst) ? 1'b1 : up_read;
assign down_write = (backpressure_during_read_burst) ? 1'b0 : up_write;
//original address is used at the beginning of a burst (or if burst was not split), we computed the address inside a burst
assign down_address_raw = (inside_burst) ? internal_address : up_address;
//set lower ADDRESS_BITS_PER_WORD bits to 0, the lower bits will be pruned away from all logic related to address
//(the part-select drops the low bits, the left shift puts the remaining bits back in place)
assign down_address = down_address_raw[ADDRESS_WIDTH-1:ADDRESS_BITS_PER_WORD] << ADDRESS_BITS_PER_WORD;
//under what conditions are we splitting a burst, and therefore down_burstcount should be set to 1
//splitting both: always, write only: when down_write is asserted, read only: when down_read is asserted, neither: never
assign down_burstcount_mask = (SPLIT_WRITE_BURSTS && SPLIT_READ_BURSTS) ? 1'b1 : (SPLIT_WRITE_BURSTS) ? down_write : (SPLIT_READ_BURSTS) ? down_read : 1'b0;
assign down_burstcount = (down_burstcount_mask) ? 1'h1 : up_burstcount;
endmodule
`default_nettype wire
|