1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
|
// Copyright 2020 Intel Corporation.
//
// This software and the related documents are Intel copyrighted materials,
// and your use of them is governed by the express license under which they
// were provided to you ("License"). Unless the License provides otherwise,
// you may not use, modify, copy, publish, distribute, disclose or transmit
// this software or the related documents without Intel's prior written
// permission.
//
// This software and the related documents are provided as is, with no express
// or implied warranties, other than those that are expressly stated in the
// License.
//see dla_hld_ram.sv for a description of the parameters, ports, and general functionality of all the dla_hld_ram layers
//this layer adds error correction codes, specifically single error correct, double error detect (SECDED) Hamming codes
`default_nettype none
`include "dla_acl_parameter_assert.svh"
//dla_hld_ram_ecc -- optional ECC wrapper around dla_hld_ram_tall_depth_stitch
//ENABLE_ECC = 1: the write data of both ports is encoded per byte enable group, the widened data (ENCODED_WIDTH bits) is stored in the next layer, and the read data is decoded back to WIDTH bits
//ENABLE_ECC = 0: write data and read data pass through unchanged and ecc_err_status is tied to 0
//in both cases a_readdata is driven to 'x unless RAM_OPERATION_MODE is "TRUE_DUAL_PORT"
//ecc_err_status[1] reports a corrected single error, ecc_err_status[0] reports a detected double error, each is the OR over all byte enable groups of both ports
module dla_hld_ram_ecc
import dla_acl_ecc_pkg::*;
#(
//geometry configuration
parameter int DEPTH,
parameter int WIDTH,
parameter int BE_WIDTH,
//geometry constants
parameter bit MINIMIZE_MEMORY_USAGE,
parameter int MIN_PHYSICAL_DEPTH,
//memory initialization
parameter bit USE_MEM_INIT_FILE,
parameter bit ZERO_INITIALIZE_MEM,
parameter MEM_INIT_NAME,
//error correction codes -- these parameters are consumed at this layer, layers below do not have them
//ASYNC_RESET and SYNCHRONIZE_RESET are only forwarded to dla_hld_ram_ecc_pulse_stretch_and_sticky, which is only instantiated when ENABLE_ECC and ECC_STATUS_TIME_STRETCH are both set
parameter bit ENABLE_ECC,
parameter bit ECC_STATUS_TIME_STRETCH,
parameter bit ASYNC_RESET,
parameter bit SYNCHRONIZE_RESET,
//memory configuration
parameter RAM_BLOCK_TYPE,
parameter RAM_OPERATION_MODE,
parameter DEVICE_FAMILY,
parameter READ_DURING_WRITE,
parameter bit REGISTER_A_READDATA,
parameter bit REGISTER_B_ADDRESS,
parameter bit REGISTER_B_READDATA,
//try to use memory hardened logic
parameter bit USE_ENABLE,
parameter bit COMMON_IN_CLOCK_EN,
parameter bit COMMON_OUT_CLOCK_EN,
//derived parameters
//number of address bits needed to index DEPTH words
localparam int ADDR = $clog2(DEPTH)
) (
input wire clock,
//port a
input wire [ADDR-1:0] a_address,
input wire a_write,
input wire [WIDTH-1:0] a_writedata,
input wire [BE_WIDTH-1:0] a_byteenable,
output logic [WIDTH-1:0] a_readdata,
input wire a_in_clock_en,
input wire a_out_clock_en,
input wire a_read_enable,
//port b
input wire [ADDR-1:0] b_address,
input wire b_write,
input wire [WIDTH-1:0] b_writedata,
input wire [BE_WIDTH-1:0] b_byteenable,
output logic [WIDTH-1:0] b_readdata,
input wire b_in_clock_en,
input wire b_out_clock_en,
input wire b_read_enable,
//error correction code -- these signals are consumed at this layer, layers below do not have them
//resetn is only connected to the pulse stretch helper, it is unused when ENABLE_ECC = 0 or ECC_STATUS_TIME_STRETCH = 0
input wire resetn,
output logic [1:0] ecc_err_status
);
///////////////////////
// Legality checks //
///////////////////////
generate
//width / be_width must divide evenly with no remainder
`DLA_ACL_PARAMETER_ASSERT(WIDTH % BE_WIDTH == 0)
endgenerate
//////////////////////////
// Derived parameters //
//////////////////////////
//note: the max group size of 32 has been deliberately chosen, after encoding this expands to 39 bits which maps nicely to the physical width of m20k and mlab
//getEncodedBitsEccGroup is presumably provided by dla_acl_ecc_pkg (imported above) -- confirm there how the encoded width of one byte enable group is computed
localparam int BITS_PER_ENABLE = WIDTH / BE_WIDTH; //how many bits of data are controlled by each byte enable signal, typically we think of this as 8 but really can be any value
localparam int MAX_ECC_GROUP_SIZE = 32; //if data is wide, slice into smaller sections and encode each section independently, limit the xor network size to maintain high fmax
localparam int ECC_GROUP_SIZE = (BITS_PER_ENABLE > MAX_ECC_GROUP_SIZE) ? MAX_ECC_GROUP_SIZE : BITS_PER_ENABLE; //cannot jointly encode data from different byte enable groups
localparam int ENCODED_BITS_PER_ENABLE = (ENABLE_ECC) ? getEncodedBitsEccGroup(BITS_PER_ENABLE, ECC_GROUP_SIZE) : BITS_PER_ENABLE; //how many encoded bits of data are controlled by each byte enable signal
localparam int ENCODED_WIDTH = ENCODED_BITS_PER_ENABLE * BE_WIDTH; //total data width after encoding
////////////////////////////////////////////////////////////////
// Encode the write data, respecting byte enable boundaries //
////////////////////////////////////////////////////////////////
genvar g;
//*_writedata feeds the next layer, *_readdata_raw comes straight back from the next layer, *_readdata is the raw read data after the sim-only error injection xor
logic [ENCODED_WIDTH-1:0] encoded_a_writedata, encoded_b_writedata;
logic [ENCODED_WIDTH-1:0] encoded_a_readdata, encoded_b_readdata;
logic [ENCODED_WIDTH-1:0] encoded_a_readdata_raw, encoded_b_readdata_raw;
generate
if (ENABLE_ECC) begin : ECC_ENCODE
//each byte enable signal controls BITS_PER_ENABLE bits of the data path, to respect this boundary never ecc encode across different groups
//it is possible that the group could be large, so we would want multiple ecc encoders within that group to limit the size of the xor network
//dla_acl_ecc_encoder already does that for us, however we may need non-uniform slicing, so we still have to deal with that here
//example scenario, suppose WIDTH = 98 and BE_WIDTH = 2, therefore BITS_PER_ENABLE = 49
//given that MAX_ECC_GROUP_SIZE = 32, this is how the data should be sliced up:
//dla_acl_ecc_encoder instance 0 -- ecc instance 0 handles bits 31:0, ecc instance 1 handles bits 48:32
//dla_acl_ecc_encoder instance 1 -- ecc instance 0 handles bits 80:49, ecc instance 1 handles bits 97:81
//the above layout cannot be achieved with only one instance of dla_acl_ecc_encoder, there is no way to alternate between slicing 32 and 17 bits
//one encoder per port per byte enable group, group g maps plain bits [g*BITS_PER_ENABLE +: BITS_PER_ENABLE] to encoded bits [g*ENCODED_BITS_PER_ENABLE +: ENCODED_BITS_PER_ENABLE]
for (g=0; g<BE_WIDTH; g++) begin
dla_acl_ecc_encoder
#(
.DATA_WIDTH (BITS_PER_ENABLE),
.ECC_GROUP_SIZE (ECC_GROUP_SIZE),
.INPUT_PIPELINE_STAGES (0), //must use zero latency to maintain the conceptual clock enable model of dla_hld_ram
.OUTPUT_PIPELINE_STAGES (0) //likewise as above
)
dla_acl_ecc_encoder_inst_a
(
.clock (clock), //this currently has no effect since the number of pipeline stages is 0
.clock_enable (1'b1), //this currently has no effect since the number of pipeline stages is 0
.i_data (a_writedata[g*BITS_PER_ENABLE +: BITS_PER_ENABLE]),
.o_encoded (encoded_a_writedata[g*ENCODED_BITS_PER_ENABLE +: ENCODED_BITS_PER_ENABLE])
);
//port b encoder, configured identically to the port a encoder
dla_acl_ecc_encoder
#(
.DATA_WIDTH (BITS_PER_ENABLE),
.ECC_GROUP_SIZE (ECC_GROUP_SIZE),
.INPUT_PIPELINE_STAGES (0),
.OUTPUT_PIPELINE_STAGES (0)
)
dla_acl_ecc_encoder_inst_b
(
.clock (clock),
.clock_enable (1'b1),
.i_data (b_writedata[g*BITS_PER_ENABLE +: BITS_PER_ENABLE]),
.o_encoded (encoded_b_writedata[g*ENCODED_BITS_PER_ENABLE +: ENCODED_BITS_PER_ENABLE])
);
end
end
else begin : NO_ENCODE
//without ecc ENCODED_WIDTH equals WIDTH (see the derived parameters), so the write data is connected directly
assign encoded_a_writedata = a_writedata;
assign encoded_b_writedata = b_writedata;
end
endgenerate
/////////////////////////////////////////////////
// Next layer in the instantiation hierarchy //
/////////////////////////////////////////////////
//every parameter and port is forwarded as-is except the data width and the data buses, which use the encoded versions (marked "changed")
//the ecc and reset parameters, resetn and ecc_err_status are not forwarded
//BE_WIDTH is unchanged, so presumably each byte enable covers ENCODED_BITS_PER_ENABLE bits in the next layer -- confirm in dla_hld_ram_tall_depth_stitch
dla_hld_ram_tall_depth_stitch
#(
.DEPTH (DEPTH),
.WIDTH (ENCODED_WIDTH), //changed
.BE_WIDTH (BE_WIDTH),
.MINIMIZE_MEMORY_USAGE (MINIMIZE_MEMORY_USAGE),
.MIN_PHYSICAL_DEPTH (MIN_PHYSICAL_DEPTH),
.USE_MEM_INIT_FILE (USE_MEM_INIT_FILE),
.ZERO_INITIALIZE_MEM (ZERO_INITIALIZE_MEM),
.MEM_INIT_NAME (MEM_INIT_NAME),
.RAM_BLOCK_TYPE (RAM_BLOCK_TYPE),
.RAM_OPERATION_MODE (RAM_OPERATION_MODE),
.DEVICE_FAMILY (DEVICE_FAMILY),
.READ_DURING_WRITE (READ_DURING_WRITE),
.REGISTER_A_READDATA (REGISTER_A_READDATA),
.REGISTER_B_ADDRESS (REGISTER_B_ADDRESS),
.REGISTER_B_READDATA (REGISTER_B_READDATA),
.USE_ENABLE (USE_ENABLE),
.COMMON_IN_CLOCK_EN (COMMON_IN_CLOCK_EN),
.COMMON_OUT_CLOCK_EN (COMMON_OUT_CLOCK_EN)
)
dla_hld_ram_tall_depth_stitch_inst
(
.clock (clock),
.a_address (a_address),
.a_write (a_write),
.a_writedata (encoded_a_writedata), //changed
.a_byteenable (a_byteenable),
.a_readdata (encoded_a_readdata_raw), //changed
.a_in_clock_en (a_in_clock_en),
.a_out_clock_en (a_out_clock_en),
.a_read_enable (a_read_enable),
.b_address (b_address),
.b_write (b_write),
.b_writedata (encoded_b_writedata), //changed
.b_byteenable (b_byteenable),
.b_readdata (encoded_b_readdata_raw), //changed
.b_in_clock_en (b_in_clock_en),
.b_out_clock_en (b_out_clock_en),
.b_read_enable (b_read_enable)
);
//imitate the query functions in the software model
//these mirror the values of the same name inside the next layer through hierarchical references, excluded from synthesis by the translate_off/translate_on pragmas
// synthesis translate_off
int NUM_PHYSICAL_M20K, NUM_PHYSICAL_MLAB;
assign NUM_PHYSICAL_M20K = dla_hld_ram_tall_depth_stitch_inst.NUM_PHYSICAL_M20K;
assign NUM_PHYSICAL_MLAB = dla_hld_ram_tall_depth_stitch_inst.NUM_PHYSICAL_MLAB;
// synthesis translate_on
////////////////////////////////
// Sim-only error injection //
////////////////////////////////
//leave a hook for injecting errors into the read data, intended for simulation only
//the masks are 0 unless forced, so the xor below leaves the read data unchanged, a 1 in a forced mask flips the corresponding encoded read data bit
//the names of the SIM_ONLY_* signals must stay stable since the testbench refers to them
logic [ENCODED_WIDTH-1:0] SIM_ONLY_a_inject_error, SIM_ONLY_b_inject_error;
assign SIM_ONLY_a_inject_error = 0; //these signals are forced by the testbench
assign SIM_ONLY_b_inject_error = 0;
assign encoded_a_readdata = encoded_a_readdata_raw ^ SIM_ONLY_a_inject_error;
assign encoded_b_readdata = encoded_b_readdata_raw ^ SIM_ONLY_b_inject_error;
///////////////////////////////////////////////////////////
// Decode the read data and produce ECC status signals //
///////////////////////////////////////////////////////////
localparam bit CONNECT_A_READDATA_TO_ECC = RAM_OPERATION_MODE == "TRUE_DUAL_PORT"; //ignore port a read data if simple dual port
generate
if (ENABLE_ECC) begin : ECC_DECODE
//per byte enable group error flags from the decoders, one bit per group and per port
logic [BE_WIDTH-1:0] a_single_error, a_double_error;
logic [BE_WIDTH-1:0] b_single_error, b_double_error;
logic any_single_error, any_double_error;
//one decoder per port per byte enable group, using the same slicing as the encoders
for (g=0; g<BE_WIDTH; g++) begin
if (CONNECT_A_READDATA_TO_ECC) begin
dla_acl_ecc_decoder
#(
.DATA_WIDTH (BITS_PER_ENABLE),
.ECC_GROUP_SIZE (ECC_GROUP_SIZE),
.INPUT_PIPELINE_STAGES (0), //must use zero latency to maintain the conceptual clock enable model of dla_hld_ram
.OUTPUT_PIPELINE_STAGES (0), //likewise as above
.STATUS_PIPELINE_STAGES (0) //likewise as above
)
dla_acl_ecc_decoder_inst_a
(
.clock (clock), //this currently has no effect since the number of pipeline stages is 0
.clock_enable (1'b1), //this currently has no effect since the number of pipeline stages is 0
.i_encoded (encoded_a_readdata[g*ENCODED_BITS_PER_ENABLE +: ENCODED_BITS_PER_ENABLE]),
.o_data (a_readdata[g*BITS_PER_ENABLE +: BITS_PER_ENABLE]),
.o_single_error_corrected (a_single_error[g]),
.o_double_error_detected (a_double_error[g])
);
end
else begin
//port a read data is not decoded in this mode, drive the data as don't care and report no errors for this group
assign a_readdata[g*BITS_PER_ENABLE +: BITS_PER_ENABLE] = 'x;
assign a_single_error[g] = '0;
assign a_double_error[g] = '0;
end
//port b read data is always decoded
dla_acl_ecc_decoder
#(
.DATA_WIDTH (BITS_PER_ENABLE),
.ECC_GROUP_SIZE (ECC_GROUP_SIZE),
.INPUT_PIPELINE_STAGES (0),
.OUTPUT_PIPELINE_STAGES (0),
.STATUS_PIPELINE_STAGES (0)
)
dla_acl_ecc_decoder_inst_b
(
.clock (clock),
.clock_enable (1'b1),
.i_encoded (encoded_b_readdata[g*ENCODED_BITS_PER_ENABLE +: ENCODED_BITS_PER_ENABLE]),
.o_data (b_readdata[g*BITS_PER_ENABLE +: BITS_PER_ENABLE]),
.o_single_error_corrected (b_single_error[g]),
.o_double_error_detected (b_double_error[g])
);
end
//collapse the per group flags, an error in any byte enable group on either port raises the shared status
assign any_single_error = (|a_single_error) | (|b_single_error);
assign any_double_error = (|a_double_error) | (|b_double_error);
if (ECC_STATUS_TIME_STRETCH) begin
//registered status: the single error bit is pulse stretched and the double error bit is sticky until reset, see dla_hld_ram_ecc_pulse_stretch_and_sticky
dla_hld_ram_ecc_pulse_stretch_and_sticky #(
.ASYNC_RESET (ASYNC_RESET),
.SYNCHRONIZE_RESET (SYNCHRONIZE_RESET),
.SINGLE_ERROR_PULSE_STRETCH (3) //this is existing behavior from dla_acl_altera_syncram_wrapped
)
dla_hld_ram_ecc_pulse_stretch_and_sticky_inst
(
.clock (clock),
.resetn (resetn),
.i_single_error_corrected (any_single_error),
.i_double_error_detected (any_double_error),
.o_ecc_err_status (ecc_err_status)
);
end
else begin
//raw status with no registers added at this layer, bit 1 = single error corrected, bit 0 = double error detected, same bit order as the time stretched version
assign ecc_err_status = {any_single_error, any_double_error};
end
end
else begin : NO_DECODE
//without ecc ENCODED_WIDTH equals WIDTH, so the read data is connected directly and no errors are ever reported
if (CONNECT_A_READDATA_TO_ECC) begin
assign a_readdata = encoded_a_readdata;
end
else begin
assign a_readdata = 'x;
end
assign b_readdata = encoded_b_readdata;
assign ecc_err_status = 2'h0;
end
endgenerate
endmodule
//this is a helper module to convert the raw signals from the ECC decoder into something suitable for lazy collection
//assuming bit errors are rare, one way to monitor the ECC status signals from all memories is to simply OR the status signals from all instances
//these may be physically spread across the FPGA, so pulse stretch them so that they can be collected on a slower clock (or by using a multicycle clock constraint)
//dla_hld_ram_ecc_pulse_stretch_and_sticky -- registers the raw ECC status so it can be collected lazily
//o_ecc_err_status[1]: single error corrected, registered and pulse stretched, an input high for one clock gives an output high for SINGLE_ERROR_PULSE_STRETCH+1 clocks starting one clock after the input
//o_ecc_err_status[0]: double error detected, sticky, once set it stays set until reset
module dla_hld_ram_ecc_pulse_stretch_and_sticky #(
    parameter bit ASYNC_RESET,                  //how do registers CONSUME reset, 1 = asynchronously, 0 = synchronously
    parameter bit SYNCHRONIZE_RESET,            //should reset be synchronized BEFORE it is consumed, 1 = synchronize it, 0 = no change to reset before consumption
    parameter int SINGLE_ERROR_PULSE_STRETCH    //at least 1 (checked below), how many clock cycles to pulse stretch any single bit error, a value of 3 means an input high for one clock cycle results in an output high for four clocks
) (
    input  wire         clock,
    input  wire         resetn,                     //active low reset, conditioned by dla_acl_reset_handler before use
    input  wire         i_single_error_corrected,   //raw single error flag, sampled every clock
    input  wire         i_double_error_detected,    //raw double error flag, sampled every clock
    output logic  [1:0] o_ecc_err_status            //{stretched single error, sticky double error}
);

    ///////////////////////
    //  Legality checks  //
    ///////////////////////
    generate
    //the history shift register below is SINGLE_ERROR_PULSE_STRETCH bits wide, a value below 1 would declare a range like [-1:0] with bits that are never shifted into
    //reject that at elaboration instead of silently building something that does not match the documented stretch length
    `DLA_ACL_PARAMETER_ASSERT(SINGLE_ERROR_PULSE_STRETCH >= 1)
    endgenerate

    //////////////////////
    //  Reset handling  //
    //////////////////////
    //the registers below consume both aclrn (asynchronously) and sclrn (synchronously)
    //presumably dla_acl_reset_handler only activates the one selected by ASYNC_RESET and ties off the other -- confirm in dla_acl_reset_handler
    logic aclrn, sclrn;
    dla_acl_reset_handler
    #(
        .ASYNC_RESET            (ASYNC_RESET),
        .USE_SYNCHRONIZER       (SYNCHRONIZE_RESET),
        .SYNCHRONIZE_ACLRN      (SYNCHRONIZE_RESET),
        .PULSE_EXTENSION        (0),
        .PIPE_DEPTH             (1),
        .NUM_COPIES             (1)
    )
    dla_acl_reset_handler_inst
    (
        .clk                    (clock),
        .i_resetn               (resetn),
        .o_aclrn                (aclrn),
        .o_resetn_synchronized  (),
        .o_sclrn                (sclrn)
    );

    ////////////////////////
    //  Status registers  //
    ////////////////////////
    //single_error_history[i] holds the single error input from i+1 clocks ago
    //the stretched output is the OR of the current input and the whole history, registered, hence SINGLE_ERROR_PULSE_STRETCH+1 clocks of output per input clock
    //the double error detected status is a sticky bit, only reset can clear it, the intent being one should probably restart the system if an uncorrectable error is seen
    logic [SINGLE_ERROR_PULSE_STRETCH-1:0] single_error_history;
    logic single_error_pulse_stretched;
    logic double_error_latched;

    always_ff @(posedge clock or negedge aclrn) begin
        if (~aclrn) begin
            single_error_history <= '0;
            single_error_pulse_stretched <= 1'b0;
            double_error_latched <= 1'b0;
        end
        else begin
            //shift the newest sample in at bit 0, older samples move towards the msb and eventually fall off
            single_error_history[0] <= i_single_error_corrected;
            for (int i=1; i<SINGLE_ERROR_PULSE_STRETCH; i++) single_error_history[i] <= single_error_history[i-1];
            single_error_pulse_stretched <= i_single_error_corrected | (|single_error_history);
            //can only go from 0 to 1 here, cleared only by the reset branches
            double_error_latched <= double_error_latched | i_double_error_detected;
            //synchronous clear is written last so it overrides the assignments above
            if (~sclrn) begin
                single_error_history <= '0;
                single_error_pulse_stretched <= 1'b0;
                double_error_latched <= 1'b0;
            end
        end
    end

    //same bit order as the non-stretched status in dla_hld_ram_ecc: bit 1 = single error, bit 0 = double error
    assign o_ecc_err_status = {single_error_pulse_stretched, double_error_latched};

endmodule
`default_nettype wire
|