summaryrefslogtreecommitdiff
path: root/python/openvino/demo/ip/intel_ai_ip/verilog/dla_hld_ram_ecc.sv
blob: 4f9289e44586ef8aded1579ed5823560ae332ee7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
// Copyright 2020 Intel Corporation.
//
// This software and the related documents are Intel copyrighted materials,
// and your use of them is governed by the express license under which they
// were provided to you ("License"). Unless the License provides otherwise,
// you may not use, modify, copy, publish, distribute, disclose or transmit
// this software or the related documents without Intel's prior written
// permission.
//
// This software and the related documents are provided as is, with no express
// or implied warranties, other than those that are expressly stated in the
// License.

//see dla_hld_ram.sv for a description of the parameters, ports, and general functionality of all the dla_hld_ram layers

//this layer adds error correction codes, specifically single-error-correct double-error-detect (SECDED) Hamming codes

`default_nettype none

`include "dla_acl_parameter_assert.svh"

//dla_hld_ram_ecc -- optional error correction layer of the dla_hld_ram hierarchy
//
//ENABLE_ECC = 1:
//  - the write data of both ports is encoded with dla_acl_ecc_encoder, independently for each byte enable group
//  - the encoded data (ENCODED_WIDTH bits wide) is handed to the next layer, dla_hld_ram_tall_depth_stitch
//  - the read data is decoded with dla_acl_ecc_decoder, the per-group error flags are OR-reduced into ecc_err_status
//ENABLE_ECC = 0:
//  - write data and read data pass through unmodified and ecc_err_status is tied to 0
//
//in both cases a_readdata is driven to 'x unless RAM_OPERATION_MODE is "TRUE_DUAL_PORT"
//ecc_err_status bit 1 reports single errors (corrected), bit 0 reports double errors (detected)
module dla_hld_ram_ecc
import dla_acl_ecc_pkg::*;
#(
    //geometry configuration
    parameter  int DEPTH,
    parameter  int WIDTH,
    parameter  int BE_WIDTH,

    //geometry constants
    parameter  bit MINIMIZE_MEMORY_USAGE,
    parameter  int MIN_PHYSICAL_DEPTH,

    //memory initialization
    parameter  bit USE_MEM_INIT_FILE,
    parameter  bit ZERO_INITIALIZE_MEM,
    parameter      MEM_INIT_NAME,

    //error correction codes -- these parameters are consumed at this layer, layers below do not have them
    parameter  bit ENABLE_ECC,                  //1 = encode/decode the data path, 0 = pass data through and tie ecc_err_status to 0
    parameter  bit ECC_STATUS_TIME_STRETCH,     //1 = route the status through dla_hld_ram_ecc_pulse_stretch_and_sticky, 0 = no registers are added to the status at this layer
    parameter  bit ASYNC_RESET,                 //only forwarded to dla_hld_ram_ecc_pulse_stretch_and_sticky
    parameter  bit SYNCHRONIZE_RESET,           //only forwarded to dla_hld_ram_ecc_pulse_stretch_and_sticky

    //memory configuration
    parameter      RAM_BLOCK_TYPE,
    parameter      RAM_OPERATION_MODE,          //compared against "TRUE_DUAL_PORT" at this layer to decide whether port a read data is connected
    parameter      DEVICE_FAMILY,
    parameter      READ_DURING_WRITE,
    parameter  bit REGISTER_A_READDATA,
    parameter  bit REGISTER_B_ADDRESS,
    parameter  bit REGISTER_B_READDATA,

    //try to use memory hardened logic
    parameter  bit USE_ENABLE,
    parameter  bit COMMON_IN_CLOCK_EN,
    parameter  bit COMMON_OUT_CLOCK_EN,

    //derived parameters
    localparam int ADDR = $clog2(DEPTH)         //address width
) (
    input  wire                 clock,

    //port a
    input  wire      [ADDR-1:0] a_address,
    input  wire                 a_write,
    input  wire     [WIDTH-1:0] a_writedata,
    input  wire  [BE_WIDTH-1:0] a_byteenable,
    output logic    [WIDTH-1:0] a_readdata,     //driven to 'x unless RAM_OPERATION_MODE is "TRUE_DUAL_PORT"
    input  wire                 a_in_clock_en,
    input  wire                 a_out_clock_en,
    input  wire                 a_read_enable,

    //port b
    input  wire      [ADDR-1:0] b_address,
    input  wire                 b_write,
    input  wire     [WIDTH-1:0] b_writedata,
    input  wire  [BE_WIDTH-1:0] b_byteenable,
    output logic    [WIDTH-1:0] b_readdata,
    input  wire                 b_in_clock_en,
    input  wire                 b_out_clock_en,
    input  wire                 b_read_enable,

    //error correction code -- these signals are consumed at this layer, layers below do not have them
    input  wire                 resetn,         //only used when both ENABLE_ECC and ECC_STATUS_TIME_STRETCH are set
    output logic          [1:0] ecc_err_status  //bit 1 = single error, bit 0 = double error, constant 0 when ENABLE_ECC = 0
);

    ///////////////////////
    //  Legality checks  //
    ///////////////////////

    generate
    //width / be_width must divide evenly with no remainder
    `DLA_ACL_PARAMETER_ASSERT(WIDTH % BE_WIDTH == 0)
    endgenerate



    //////////////////////////
    //  Derived parameters  //
    //////////////////////////

    //note: the max group size of 32 has been deliberately chosen, after encoding this expands to 39 bits which maps nicely to the physical width of m20k and mlab

    //getEncodedBitsEccGroup is not declared in this file, presumably it comes from the dla_acl_ecc_pkg import -- confirm against the package
    localparam int BITS_PER_ENABLE = WIDTH / BE_WIDTH;      //how many bits of data are controlled by each byte enable signal, typically we think of this as 8 but really can be any value
    localparam int MAX_ECC_GROUP_SIZE = 32;                 //if data is wide, slice into smaller sections and encode each section independently, limit the xor network size to maintain high fmax
    localparam int ECC_GROUP_SIZE = (BITS_PER_ENABLE > MAX_ECC_GROUP_SIZE) ? MAX_ECC_GROUP_SIZE : BITS_PER_ENABLE;                      //cannot jointly encode data from different byte enable groups
    localparam int ENCODED_BITS_PER_ENABLE = (ENABLE_ECC) ? getEncodedBitsEccGroup(BITS_PER_ENABLE, ECC_GROUP_SIZE) : BITS_PER_ENABLE;  //how many encoded bits of data are controlled by each byte enable signal
    localparam int ENCODED_WIDTH = ENCODED_BITS_PER_ENABLE * BE_WIDTH;                                                                  //total data width after encoding



    ////////////////////////////////////////////////////////////////
    //  Encode the write data, respecting byte enable boundaries  //
    ////////////////////////////////////////////////////////////////

    //data path naming:
    //  encoded_*_writedata    -- write data as presented to the next layer
    //  encoded_*_readdata_raw -- read data exactly as returned by the next layer
    //  encoded_*_readdata     -- read data after the sim-only error injection xor, this is what gets decoded
    genvar g;
    logic [ENCODED_WIDTH-1:0] encoded_a_writedata, encoded_b_writedata;
    logic [ENCODED_WIDTH-1:0] encoded_a_readdata, encoded_b_readdata;
    logic [ENCODED_WIDTH-1:0] encoded_a_readdata_raw, encoded_b_readdata_raw;

    generate
    if (ENABLE_ECC) begin : ECC_ENCODE
        //each byte enable signal controls BITS_PER_ENABLE bits of the data path, to respect this boundary never ecc encode across different groups
        //it is possible that the group could be large, so we would want multiple ecc encoders within that group to limit the size of the xor network
        //dla_acl_ecc_encoder already does that for us, however we may need non-uniform slicing, so we still have to deal with that here

        //example scenario, suppose WIDTH = 98 and BE_WIDTH = 2, therefore BITS_PER_ENABLE = 49
        //given that MAX_ECC_GROUP_SIZE = 32, this is how the data should be sliced up:
        //dla_acl_ecc_encoder instance 0 -- ecc instance 0 handles bits 31:0, ecc instance 1 handles bits 48:32
        //dla_acl_ecc_encoder instance 1 -- ecc instance 0 handles bits 80:49, ecc instance 1 handles bits 97:81

        //the above layout cannot be achieved with only one instance of dla_acl_ecc_encoder, there is no way to alternate between slicing 32 and 17 bits

        //one encoder per port per byte enable group
        //group g maps data bits [g*BITS_PER_ENABLE +: BITS_PER_ENABLE] to encoded bits [g*ENCODED_BITS_PER_ENABLE +: ENCODED_BITS_PER_ENABLE]
        for (g=0; g<BE_WIDTH; g++) begin
            dla_acl_ecc_encoder
            #(
                .DATA_WIDTH                 (BITS_PER_ENABLE),
                .ECC_GROUP_SIZE             (ECC_GROUP_SIZE),
                .INPUT_PIPELINE_STAGES      (0),        //must use zero latency to maintain the conceptual clock enable model of dla_hld_ram
                .OUTPUT_PIPELINE_STAGES     (0)         //likewise as above
            )
            dla_acl_ecc_encoder_inst_a
            (
                .clock                      (clock),    //this currently has no effect since the number of pipeline stages is 0
                .clock_enable               (1'b1),     //this currently has no effect since the number of pipeline stages is 0
                .i_data                     (a_writedata[g*BITS_PER_ENABLE +: BITS_PER_ENABLE]),
                .o_encoded                  (encoded_a_writedata[g*ENCODED_BITS_PER_ENABLE +: ENCODED_BITS_PER_ENABLE])
            );

            //port b encoder, same configuration as port a
            dla_acl_ecc_encoder
            #(
                .DATA_WIDTH                 (BITS_PER_ENABLE),
                .ECC_GROUP_SIZE             (ECC_GROUP_SIZE),
                .INPUT_PIPELINE_STAGES      (0),
                .OUTPUT_PIPELINE_STAGES     (0)
            )
            dla_acl_ecc_encoder_inst_b
            (
                .clock                      (clock),
                .clock_enable               (1'b1),
                .i_data                     (b_writedata[g*BITS_PER_ENABLE +: BITS_PER_ENABLE]),
                .o_encoded                  (encoded_b_writedata[g*ENCODED_BITS_PER_ENABLE +: ENCODED_BITS_PER_ENABLE])
            );
        end
    end
    else begin : NO_ENCODE
        //without ecc ENCODED_BITS_PER_ENABLE == BITS_PER_ENABLE, so ENCODED_WIDTH == WIDTH and the write data passes straight through
        assign encoded_a_writedata = a_writedata;
        assign encoded_b_writedata = b_writedata;
    end
    endgenerate



    /////////////////////////////////////////////////
    //  Next layer in the instantiation hierarchy  //
    /////////////////////////////////////////////////

    //all parameters and ports are forwarded unchanged except those marked "changed", which carry the encoded data path
    //the four ecc/reset parameters, resetn and ecc_err_status are not forwarded
    dla_hld_ram_tall_depth_stitch
    #(
        .DEPTH                  (DEPTH),
        .WIDTH                  (ENCODED_WIDTH),            //changed
        .BE_WIDTH               (BE_WIDTH),
        .MINIMIZE_MEMORY_USAGE  (MINIMIZE_MEMORY_USAGE),
        .MIN_PHYSICAL_DEPTH     (MIN_PHYSICAL_DEPTH),
        .USE_MEM_INIT_FILE      (USE_MEM_INIT_FILE),
        .ZERO_INITIALIZE_MEM    (ZERO_INITIALIZE_MEM),
        .MEM_INIT_NAME          (MEM_INIT_NAME),
        .RAM_BLOCK_TYPE         (RAM_BLOCK_TYPE),
        .RAM_OPERATION_MODE     (RAM_OPERATION_MODE),
        .DEVICE_FAMILY          (DEVICE_FAMILY),
        .READ_DURING_WRITE      (READ_DURING_WRITE),
        .REGISTER_A_READDATA    (REGISTER_A_READDATA),
        .REGISTER_B_ADDRESS     (REGISTER_B_ADDRESS),
        .REGISTER_B_READDATA    (REGISTER_B_READDATA),
        .USE_ENABLE             (USE_ENABLE),
        .COMMON_IN_CLOCK_EN     (COMMON_IN_CLOCK_EN),
        .COMMON_OUT_CLOCK_EN    (COMMON_OUT_CLOCK_EN)
    )
    dla_hld_ram_tall_depth_stitch_inst
    (
        .clock                  (clock),
        .a_address              (a_address),
        .a_write                (a_write),
        .a_writedata            (encoded_a_writedata),      //changed
        .a_byteenable           (a_byteenable),
        .a_readdata             (encoded_a_readdata_raw),   //changed
        .a_in_clock_en          (a_in_clock_en),
        .a_out_clock_en         (a_out_clock_en),
        .a_read_enable          (a_read_enable),
        .b_address              (b_address),
        .b_write                (b_write),
        .b_writedata            (encoded_b_writedata),      //changed
        .b_byteenable           (b_byteenable),
        .b_readdata             (encoded_b_readdata_raw),   //changed
        .b_in_clock_en          (b_in_clock_en),
        .b_out_clock_en         (b_out_clock_en),
        .b_read_enable          (b_read_enable)
    );

    //the two counters below mirror the values of the same name inside the next layer via hierarchical references
    //they exist in simulation only, the translate_off/translate_on pragmas hide them from synthesis
    //imitate the query functions in the software model
    // synthesis translate_off
    int NUM_PHYSICAL_M20K, NUM_PHYSICAL_MLAB;
    assign NUM_PHYSICAL_M20K = dla_hld_ram_tall_depth_stitch_inst.NUM_PHYSICAL_M20K;
    assign NUM_PHYSICAL_MLAB = dla_hld_ram_tall_depth_stitch_inst.NUM_PHYSICAL_MLAB;
    // synthesis translate_on



    ////////////////////////////////
    //  Sim-only error injection  //
    ////////////////////////////////

    //leave a hook for injecting errors into the read data, intended for simulation only
    //each bit set in SIM_ONLY_*_inject_error flips the corresponding bit of the encoded read data before it is decoded
    //the masks are tied to 0 here, so unless something overrides them the xor leaves the read data unchanged
    logic [ENCODED_WIDTH-1:0] SIM_ONLY_a_inject_error, SIM_ONLY_b_inject_error;
    assign SIM_ONLY_a_inject_error = 0;     //these signals are forced by the testbench
    assign SIM_ONLY_b_inject_error = 0;
    assign encoded_a_readdata = encoded_a_readdata_raw ^ SIM_ONLY_a_inject_error;
    assign encoded_b_readdata = encoded_b_readdata_raw ^ SIM_ONLY_b_inject_error;



    ///////////////////////////////////////////////////////////
    //  Decode the read data and produce ECC status signals  //
    ///////////////////////////////////////////////////////////

    localparam bit CONNECT_A_READDATA_TO_ECC = RAM_OPERATION_MODE == "TRUE_DUAL_PORT";  //ignore port a read data if simple dual port

    generate
    if (ENABLE_ECC) begin : ECC_DECODE
        //one status bit per byte enable group for each port, OR-reduced below into a single flag per error type
        logic [BE_WIDTH-1:0] a_single_error, a_double_error;
        logic [BE_WIDTH-1:0] b_single_error, b_double_error;
        logic any_single_error, any_double_error;

        //the decoders mirror the encoder slicing: encoded bits [g*ENCODED_BITS_PER_ENABLE +: ENCODED_BITS_PER_ENABLE] decode to data bits [g*BITS_PER_ENABLE +: BITS_PER_ENABLE]
        for (g=0; g<BE_WIDTH; g++) begin
            if (CONNECT_A_READDATA_TO_ECC) begin
                dla_acl_ecc_decoder
                #(
                    .DATA_WIDTH                 (BITS_PER_ENABLE),
                    .ECC_GROUP_SIZE             (ECC_GROUP_SIZE),
                    .INPUT_PIPELINE_STAGES      (0),        //must use zero latency to maintain the conceptual clock enable model of dla_hld_ram
                    .OUTPUT_PIPELINE_STAGES     (0),        //likewise as above
                    .STATUS_PIPELINE_STAGES     (0)         //likewise as above
                )
                dla_acl_ecc_decoder_inst_a
                (
                    .clock                      (clock),    //this currently has no effect since the number of pipeline stages is 0
                    .clock_enable               (1'b1),     //this currently has no effect since the number of pipeline stages is 0
                    .i_encoded                  (encoded_a_readdata[g*ENCODED_BITS_PER_ENABLE +: ENCODED_BITS_PER_ENABLE]),
                    .o_data                     (a_readdata[g*BITS_PER_ENABLE +: BITS_PER_ENABLE]),
                    .o_single_error_corrected   (a_single_error[g]),
                    .o_double_error_detected    (a_double_error[g])
                );
            end
            else begin
                //port a read data is not connected: no decoder is instantiated, the output is 'x and port a never contributes to the error status
                assign a_readdata[g*BITS_PER_ENABLE +: BITS_PER_ENABLE] = 'x;
                assign a_single_error[g] = '0;
                assign a_double_error[g] = '0;
            end

            //port b read data is always decoded
            dla_acl_ecc_decoder
            #(
                .DATA_WIDTH                 (BITS_PER_ENABLE),
                .ECC_GROUP_SIZE             (ECC_GROUP_SIZE),
                .INPUT_PIPELINE_STAGES      (0),
                .OUTPUT_PIPELINE_STAGES     (0),
                .STATUS_PIPELINE_STAGES     (0)
            )
            dla_acl_ecc_decoder_inst_b
            (
                .clock                      (clock),
                .clock_enable               (1'b1),
                .i_encoded                  (encoded_b_readdata[g*ENCODED_BITS_PER_ENABLE +: ENCODED_BITS_PER_ENABLE]),
                .o_data                     (b_readdata[g*BITS_PER_ENABLE +: BITS_PER_ENABLE]),
                .o_single_error_corrected   (b_single_error[g]),
                .o_double_error_detected    (b_double_error[g])
            );
        end

        //an error in any byte enable group on either port raises the corresponding flag
        assign any_single_error = (|a_single_error) | (|b_single_error);
        assign any_double_error = (|a_double_error) | (|b_double_error);

        if (ECC_STATUS_TIME_STRETCH) begin
            //registered status: the single error flag is pulse stretched and the double error flag is sticky until reset
            dla_hld_ram_ecc_pulse_stretch_and_sticky #(
                .ASYNC_RESET                (ASYNC_RESET),
                .SYNCHRONIZE_RESET          (SYNCHRONIZE_RESET),
                .SINGLE_ERROR_PULSE_STRETCH (3)     //this is existing behavior from dla_acl_altera_syncram_wrapped
            )
            dla_hld_ram_ecc_pulse_stretch_and_sticky_inst
            (
                .clock                      (clock),
                .resetn                     (resetn),
                .i_single_error_corrected   (any_single_error),
                .i_double_error_detected    (any_double_error),
                .o_ecc_err_status           (ecc_err_status)
            );
        end
        else begin
            //no registers added at this layer, same bit order as the time stretched variant: bit 1 = single error, bit 0 = double error
            assign ecc_err_status = {any_single_error, any_double_error};
        end
    end
    else begin : NO_DECODE
        //without ecc ENCODED_WIDTH == WIDTH, the read data passes straight through (after the sim-only injection xor)
        if (CONNECT_A_READDATA_TO_ECC) begin
            assign a_readdata = encoded_a_readdata;
        end
        else begin
            assign a_readdata = 'x;
        end
        assign b_readdata = encoded_b_readdata;
        assign ecc_err_status = 2'h0;   //nothing is checked, so never report an error
    end
    endgenerate

endmodule




//this is a helper module to convert the raw signals from the ECC decoder into something suitable for lazy collection
//assuming bit errors are rare, one way to monitor the ECC status signals from all memories is to simply OR the status signals from all instances
//these may be physically spread across the FPGA, so pulse stretch them so that they can be collected on a slower clock (or by using a multicycle clock constraint)

module dla_hld_ram_ecc_pulse_stretch_and_sticky #(
    parameter  bit ASYNC_RESET,                 //how do registers CONSUME reset, 1 = asynchronously, 0 = synchronously
    parameter  bit SYNCHRONIZE_RESET,           //should reset be synchronized BEFORE it is consumed, 1 = synchronize it, 0 = no change to reset before consumption
    parameter  int SINGLE_ERROR_PULSE_STRETCH   //at least 1 (enforced below), how many clock cycles to pulse stretch any single bit error, a value of 3 means an input high for one clock cycle results in an output high for four clocks
) (
    input  wire         clock,
    input  wire         resetn,                     //active low reset, passed through dla_acl_reset_handler before it reaches the registers
    input  wire         i_single_error_corrected,   //raw single error flag, may be high for as little as one clock cycle
    input  wire         i_double_error_detected,    //raw double error flag, may be high for as little as one clock cycle
    output logic  [1:0] o_ecc_err_status            //registered, bit 1 = pulse stretched single error, bit 0 = sticky double error
);

    ///////////////////////
    //  Legality checks  //
    ///////////////////////

    generate
    //the history shift register needs at least one stage, a value below 1 would declare a reversed [-1:0] range (or wider)
    //and the stretch length would no longer match the documented behavior
    `DLA_ACL_PARAMETER_ASSERT(SINGLE_ERROR_PULSE_STRETCH >= 1)
    endgenerate



    /////////////
    //  Reset  //
    /////////////

    //the double error detected status is a sticky bit, only reset can clear it, the intent being one should probably restart the system if an uncorrectable error is seen
    //NOTE(review): presumably dla_acl_reset_handler drives only one of aclrn / sclrn with the reset depending on ASYNC_RESET
    //and holds the other one inactive (high) -- confirm against dla_acl_reset_handler
    logic aclrn, sclrn;
    dla_acl_reset_handler
    #(
        .ASYNC_RESET            (ASYNC_RESET),
        .USE_SYNCHRONIZER       (SYNCHRONIZE_RESET),
        .SYNCHRONIZE_ACLRN      (SYNCHRONIZE_RESET),
        .PULSE_EXTENSION        (0),
        .PIPE_DEPTH             (1),
        .NUM_COPIES             (1)
    )
    dla_acl_reset_handler_inst
    (
        .clk                    (clock),
        .i_resetn               (resetn),
        .o_aclrn                (aclrn),
        .o_resetn_synchronized  (),
        .o_sclrn                (sclrn)
    );



    ////////////////////////
    //  Status registers  //
    ////////////////////////

    logic [SINGLE_ERROR_PULSE_STRETCH-1:0] single_error_history;    //shift register, bit i holds the single error input from i+1 clocks ago
    logic single_error_pulse_stretched;                             //high if the single error input is high now or was high in any of the recorded history
    logic double_error_latched;                                     //set by the double error input, cleared only by reset

    always_ff @(posedge clock or negedge aclrn) begin
        if (~aclrn) begin
            single_error_history <= '0;
            single_error_pulse_stretched <= 1'b0;
            double_error_latched <= 1'b0;
        end
        else begin
            //shift the newest sample into bit 0, the oldest sample falls off the top
            single_error_history[0] <= i_single_error_corrected;
            for (int i=1; i<SINGLE_ERROR_PULSE_STRETCH; i++) single_error_history[i] <= single_error_history[i-1];

            //current input plus SINGLE_ERROR_PULSE_STRETCH samples of history, so a one clock input pulse becomes SINGLE_ERROR_PULSE_STRETCH+1 clocks at the output
            single_error_pulse_stretched <= i_single_error_corrected | (|single_error_history);

            //sticky: once set the bit holds itself high
            double_error_latched <= double_error_latched | i_double_error_detected;

            //synchronous reset comes last so that it overrides all of the assignments above
            if (~sclrn) begin
                single_error_history <= '0;
                single_error_pulse_stretched <= 1'b0;
                double_error_latched <= 1'b0;
            end
        end
    end

    //same bit order as the non-stretched status in dla_hld_ram_ecc: {single error, double error}
    assign o_ecc_err_status = {single_error_pulse_stretched, double_error_latched};

endmodule

`default_nettype wire