python/openvino/demo/ip/intel_ai_ip/verilog/dla_acl_ecc_encoder.sv


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225

// Copyright 2020 Intel Corporation.
//
// This software and the related documents are Intel copyrighted materials,
// and your use of them is governed by the express license under which they
// were provided to you ("License"). Unless the License provides otherwise,
// you may not use, modify, copy, publish, distribute, disclose or transmit
// this software or the related documents without Intel's prior written
// permission.
//
// This software and the related documents are provided as is, with no express
// or implied warranties, other than those that are expressly stated in the
// License.

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//
//  ACL ECC ENCODER
//
//  This module encodes data using a single error correct, double error detect (SECDED) Hamming code. As the data width
//  gets large, so does the XOR network, which limits fmax. To resolve this, we slice the data into smaller groups and
//  encode each group independently. Essentially we trade more memory overhead for parity bits in exchange for limiting
//  the fmax degradation due to ECC.
//
//  The user must specify the data width and the slicing size. From this, one can compute the number of parity bits and
//  total encoded bits (see the calculations in localparams below).
//
//  Reset: there is no reset. Pipeline stages are purely feed-forward, the intent is that reset will propagate through.
//
//  This module is actually a wrapper around the actual ECC implementation in secded_encoder. Here is the architecture.
//  For example, suppose DATA_WIDTH is 70 and ECC_GROUP_SIZE is 32, then we will slice input data into 32 + 32 + 6, and
//  3 encoders are used to produce 39 + 39 + 11 encoded bits.
//
//                                 i_data[69:0]
//                                      |
//  +------------------------------------------------------------------------+
//  |                     optional input pipeline stages                     |
//  +------------------------------------------------------------------------+
//          |                           |                           |
//      data[69:64]                 data[63:32]                 data[31:0]
//          |                           |                           |
//  +----------------+          +----------------+          +----------------+
//  | secded_encoder |          | secded_encoder |          | secded_encoder |
//  +----------------+          +----------------+          +----------------+
//          |                           |                           |
//    encoded[88:78]              encoded[77:39]              encoded[38:0]
//          |                           |                           |
//  +------------------------------------------------------------------------+
//  |                     optional output pipeline stages                    |
//  +------------------------------------------------------------------------+
//                                      |
//                                o_encoded[88:0]
//
//  Required files:
//  - dla_acl_ecc_encoder.sv
//  - dla_acl_ecc_pkg.sv
//
//  Related files (to do the corresponding decoding that this file encodes):
//  - dla_acl_ecc_decoder.sv
//
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

`default_nettype none

//BEWARE: do not leave the "clock_enable" input port disconnected if any pipeline stages are used, it will default to 0 and nothing will go through

//Wrapper that slices i_data into groups of at most ECC_GROUP_SIZE bits, encodes each group with its own
//secded_encoder instance, and packs the per-group codewords side by side into o_encoded. Optional register stages
//may be inserted before and after the encoders. There is no reset input; the registers are purely feed-forward.
module dla_acl_ecc_encoder
import dla_acl_ecc_pkg::*;
#(
    parameter int DATA_WIDTH,                   //number of bits in the unencoded input data (no default value, so the instantiator must set it)
    parameter int ECC_GROUP_SIZE,               //how many bits of unencoded data to group into one ecc block, see description in header comments (no default value either)
    parameter int INPUT_PIPELINE_STAGES = 0,    //number of pipeline stages between i_data and the ecc encoder, 0 means a direct combinational connection
    parameter int OUTPUT_PIPELINE_STAGES = 0    //number of pipeline stages between the ecc encoder and o_encoded, 0 means a direct combinational connection
)
(
    input  wire                                                          clock,         //clock is only needed if pipeline stages are nonzero
    input  wire                                                          clock_enable,  //set to 1 to sample i_data, intended for integration with altera_syncram, only needed if pipeline stages are nonzero
    input  wire  [DATA_WIDTH-1:0]                                        i_data,        //unencoded input data
    output logic [getEncodedBitsEccGroup(DATA_WIDTH,ECC_GROUP_SIZE)-1:0] o_encoded      //encoded output data, group 0 occupies the least significant bits
);

    //the helper functions used for the bit counts below are not defined in this file; they are expected to come from
    //dla_acl_ecc_pkg, which is the only import here
    //NOTE(review): this comment previously pointed at dla_acl_ecc.svh, which looks stale - confirm against the package
    localparam int ECC_NUM_GROUPS  = getNumGroups(DATA_WIDTH,ECC_GROUP_SIZE);           //how many groups to slice the data into
    localparam int LAST_GROUP_SIZE = getLastGroupSize(DATA_WIDTH,ECC_GROUP_SIZE);       //all groups have size ECC_GROUP_SIZE except possibly the last group which may be smaller since it gets the remaining bits
    localparam int ENCODED_BITS    = getEncodedBitsEccGroup(DATA_WIDTH,ECC_GROUP_SIZE); //total width of all codewords together, same expression that sizes o_encoded

    //internal signals
    genvar g;                           //shared loop index for all of the generate loops below
    logic [DATA_WIDTH-1:0] data;        //unencoded data as seen by the encoders, after the optional input pipeline
    logic [ENCODED_BITS-1:0] encoded;   //encoder outputs, before the optional output pipeline

    //input pipeline stages
    generate
    if (INPUT_PIPELINE_STAGES == 0) begin
        //no registers requested, feed the encoders straight from the input port
        assign data = i_data;
    end
    else begin
        logic [DATA_WIDTH-1:0] data_pipe [INPUT_PIPELINE_STAGES-1:0];
        //only stage 0 is gated by clock_enable, it is the stage that samples i_data
        always_ff @(posedge clock) begin
            if (clock_enable) data_pipe[0] <= i_data;
        end
        //stages 1 and up register on every clock edge, independent of clock_enable
        for (g=1; g<INPUT_PIPELINE_STAGES; g++) begin : gen_input_pipe
            always_ff @(posedge clock) begin
                data_pipe[g] <= data_pipe[g-1];
            end
        end
        //the encoders consume the output of the last stage
        assign data = data_pipe[INPUT_PIPELINE_STAGES-1];
    end
    endgenerate

    //slice the data for each encoder
    generate
    for (g=0; g<ECC_NUM_GROUPS; g++) begin : gen_encoder
        localparam int RAW_BASE = ECC_GROUP_SIZE*g;                     //lowest bit of this group within data
        localparam int ENC_BASE = getEncodedBits(ECC_GROUP_SIZE)*g;     //lowest bit of this group within encoded, every earlier group is full size so the stride is the codeword width of a full group
        localparam int RAW_WIDTH = (g==ECC_NUM_GROUPS-1) ? LAST_GROUP_SIZE : ECC_GROUP_SIZE;    //only the last group may be narrower
        localparam int ENC_WIDTH = getEncodedBits(RAW_WIDTH);           //codeword width for this particular group

        //one independent encoder per group
        secded_encoder #(
            .DATA_WIDTH (RAW_WIDTH)
        )
        secded_encoder_inst
        (
            .i_data     (data[RAW_BASE +: RAW_WIDTH]),
            .o_encoded  (encoded[ENC_BASE +: ENC_WIDTH])
        );
    end
    endgenerate

    //output pipeline stages
    generate
    if (OUTPUT_PIPELINE_STAGES == 0) begin
        //no registers requested, drive the output port straight from the encoders
        assign o_encoded = encoded;
    end
    else begin
        logic [ENCODED_BITS-1:0] encoded_pipe [OUTPUT_PIPELINE_STAGES-1:0];
        if (INPUT_PIPELINE_STAGES == 0) begin    //this is the first pipeline stage
            //nothing upstream has applied clock_enable yet, so this stage has to be gated by it
            always_ff @(posedge clock) begin
                if (clock_enable) encoded_pipe[0] <= encoded;
            end
        end
        else begin  //there was a previous pipeline in the input stage which would have captured the clock enable
            always_ff @(posedge clock) begin
                encoded_pipe[0] <= encoded;
            end
        end
        //stages 1 and up register on every clock edge, independent of clock_enable
        for (g=1; g<OUTPUT_PIPELINE_STAGES; g++) begin : gen_output_pipe
            always_ff @(posedge clock) begin
                encoded_pipe[g] <= encoded_pipe[g-1];
            end
        end
        //the output port is driven by the last stage
        assign o_encoded = encoded_pipe[OUTPUT_PIPELINE_STAGES-1];
    end
    endgenerate

endmodule
//end dla_acl_ecc_encoder


// Hamming code encoder, single error correct, double error detect
//
// This implementation follows the bit mapping shown on Wikipedia: parity bits are placed at power of 2 locations and data bits go in between.
// For example, with DATA_WIDTH = 11, we have 4 Hamming parity bits and one overall parity bit, so the bit locations will look like this (d means data, p means parity):
// [0] = p0, [1] = p1, [2] = p2, [3] = d0, [4] = p3, [5] = d1, [6] = d2, [7] = d3, [8] = p4, [9] = d4, [10] = d5, [11] = d6, [12] = d7, [13] = d8, [14] = d9, [15] = d10

module secded_encoder
import dla_acl_ecc_pkg::*;
#(
    parameter int DATA_WIDTH                                    //width of the raw (unencoded) input word
) (
    input  wire  [DATA_WIDTH-1:0]                 i_data,       //raw input word
    output logic [getEncodedBits(DATA_WIDTH)-1:0] o_encoded     //codeword: data bits interleaved with the parity bits
);

    //bit counts are obtained from the getParityBits / getEncodedBits helper functions
    localparam int PARITY_BITS  = getParityBits(DATA_WIDTH);
    localparam int ENCODED_BITS = getEncodedBits(DATA_WIDTH);

    //codeword positions 0, 1, 2, 4, 8, ... carry parity, every other position carries a data bit
    //clearing the lowest set bit of a number leaves zero exactly when the number is 0 or a power of 2
    function automatic bit is_parity_position(input int position);
        return (position & (position - 1)) == 0;
    endfunction

    //layout example for DATA_WIDTH = 11 (5 parity bits in total), d means data, p means parity:
    //[0] = p0, [1] = p1, [2] = p2, [3] = d0, [4] = p3, [5] = d1, [6] = d2, [7] = d3, [8] = p4, [9] = d4, ..., [15] = d10
    logic [ENCODED_BITS-1:0] spread_data;   //i_data placed at its codeword positions, parity positions held at 0
    logic [PARITY_BITS-1:0]  check_bits;    //[0] is the overall parity, [k] for k>=1 covers the positions whose index has bit k-1 set

    always_comb begin
        //step 1: walk the codeword positions and drop the next unused data bit into every non-parity position
        for (int position=0, next_data=0; position<ENCODED_BITS; position++) begin
            if (is_parity_position(position)) begin
                spread_data[position] = 1'b0;
            end
            else begin
                spread_data[position] = i_data[next_data];
                next_data++;
            end
        end

        //step 2: each Hamming check bit is the xor of all spread data bits at positions that have its index bit set
        for (int k=1; k<PARITY_BITS; k++) begin
            check_bits[k] = 1'b0;
            for (int position=0; position<ENCODED_BITS; position++) begin
                if ((position >> (k-1)) & 1) begin
                    check_bits[k] ^= spread_data[position];
                end
            end
        end
        //the overall parity covers every Hamming check bit together with every data bit
        check_bits[0] = ^{check_bits[PARITY_BITS-1:1], i_data};

        //step 3: build the codeword, parity positions take the check bits in ascending order, the rest is data
        for (int position=0, next_check=0; position<ENCODED_BITS; position++) begin
            if (is_parity_position(position)) begin
                o_encoded[position] = check_bits[next_check];
                next_check++;
            end
            else begin
                o_encoded[position] = spread_data[position];
            end
        end
    end

endmodule
//end secded_encoder

`default_nettype wire