1 files changed, 257 insertions, 0 deletions
diff --git a/python/openvino/demo/ip/intel_ai_ip/verilog/dla_acl_ecc_decoder.sv b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_acl_ecc_decoder.sv
new file mode 100644
index 0000000..960686d
--- /dev/null
+++ b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_acl_ecc_decoder.sv
@@ -0,0 +1,257 @@
+// Copyright 2020 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+//  ACL ECC DECODER
+//
+//  This module decodes data using a single error correct, double error detect Hamming code. As the data width get large,
+//  so will the xor network and that would limit fmax. To resolve this, we slice the data into smaller groups and decode
+//  each independently. Essentially we trade off more memory overhead for parity bits in order to limit the fmax
+//  degradation due to ECC.
+//
+//  The user must specify the data width and the slicing size. From this, one can compute the number of parity bits and
+//  total encoded bits (see the calculations in localparams below).
+//
+//  Error reporting: for each decoder (after slicing), there are 2 status signals: single error corrected, and double
+//  error detected. Each of these signal types are OR-ed together from all of the decoders (from slicing) before being
+//  reported to the outside world. Beware that if there are two bit errors but they are in separate slicing groups, two
+//  independent decoders can correct one bit each, so this will be reported as single error corrected.
+//
+//  Reset: there is no reset. Pipeline stages are purely feed-forward, the intent is that reset will propagate through.
+//
+//  This module is actually a wrapper around the actual ECC implementation in secded_decoder. Here is the architecture.
+//  For example, suppose DATA_WIDTH is 70 and ECC_GROUP_SIZE is 32, then we will slice input data into 32 + 32 + 6, and
+//  3 encoders are used to produce 39 + 39 + 11 encoded bits.
+//
+//                                i_encoded[88:0]
+//                                      |
+//  +------------------------------------------------------------------------+
+//  |                     optional input pipeline stages                     |
+//  +------------------------------------------------------------------------+
+//          |                           |                           |
+//    encoded[88:78]              encoded[77:39]              encoded[38:0]
+//          |                           |                           |
+//  +----------------+          +----------------+          +----------------+
+//  | secded_decoder |          | secded_decoder |          | secded_decoder |
+//  +----------------+          +----------------+          +----------------+
+//          |                           |                           |
+//      data[69:64]                 data[63:32]                 data[31:0]
+//          |                           |                           |
+//  +------------------------------------------------------------------------+
+//  |                     optional output pipeline stages                    |
+//  +------------------------------------------------------------------------+
+//                                      |
+//                                o_data[69:0]
+//
+//  Everything decoder related is contained within this file. The related file that does the corresponding encoding is
+//  dla_acl_ecc_encoder.sv. Note both encoder and decoder require dla_acl_ecc_pkg.sv.
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+`default_nettype none
+
+//BEWARE: do not leave the "clock_enable" input port disconnected if any pipeline stages are used, it will default to 0 and nothing will go through
+
+module dla_acl_ecc_decoder
+import dla_acl_ecc_pkg::*;
+#(
+    parameter int DATA_WIDTH,                   //number of bits in the decoded output data
+    parameter int ECC_GROUP_SIZE,               //how many bits of unencoded data to group into one ecc block, see description in header comments
+    parameter int INPUT_PIPELINE_STAGES = 0,    //number of pipeline stages between i_encoded and the ecc decoder
+    parameter int OUTPUT_PIPELINE_STAGES = 0,   //number of pipeline stages between the ecc decoder and o_data
+    parameter int STATUS_PIPELINE_STAGES = 0    //number of pipeline stages between the ecc decoder and o_single_error_corrected/o_double_error_detected
+)
+(
+    input  wire                                                          clock,                     //clock is only needed if pipeline stages are nonzero
+    input  wire                                                          clock_enable,              //set to 1 to sample i_encoded, intended for integration with altera_syncram, only needed if pipeline stages are nonzero
+    input  wire  [getEncodedBitsEccGroup(DATA_WIDTH,ECC_GROUP_SIZE)-1:0] i_encoded,                 //encoded input data
+    output logic [DATA_WIDTH-1:0]                                        o_data,                    //decoded output data
+    output logic                                                         o_single_error_corrected,  //at least one ecc decoder corrected a single bit error within their ecc group
+    output logic                                                         o_double_error_detected    //at least one ecc decoder detected a double bit error within their ecc group
+);
+
+    //helper functions for determining number of bits are defined in dla_acl_ecc.svh
+    localparam int ECC_NUM_GROUPS  = getNumGroups(DATA_WIDTH,ECC_GROUP_SIZE);           //how many groups to slice the data into
+    localparam int LAST_GROUP_SIZE = getLastGroupSize(DATA_WIDTH,ECC_GROUP_SIZE);       //all groups have size ECC_GROUP_SIZE except possibly the last group which may be smaller since it gets the remaining bits
+    localparam int ENCODED_BITS    = getEncodedBitsEccGroup(DATA_WIDTH,ECC_GROUP_SIZE);
+
+    //internal signals
+    genvar g;
+    logic [ENCODED_BITS-1:0] encoded;
+    logic [DATA_WIDTH-1:0] data;
+    logic [2*ECC_NUM_GROUPS-1:0] error_status;
+    logic [ECC_NUM_GROUPS-1:0] single_error_corrected;
+    logic [ECC_NUM_GROUPS-1:0] double_error_detected;
+
+    //input pipeline stages
+    generate
+    if (INPUT_PIPELINE_STAGES == 0) begin
+        assign encoded = i_encoded;
+    end
+    else begin
+        logic [ENCODED_BITS-1:0] encoded_pipe [INPUT_PIPELINE_STAGES-1:0];
+        always_ff @(posedge clock) begin    //only the first pipeline stage needs a clock enable, the remaining pipeline stages will load the same data when the clock enable propagates there
+            if (clock_enable) encoded_pipe[0] <= i_encoded;
+        end
+        for (g=1; g<INPUT_PIPELINE_STAGES; g++) begin : gen_input_pipe
+            always_ff @(posedge clock) begin
+                encoded_pipe[g] <= encoded_pipe[g-1];
+            end
+        end
+        assign encoded = encoded_pipe[INPUT_PIPELINE_STAGES-1];
+    end
+    endgenerate
+
+    //slice the data for each decoder
+    generate
+    for (g=0; g<ECC_NUM_GROUPS; g++) begin : gen_decoder
+        localparam int RAW_BASE = ECC_GROUP_SIZE*g;
+        localparam int ENC_BASE = getEncodedBits(ECC_GROUP_SIZE)*g;
+        localparam int RAW_WIDTH = (g==ECC_NUM_GROUPS-1) ? LAST_GROUP_SIZE : ECC_GROUP_SIZE;
+        localparam int ENC_WIDTH = getEncodedBits(RAW_WIDTH);
+
+        secded_decoder #(
+            .DATA_WIDTH               (RAW_WIDTH)
+        )
+        secded_encoder_inst
+        (
+            .i_encoded                (encoded[ENC_BASE +: ENC_WIDTH]),
+            .o_data                   (data[RAW_BASE +: RAW_WIDTH]),
+            .o_single_error_corrected (error_status[g]),
+            .o_double_error_detected  (error_status[g+ECC_NUM_GROUPS])
+        );
+    end
+    endgenerate
+
+    //output pipeline stages
+    generate
+    if (OUTPUT_PIPELINE_STAGES == 0) begin
+        assign o_data = data;
+    end
+    else begin
+        logic [DATA_WIDTH-1:0] data_pipe [OUTPUT_PIPELINE_STAGES-1:0];
+        if (INPUT_PIPELINE_STAGES == 0) begin    //this is the first pipeline stage
+            always_ff @(posedge clock) begin
+                if (clock_enable) data_pipe[0] <= data;
+            end
+        end
+        else begin  //there was a previous pipeline in the input stage which would have captured the clock enable
+            always_ff @(posedge clock) begin
+                data_pipe[0] <= data;
+            end
+        end
+        for (g=1; g<OUTPUT_PIPELINE_STAGES; g++) begin : gen_output_pipe
+            always_ff @(posedge clock) begin
+                data_pipe[g] <= data_pipe[g-1];
+            end
+        end
+        assign o_data = data_pipe[OUTPUT_PIPELINE_STAGES-1];
+    end
+    endgenerate
+
+    //error status pipeline stages
+    generate
+    if (STATUS_PIPELINE_STAGES == 0) begin
+        assign {double_error_detected, single_error_corrected} = error_status;
+    end
+    else begin
+        logic [2*ECC_NUM_GROUPS-1:0] error_status_pipe [STATUS_PIPELINE_STAGES-1:0];
+        if (INPUT_PIPELINE_STAGES == 0) begin    //this is the first pipeline stage
+            always_ff @(posedge clock) begin
+                if (clock_enable) error_status_pipe[0] <= error_status;
+            end
+        end
+        else begin  //there was a previous pipeline in the input stage which would have captured the clock enable
+            always_ff @(posedge clock) begin
+                error_status_pipe[0] <= error_status;
+            end
+        end
+        for (g=1; g<STATUS_PIPELINE_STAGES; g++) begin : gen_status_pipe
+            always_ff @(posedge clock) begin
+                error_status_pipe[g] <= error_status_pipe[g-1];
+            end
+        end
+        assign {double_error_detected, single_error_corrected} = error_status_pipe[STATUS_PIPELINE_STAGES-1];
+    end
+    endgenerate
+    assign o_single_error_corrected = |single_error_corrected;
+    assign o_double_error_detected = |double_error_detected;
+
+endmodule
+//end dla_acl_ecc_decoder
+
+
+
+
+
+// Hamming code decoder, single error correct, double error detect
+//
+// This implementation follows the bit mapping as shown on Wikipedia, parity bits are added at power of 2 locations, data bits go in between
+// For example, with DATA_WIDTH = 11, we have 4 Hamming parity bits and one overall parity bit, so the bit locations will looks like this, d means data, p means parity
+// [0] = p0, [1] = p1, [2] = p2, [3] = d0, [4] = p3, [5] = d1, [6] = d2, [7] = d3, [8] = p4, [9] = d4, [10] = d5, [11] = d6, [12] = d7, [13] = d8, [14] = d9, [15] = d10
+
+module secded_decoder
+import dla_acl_ecc_pkg::*;
+#(
+    parameter int DATA_WIDTH
+) (
+    input  wire  [getEncodedBits(DATA_WIDTH)-1:0] i_encoded,                //encoded input data
+    output logic [DATA_WIDTH-1:0]                 o_data,                   //decoded output data
+    output logic                                  o_single_error_corrected, //asserts when one bit of encoded data is wrong, this will be reported and corrected
+    output logic                                  o_double_error_detected   //asserts when two bits of encoded data are wrong, this will only be reported and not corrected
+);
+
+    //helper functions for determining number of bits are defined in dla_acl_ecc.svh
+    localparam int PARITY_BITS = getParityBits(DATA_WIDTH);
+    localparam int ENCODED_BITS = getEncodedBits(DATA_WIDTH);
+
+    //compute the parity bits
+    logic [PARITY_BITS-1:0] parity;
+    always_comb begin
+        for (int parity_index=1; parity_index<PARITY_BITS; parity_index++) begin
+            parity[parity_index] = 0;
+            for (int enc_index=0; enc_index<ENCODED_BITS; enc_index++) begin
+                if (enc_index & (1<<(parity_index-1))) begin   //bit parity_index-1 of enc_index is 1
+                    parity[parity_index] = parity[parity_index] ^ i_encoded[enc_index]; //running xor
+                end
+            end
+        end
+        parity[0] = ^i_encoded; //overall parity
+    end
+
+    //syndrome indicates which bits was wrong, if any
+    logic [PARITY_BITS-2:0] syndrome;
+    assign syndrome = parity[PARITY_BITS-1:1];
+
+    //report if there was 1 bit or 2 bit errors respectively
+    assign o_single_error_corrected = parity[0];    //odd number of errors, 1 error gets corrected, 3 errors is not correctable and mapping to the word of minimum hamming distance will give incorrect data
+    assign o_double_error_detected = ~parity[0] && (syndrome != 0);    //even number of errors, 0 errors results in syndrome == 0, 2 error will have a nonzero syndrome
+
+    //extract out the data bits, and correct if there is a single bit error
+    //parity bits are at power of 2 bit locations, data bits are in between
+    //for example, with DATA_WIDTH = 11, we have 5 parity bits and the bit locations will looks like this, d means data, p means parity
+    //[0] = p0, [1] = p1, [2] = p2, [3] = d0, [4] = p3, [5] = d1, [6] = d2, [7] = d3, [8] = p4, [9] = d4, [10] = d5, [11] = d6, [12] = d7, [13] = d8, [14] = d9, [15] = d10
+    always_comb begin
+        for (int enc_index=0, data_index=0; enc_index<ENCODED_BITS; enc_index++) begin
+            if (!(enc_index == 0 || (2**$clog2(enc_index)) == enc_index)) begin    //enc_index is not a power of 2
+                o_data[data_index] = (enc_index==syndrome) ? ~i_encoded[enc_index] : i_encoded[enc_index];
+                data_index++;
+            end
+        end
+    end
+
+endmodule
+//end secded_decoder
+
+`default_nettype wire