summaryrefslogtreecommitdiff
path: root/python/openvino/demo/ip/intel_ai_ip/verilog/dla_hld_ram_ecc.sv
diff options
context:
space:
mode:
Diffstat (limited to 'python/openvino/demo/ip/intel_ai_ip/verilog/dla_hld_ram_ecc.sv')
-rw-r--r--python/openvino/demo/ip/intel_ai_ip/verilog/dla_hld_ram_ecc.sv403
1 files changed, 403 insertions, 0 deletions
diff --git a/python/openvino/demo/ip/intel_ai_ip/verilog/dla_hld_ram_ecc.sv b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_hld_ram_ecc.sv
new file mode 100644
index 0000000..4f9289e
--- /dev/null
+++ b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_hld_ram_ecc.sv
@@ -0,0 +1,403 @@
+// Copyright 2020 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+//see dla_hld_ram.sv for a description of the parameters, ports, and general functionality of all the dla_hld_ram layers
+
+//this layer is adds error correction codes, specifically single error correct double error detect hamming codes
+
+`default_nettype none
+
+`include "dla_acl_parameter_assert.svh"
+
+module dla_hld_ram_ecc
+import dla_acl_ecc_pkg::*;
+#(
+ //geometry configuration
+ parameter int DEPTH,
+ parameter int WIDTH,
+ parameter int BE_WIDTH,
+
+ //geometry constants
+ parameter bit MINIMIZE_MEMORY_USAGE,
+ parameter int MIN_PHYSICAL_DEPTH,
+
+ //memory initialization
+ parameter bit USE_MEM_INIT_FILE,
+ parameter bit ZERO_INITIALIZE_MEM,
+ parameter MEM_INIT_NAME,
+
+ //error correction codes -- these parameters are consumed at this layer, layers below do not have them
+ parameter bit ENABLE_ECC,
+ parameter bit ECC_STATUS_TIME_STRETCH,
+ parameter bit ASYNC_RESET,
+ parameter bit SYNCHRONIZE_RESET,
+
+ //memory configuration
+ parameter RAM_BLOCK_TYPE,
+ parameter RAM_OPERATION_MODE,
+ parameter DEVICE_FAMILY,
+ parameter READ_DURING_WRITE,
+ parameter bit REGISTER_A_READDATA,
+ parameter bit REGISTER_B_ADDRESS,
+ parameter bit REGISTER_B_READDATA,
+
+ //try to use memory hardened logic
+ parameter bit USE_ENABLE,
+ parameter bit COMMON_IN_CLOCK_EN,
+ parameter bit COMMON_OUT_CLOCK_EN,
+
+ //derived parameters
+ localparam int ADDR = $clog2(DEPTH)
+) (
+ input wire clock,
+
+ //port a
+ input wire [ADDR-1:0] a_address,
+ input wire a_write,
+ input wire [WIDTH-1:0] a_writedata,
+ input wire [BE_WIDTH-1:0] a_byteenable,
+ output logic [WIDTH-1:0] a_readdata,
+ input wire a_in_clock_en,
+ input wire a_out_clock_en,
+ input wire a_read_enable,
+
+ //port b
+ input wire [ADDR-1:0] b_address,
+ input wire b_write,
+ input wire [WIDTH-1:0] b_writedata,
+ input wire [BE_WIDTH-1:0] b_byteenable,
+ output logic [WIDTH-1:0] b_readdata,
+ input wire b_in_clock_en,
+ input wire b_out_clock_en,
+ input wire b_read_enable,
+
+ //error correction code -- these signals are consumed at this layer, layers below do not have them
+ input wire resetn,
+ output logic [1:0] ecc_err_status
+);
+
+ ///////////////////////
+ // Legality checks //
+ ///////////////////////
+
+ generate
+ //width / be_width must divide evenly with no remainder
+ `DLA_ACL_PARAMETER_ASSERT(WIDTH % BE_WIDTH == 0)
+ endgenerate
+
+
+
+ //////////////////////////
+ // Derived parameters //
+ //////////////////////////
+
+ //note: the max group size of 32 has been deliberately chosen, after encoding this expands to 39 bits which maps nicely to the physical width of m20k and mlab
+
+ localparam int BITS_PER_ENABLE = WIDTH / BE_WIDTH; //how many bits of data are controlled by each byte enable signal, typically we think of this as 8 but really can be any value
+ localparam int MAX_ECC_GROUP_SIZE = 32; //if data is wide, slice into smaller sections and encode each section independently, limit the xor network size to maintain high fmax
+ localparam int ECC_GROUP_SIZE = (BITS_PER_ENABLE > MAX_ECC_GROUP_SIZE) ? MAX_ECC_GROUP_SIZE : BITS_PER_ENABLE; //cannot jointly encode data from different byte enable groups
+ localparam int ENCODED_BITS_PER_ENABLE = (ENABLE_ECC) ? getEncodedBitsEccGroup(BITS_PER_ENABLE, ECC_GROUP_SIZE) : BITS_PER_ENABLE; //how many encoded bits of data are controlled by each byte enable signal
+ localparam int ENCODED_WIDTH = ENCODED_BITS_PER_ENABLE * BE_WIDTH; //total data width after encoding
+
+
+
+ ////////////////////////////////////////////////////////////////
+ // Encode the write data, respecting byte enable boundaries //
+ ////////////////////////////////////////////////////////////////
+
+ genvar g;
+ logic [ENCODED_WIDTH-1:0] encoded_a_writedata, encoded_b_writedata;
+ logic [ENCODED_WIDTH-1:0] encoded_a_readdata, encoded_b_readdata;
+ logic [ENCODED_WIDTH-1:0] encoded_a_readdata_raw, encoded_b_readdata_raw;
+
+ generate
+ if (ENABLE_ECC) begin : ECC_ENCODE
+ //each byte enable signal controls BITS_PER_ENABLE bits of the data path, to respect this boundary never ecc encode across different groups
+ //it is possible that the group could be large, so we would want multiple ecc encoders within that group to limit the size of the xor network
+ //dla_acl_ecc_encoder already does that for us, however we may need non-uniform slicing, so we still have to deal with that here
+
+ //example scenario, suppose WIDTH = 98 and BE_WIDTH = 2, therefore BITS_PER_ENABLE = 49
+ //given that MAX_ECC_GROUP_SIZE = 32, this is how the data should be sliced up:
+ //dla_acl_ecc_encoder instance 0 -- ecc instance 0 handles bits 31:0, ecc instance 1 handles bits 48:32
+ //dla_acl_ecc_encoder instance 1 -- ecc instance 0 handles bits 80:49, ecc instance 1 handles bits 97:81
+
+ //the above layout cannot be achieved with only one instance of dla_acl_ecc_encoder, there is no way to alternate between slicing 32 and 17 bits
+
+ for (g=0; g<BE_WIDTH; g++) begin
+ dla_acl_ecc_encoder
+ #(
+ .DATA_WIDTH (BITS_PER_ENABLE),
+ .ECC_GROUP_SIZE (ECC_GROUP_SIZE),
+ .INPUT_PIPELINE_STAGES (0), //must use zero latency to maintain the conceptual clock enable model of dla_hld_ram
+ .OUTPUT_PIPELINE_STAGES (0) //likewise as above
+ )
+ dla_acl_ecc_encoder_inst_a
+ (
+ .clock (clock), //this currently has no effect since the number of pipeline stages is 0
+ .clock_enable (1'b1), //this currently has no effect since the number of pipeline stages is 0
+ .i_data (a_writedata[g*BITS_PER_ENABLE +: BITS_PER_ENABLE]),
+ .o_encoded (encoded_a_writedata[g*ENCODED_BITS_PER_ENABLE +: ENCODED_BITS_PER_ENABLE])
+ );
+
+ dla_acl_ecc_encoder
+ #(
+ .DATA_WIDTH (BITS_PER_ENABLE),
+ .ECC_GROUP_SIZE (ECC_GROUP_SIZE),
+ .INPUT_PIPELINE_STAGES (0),
+ .OUTPUT_PIPELINE_STAGES (0)
+ )
+ dla_acl_ecc_encoder_inst_b
+ (
+ .clock (clock),
+ .clock_enable (1'b1),
+ .i_data (b_writedata[g*BITS_PER_ENABLE +: BITS_PER_ENABLE]),
+ .o_encoded (encoded_b_writedata[g*ENCODED_BITS_PER_ENABLE +: ENCODED_BITS_PER_ENABLE])
+ );
+ end
+ end
+ else begin : NO_ENCODE
+ assign encoded_a_writedata = a_writedata;
+ assign encoded_b_writedata = b_writedata;
+ end
+ endgenerate
+
+
+
+ /////////////////////////////////////////////////
+ // Next layer in the instantiation hierarchy //
+ /////////////////////////////////////////////////
+
+ dla_hld_ram_tall_depth_stitch
+ #(
+ .DEPTH (DEPTH),
+ .WIDTH (ENCODED_WIDTH), //changed
+ .BE_WIDTH (BE_WIDTH),
+ .MINIMIZE_MEMORY_USAGE (MINIMIZE_MEMORY_USAGE),
+ .MIN_PHYSICAL_DEPTH (MIN_PHYSICAL_DEPTH),
+ .USE_MEM_INIT_FILE (USE_MEM_INIT_FILE),
+ .ZERO_INITIALIZE_MEM (ZERO_INITIALIZE_MEM),
+ .MEM_INIT_NAME (MEM_INIT_NAME),
+ .RAM_BLOCK_TYPE (RAM_BLOCK_TYPE),
+ .RAM_OPERATION_MODE (RAM_OPERATION_MODE),
+ .DEVICE_FAMILY (DEVICE_FAMILY),
+ .READ_DURING_WRITE (READ_DURING_WRITE),
+ .REGISTER_A_READDATA (REGISTER_A_READDATA),
+ .REGISTER_B_ADDRESS (REGISTER_B_ADDRESS),
+ .REGISTER_B_READDATA (REGISTER_B_READDATA),
+ .USE_ENABLE (USE_ENABLE),
+ .COMMON_IN_CLOCK_EN (COMMON_IN_CLOCK_EN),
+ .COMMON_OUT_CLOCK_EN (COMMON_OUT_CLOCK_EN)
+ )
+ dla_hld_ram_tall_depth_stitch_inst
+ (
+ .clock (clock),
+ .a_address (a_address),
+ .a_write (a_write),
+ .a_writedata (encoded_a_writedata), //changed
+ .a_byteenable (a_byteenable),
+ .a_readdata (encoded_a_readdata_raw), //changed
+ .a_in_clock_en (a_in_clock_en),
+ .a_out_clock_en (a_out_clock_en),
+ .a_read_enable (a_read_enable),
+ .b_address (b_address),
+ .b_write (b_write),
+ .b_writedata (encoded_b_writedata), //changed
+ .b_byteenable (b_byteenable),
+ .b_readdata (encoded_b_readdata_raw), //changed
+ .b_in_clock_en (b_in_clock_en),
+ .b_out_clock_en (b_out_clock_en),
+ .b_read_enable (b_read_enable)
+ );
+
+ //imitate the query functions in the software model
+ // synthesis translate_off
+ int NUM_PHYSICAL_M20K, NUM_PHYSICAL_MLAB;
+ assign NUM_PHYSICAL_M20K = dla_hld_ram_tall_depth_stitch_inst.NUM_PHYSICAL_M20K;
+ assign NUM_PHYSICAL_MLAB = dla_hld_ram_tall_depth_stitch_inst.NUM_PHYSICAL_MLAB;
+ // synthesis translate_on
+
+
+
+ ////////////////////////////////
+ // Sim-only error injection //
+ ////////////////////////////////
+
+ //leave a hook for injecting errors into the read data, intended for simulation only
+ logic [ENCODED_WIDTH-1:0] SIM_ONLY_a_inject_error, SIM_ONLY_b_inject_error;
+ assign SIM_ONLY_a_inject_error = 0; //these signals are forced by the testbench
+ assign SIM_ONLY_b_inject_error = 0;
+ assign encoded_a_readdata = encoded_a_readdata_raw ^ SIM_ONLY_a_inject_error;
+ assign encoded_b_readdata = encoded_b_readdata_raw ^ SIM_ONLY_b_inject_error;
+
+
+
+ ///////////////////////////////////////////////////////////
+ // Decode the read data and produce ECC status signals //
+ ///////////////////////////////////////////////////////////
+
+ localparam bit CONNECT_A_READDATA_TO_ECC = RAM_OPERATION_MODE == "TRUE_DUAL_PORT"; //ignore port a read data if simple dual port
+
+ generate
+ if (ENABLE_ECC) begin : ECC_DECODE
+ logic [BE_WIDTH-1:0] a_single_error, a_double_error;
+ logic [BE_WIDTH-1:0] b_single_error, b_double_error;
+ logic any_single_error, any_double_error;
+
+ for (g=0; g<BE_WIDTH; g++) begin
+ if (CONNECT_A_READDATA_TO_ECC) begin
+ dla_acl_ecc_decoder
+ #(
+ .DATA_WIDTH (BITS_PER_ENABLE),
+ .ECC_GROUP_SIZE (ECC_GROUP_SIZE),
+ .INPUT_PIPELINE_STAGES (0), //must use zero latency to maintain the conceptual clock enable model of dla_hld_ram
+ .OUTPUT_PIPELINE_STAGES (0), //likewise as above
+ .STATUS_PIPELINE_STAGES (0) //likewise as above
+ )
+ dla_acl_ecc_decoder_inst_a
+ (
+ .clock (clock), //this currently has no effect since the number of pipeline stages is 0
+ .clock_enable (1'b1), //this currently has no effect since the number of pipeline stages is 0
+ .i_encoded (encoded_a_readdata[g*ENCODED_BITS_PER_ENABLE +: ENCODED_BITS_PER_ENABLE]),
+ .o_data (a_readdata[g*BITS_PER_ENABLE +: BITS_PER_ENABLE]),
+ .o_single_error_corrected (a_single_error[g]),
+ .o_double_error_detected (a_double_error[g])
+ );
+ end
+ else begin
+ assign a_readdata[g*BITS_PER_ENABLE +: BITS_PER_ENABLE] = 'x;
+ assign a_single_error[g] = '0;
+ assign a_double_error[g] = '0;
+ end
+
+ dla_acl_ecc_decoder
+ #(
+ .DATA_WIDTH (BITS_PER_ENABLE),
+ .ECC_GROUP_SIZE (ECC_GROUP_SIZE),
+ .INPUT_PIPELINE_STAGES (0),
+ .OUTPUT_PIPELINE_STAGES (0),
+ .STATUS_PIPELINE_STAGES (0)
+ )
+ dla_acl_ecc_decoder_inst_b
+ (
+ .clock (clock),
+ .clock_enable (1'b1),
+ .i_encoded (encoded_b_readdata[g*ENCODED_BITS_PER_ENABLE +: ENCODED_BITS_PER_ENABLE]),
+ .o_data (b_readdata[g*BITS_PER_ENABLE +: BITS_PER_ENABLE]),
+ .o_single_error_corrected (b_single_error[g]),
+ .o_double_error_detected (b_double_error[g])
+ );
+ end
+
+ assign any_single_error = (|a_single_error) | (|b_single_error);
+ assign any_double_error = (|a_double_error) | (|b_double_error);
+
+ if (ECC_STATUS_TIME_STRETCH) begin
+ dla_hld_ram_ecc_pulse_stretch_and_sticky #(
+ .ASYNC_RESET (ASYNC_RESET),
+ .SYNCHRONIZE_RESET (SYNCHRONIZE_RESET),
+ .SINGLE_ERROR_PULSE_STRETCH (3) //this is existing behavior from dla_acl_altera_syncram_wrapped
+ )
+ dla_hld_ram_ecc_pulse_stretch_and_sticky_inst
+ (
+ .clock (clock),
+ .resetn (resetn),
+ .i_single_error_corrected (any_single_error),
+ .i_double_error_detected (any_double_error),
+ .o_ecc_err_status (ecc_err_status)
+ );
+ end
+ else begin
+ assign ecc_err_status = {any_single_error, any_double_error};
+ end
+ end
+ else begin : NO_DECODE
+ if (CONNECT_A_READDATA_TO_ECC) begin
+ assign a_readdata = encoded_a_readdata;
+ end
+ else begin
+ assign a_readdata = 'x;
+ end
+ assign b_readdata = encoded_b_readdata;
+ assign ecc_err_status = 2'h0;
+ end
+ endgenerate
+
+endmodule
+
+
+
+
+//this is a helper module to convert the raw signals from the ECC decoder into something suitable for lazy collection
+//assuming bit errors are rare, one way to monitor the ECC status signals from all memories is to simply OR the status signals from all instances
+//these may be physically spread across the FPGA, so pulse stretch them so that they can be collected on a slower clock (or by using a multicycle clock constraint)
+
+module dla_hld_ram_ecc_pulse_stretch_and_sticky #(
+ parameter bit ASYNC_RESET, //how do registers CONSUME reset, 1 = asynchronously, 0 = synchronously
+ parameter bit SYNCHRONIZE_RESET, //should be reset be synchronized BEFORE it is consumed, 1 = synchronize it, 0 = no change to reset before consumption
+ parameter int SINGLE_ERROR_PULSE_STRETCH //at least 1, how many clock cycles to pulse stretch any single bit error, a value of 3 means an input high for one clock cycle results in an output high for four clocks
+) (
+ input wire clock,
+ input wire resetn,
+ input wire i_single_error_corrected,
+ input wire i_double_error_detected,
+ output logic [1:0] o_ecc_err_status
+);
+
+ //the double error detected status is a sticky bit, only reset can clear it, the intent being one should probably restart the system if an uncorrectable error is seen
+ logic aclrn, sclrn;
+ dla_acl_reset_handler
+ #(
+ .ASYNC_RESET (ASYNC_RESET),
+ .USE_SYNCHRONIZER (SYNCHRONIZE_RESET),
+ .SYNCHRONIZE_ACLRN (SYNCHRONIZE_RESET),
+ .PULSE_EXTENSION (0),
+ .PIPE_DEPTH (1),
+ .NUM_COPIES (1)
+ )
+ dla_acl_reset_handler_inst
+ (
+ .clk (clock),
+ .i_resetn (resetn),
+ .o_aclrn (aclrn),
+ .o_resetn_synchronized (),
+ .o_sclrn (sclrn)
+ );
+
+ logic [SINGLE_ERROR_PULSE_STRETCH-1:0] single_error_history;
+ logic single_error_pulse_stretched;
+ logic double_error_latched;
+
+ always_ff @(posedge clock or negedge aclrn) begin
+ if (~aclrn) begin
+ single_error_history <= '0;
+ single_error_pulse_stretched <= 1'b0;
+ double_error_latched <= 1'b0;
+ end
+ else begin
+ single_error_history[0] <= i_single_error_corrected;
+ for (int i=1; i<SINGLE_ERROR_PULSE_STRETCH; i++) single_error_history[i] <= single_error_history[i-1];
+ single_error_pulse_stretched <= i_single_error_corrected | (|single_error_history);
+ double_error_latched <= double_error_latched | i_double_error_detected;
+ if (~sclrn) begin
+ single_error_history <= '0;
+ single_error_pulse_stretched <= 1'b0;
+ double_error_latched <= 1'b0;
+ end
+ end
+ end
+
+ assign o_ecc_err_status = {single_error_pulse_stretched, double_error_latched};
+
+endmodule
+
+`default_nettype wire