summaryrefslogtreecommitdiff
path: root/python/openvino/demo/ip/intel_ai_ip/verilog/dla_hld_ram.sv
diff options
context:
space:
mode:
Diffstat (limited to 'python/openvino/demo/ip/intel_ai_ip/verilog/dla_hld_ram.sv')
-rw-r--r--python/openvino/demo/ip/intel_ai_ip/verilog/dla_hld_ram.sv381
1 files changed, 381 insertions, 0 deletions
diff --git a/python/openvino/demo/ip/intel_ai_ip/verilog/dla_hld_ram.sv b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_hld_ram.sv
new file mode 100644
index 0000000..3e4391c
--- /dev/null
+++ b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_hld_ram.sv
@@ -0,0 +1,381 @@
+// Copyright 2020 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/**
+dla_hld_ram is a project to simplify the usage of on-chip FPGA memory. Firstly, a lot of complexities are from Quartus IP such as altera_syncram and altdpram are hidden from the user.
+Secondly, there are many value-added features such as using depth/width stitching to minimize the number of physical RAMs needed to create one logical RAM, which Quartus IP does
+not handle well for non-power-of-2 sizes.
+
+A very detailed powerpoint slide deck describing the internals can be found at: //depot/docs/hld/ip/dla_hld_ram introduction algorithm convergence.pptx
+
+Here is a summary of the responsibility of each layer, note that the functionality in each layer can be optionally bypassed:
+
+Level | Layer | Description summary | Description details
+------+-------------------------------------------+---------------------------------------------------+------------------------------------------------------------------------------------
+ 1 | dla_hld_ram | choose M20K/MLAB if user did not specify it
+------+-------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------
+ 2 | dla_hld_ram_ecc | adds error correction codes
+------+-------------------------------------------+---------------------------------------------------+------------------------------------------------------------------------------------
+ 3 | dla_hld_ram_tall_depth_stitch | minimize the number physical memories needed to | - depth stitch a multiple of 16k depth, 8k depth, and leftover depth
+ 4 | dla_hld_ram_remaining_width | construct one logical memory where the width and | - extract remaining width, e.g. if width=32 may want to handle 2 bits separately
+ 5 | dla_hld_ram_bits_per_enable | depth are not "nice" values (e.g. depth not a | - logical to physical mapping of byte enables and data
+ 6 | dla_hld_ram_short_depth_stitch | power of two), among configurations of minimal | - split depth into top/bottom, top depth is multiple of max physical depth
+ 7 | dla_hld_ram_bottom_width_stitch | memory try to minimize glue logic (e.g. size of | - split bottom width into left/right, large width will depth stitch in next layer
+ 8 | dla_hld_ram_bottom_depth_stitch | read data mux) | - implement the depth stitch for large width groups extracted in previous layer
+------+-------------------------------------------+---------------------------------------------------+------------------------------------------------------------------------------------
+ 9 | dla_hld_ram_lower | unified interface for M20K and MLAB, also models the number of physical memories used
+------+-------------------------------------------+---------------------------------------------------+------------------------------------------------------------------------------------
+ 10 | dla_hld_ram_lower_m20k_simple_dual_port, | wrappers around altera_syncram or altdpram,
+ | dla_hld_ram_lower_m20k_true_dual_port, | add soft logic when hardened logic lacks functionality
+ | dla_hld_ram_lower_mlab_simple_dual_port |
+*/
+
+`default_nettype none
+
+`include "dla_acl_parameter_assert.svh"
+
+module dla_hld_ram import dla_common_pkg::*; #(
+ //geometry configuration
+ parameter int DEPTH, //number of words of memory
+ parameter int WIDTH, //width of the data bus, both read and write data
+ parameter int BE_WIDTH, //width of the byte enable signal, beware that WIDTH / BE_WIDTH must divide evenly
+
+ //geometry constants
+ parameter bit MINIMIZE_MEMORY_USAGE, //0 = minimize glue logic between memories (may require more physical memories), 1 = minimize number of physical memories needed (may require more glue logic)
+ parameter int SIM_ONLY_MIN_PHYSICAL_DEPTH, //set to 0 to use real physical values, set to nonzero to allow simulation to use shallower memories to better test mixed port read during write
+
+ //memory initialization
+ parameter bit USE_MEM_INIT_FILE, //0 = do not use memory initialization file, 1 = use memory initialization file
+ parameter bit ZERO_INITIALIZE_MEM, //only when USE_MEM_INIT_FILE = 0, choose whether the memory contents power up to zero or don't care (MLAB can only power up to zero)
+ parameter MEM_INIT_NAME, //only when USE_MEM_INIT_FILE = 1, specify the original file name including the .mif extension, this file name is modified as memory contents are sliced
+
+ //error correction codes
+ parameter bit ENABLE_ECC, //whether to enable error correction codes
+ parameter bit ECC_STATUS_TIME_STRETCH, //0 = report the instantaneous ecc status, 1 = pulse stretch any single error corrected and latch (until reset) any double error detected
+ parameter bit ASYNC_RESET, //how do ecc status registers CONSUME reset, 1 = asynchronously, 0 = synchronously, no other registers consume reset
+ parameter bit SYNCHRONIZE_RESET, //should reset be synchronized BEFORE it is consumed by the ecc status registers, 1 = synchronize it, 0 = no change to reset before consumption
+
+ //memory configuration
+ parameter RAM_BLOCK_TYPE, //legal values are: "M20K", "MLAB", "DLA_HLD_RAM_TO_CHOOSE"
+ parameter RAM_OPERATION_MODE, //legal values are: "SIMPLE_DUAL_PORT", "TRUE_DUAL_PORT"
+ parameter device_family_t DEVICE_FAMILY, //legal values are: "Cyclone 10 GX", "Arria 10", "Stratix 10", "Agilex"
+ parameter READ_DURING_WRITE, //legal values are: "DONT_CARE", "OLD_DATA", "NEW_DATA"
+ parameter bit REGISTER_A_READDATA, //latency from a_address to a_readdata is 1 (if unregistered) or 2 (if registered)
+ parameter bit REGISTER_B_ADDRESS, //for mlab only choose whether to register or unregister the read address, for m20k this parameter is ignored since all inputs must be registered
+ parameter bit REGISTER_B_READDATA, //latency from b_address to b_readdata is REGISTER_B_ADDRESS + REGISTER_B_READDATA, can be from 0 to 2 inclusive for mlab, can be from 1 to 2 for m20k
+
+ //try to use memory hardened logic
+ parameter bit USE_ENABLE, // set to 0 if no clock enables are used
+ parameter bit COMMON_IN_CLOCK_EN, //set to 1 if a_in_clock_en and b_in_clock_en are driven by the same source
+ parameter bit COMMON_OUT_CLOCK_EN, //set to 1 if a_out_clock_en and b_out_clock_en are driven by the same source
+
+ //derived parameters that affect the module interface
+ localparam int ADDR = $clog2(DEPTH)
+) (
+ input wire clock,
+
+ //port a
+ input wire [ADDR-1:0] a_address, //address for read or write
+ input wire a_read_enable, //read enable signal
+ input wire a_write, //write enable, 1 = write, 0 = read
+ input wire [WIDTH-1:0] a_writedata, //data to write to memory
+ input wire [BE_WIDTH-1:0] a_byteenable, //which bytes of write data to commit to memory
+ output logic [WIDTH-1:0] a_readdata, //data read from memory
+ output wire a_read_valid, //latency matched read enable
+ input wire a_in_clock_en, //applies to all inputs: address, write enable, write data, byte enable
+ input wire a_out_clock_en, //applies to all outputs: read data
+
+ //port b
+ input wire [ADDR-1:0] b_address, //signals have the same meaning as port a
+ input wire b_read_enable,
+ input wire b_write,
+ input wire [WIDTH-1:0] b_writedata,
+ input wire [BE_WIDTH-1:0] b_byteenable,
+ output logic [WIDTH-1:0] b_readdata,
+ output wire b_read_valid,
+ input wire b_in_clock_en,
+ input wire b_out_clock_en,
+
+ //error correction code
+ input wire resetn, //reset is only used if ENABLE_ECC = 1 and ECC_STATUS_TIME_STRETCH = 1, and it is only used by the ecc status not the ram itself
+ output logic [1:0] ecc_err_status //bit 0 = double bit error detected (uncorrectable), bit 1 = single bit error corrected
+);
+
+ ///////////////////////
+ // Legality checks //
+ ///////////////////////
+
+ generate
+ //check for non-trivial dimensions
+ `DLA_ACL_PARAMETER_ASSERT(DEPTH >= 2)
+ `DLA_ACL_PARAMETER_ASSERT(WIDTH >= 1)
+ `DLA_ACL_PARAMETER_ASSERT(BE_WIDTH >= 1)
+
+ //width / be_width must divide evenly with no remainder
+ `DLA_ACL_PARAMETER_ASSERT(WIDTH % BE_WIDTH == 0)
+
+ //check for a legal value of ram block type
+ localparam bit RAM_BLOCK_TYPE_IS_M20K = RAM_BLOCK_TYPE == "M20K";
+ localparam bit RAM_BLOCK_TYPE_IS_MLAB = RAM_BLOCK_TYPE == "MLAB";
+ localparam bit RAM_BLOCK_TYPE_IS_HLD_RAM_TO_CHOOSE = RAM_BLOCK_TYPE == "DLA_HLD_RAM_TO_CHOOSE";
+ `DLA_ACL_PARAMETER_ASSERT(RAM_BLOCK_TYPE_IS_M20K || RAM_BLOCK_TYPE_IS_MLAB || RAM_BLOCK_TYPE_IS_HLD_RAM_TO_CHOOSE)
+
+ //check for a legal value of ram operation mode
+ localparam bit RAM_OPERATION_MODE_IS_SIMPLE_DUAL_PORT = RAM_OPERATION_MODE == "SIMPLE_DUAL_PORT";
+ localparam bit RAM_OPERATION_MODE_IS_TRUE_DUAL_PORT = RAM_OPERATION_MODE == "TRUE_DUAL_PORT";
+ `DLA_ACL_PARAMETER_ASSERT(RAM_OPERATION_MODE_IS_SIMPLE_DUAL_PORT || RAM_OPERATION_MODE_IS_TRUE_DUAL_PORT)
+
+ //check for a legal value of device family
+ localparam DEVICE_FAMILY_STR =
+ (DEVICE_FAMILY == DEVICE_C10) ? "Cyclone 10 GX" :
+ (DEVICE_FAMILY == DEVICE_A10) ? "Arria 10" :
+ (DEVICE_FAMILY == DEVICE_S10) ? "Stratix 10" :
+ (DEVICE_FAMILY == DEVICE_AGX7) ? "Agilex" :
+ (DEVICE_FAMILY == DEVICE_AGX5) ? "Agilex" :
+ "Unknown";
+ localparam bit DEVICE_FAMILY_IS_C10 = DEVICE_FAMILY_STR == "Cyclone 10 GX";
+ localparam bit DEVICE_FAMILY_IS_A10 = DEVICE_FAMILY_STR == "Arria 10";
+ localparam bit DEVICE_FAMILY_IS_S10 = DEVICE_FAMILY_STR == "Stratix 10";
+ localparam bit DEVICE_FAMILY_IS_AGX = DEVICE_FAMILY_STR == "Agilex";
+ `DLA_ACL_PARAMETER_ASSERT(DEVICE_FAMILY_IS_C10 || DEVICE_FAMILY_IS_A10 || DEVICE_FAMILY_IS_S10 || DEVICE_FAMILY_IS_AGX)
+
+ //check for a legal value of mixed port read during write mode
+ localparam bit READ_DURING_WRITE_IS_DONT_CARE = READ_DURING_WRITE == "DONT_CARE";
+ localparam bit READ_DURING_WRITE_IS_OLD_DATA = READ_DURING_WRITE == "OLD_DATA";
+ localparam bit READ_DURING_WRITE_IS_NEW_DATA = READ_DURING_WRITE == "NEW_DATA";
+ `DLA_ACL_PARAMETER_ASSERT(READ_DURING_WRITE_IS_DONT_CARE || READ_DURING_WRITE_IS_OLD_DATA || READ_DURING_WRITE_IS_NEW_DATA)
+
+ //mlab and true dual port is illegal
+ `DLA_ACL_PARAMETER_ASSERT(!RAM_BLOCK_TYPE_IS_MLAB || !RAM_OPERATION_MODE_IS_TRUE_DUAL_PORT)
+
+ //unregistered address and true dual port is illegal
+ `DLA_ACL_PARAMETER_ASSERT(REGISTER_B_ADDRESS || !RAM_OPERATION_MODE_IS_TRUE_DUAL_PORT)
+
+ //m20k with unregistered address is illegal
+ `DLA_ACL_PARAMETER_ASSERT(!RAM_BLOCK_TYPE_IS_M20K || REGISTER_B_ADDRESS)
+ endgenerate
+
+
+
+ //////////////////////////
+ // Derived parameters //
+ //////////////////////////
+
+ //choose ram block type if user did not explicitly set it
+ localparam bit MUST_CHOOSE_M20K = (RAM_OPERATION_MODE == "TRUE_DUAL_PORT");
+ localparam bit MUST_CHOOSE_MLAB = (REGISTER_B_ADDRESS == 0);
+ localparam bit DEVICE_FAMILY_A10_OR_OLDER = (DEVICE_FAMILY_STR == "Cyclone 10 GX") || (DEVICE_FAMILY_STR == "Arria 10");
+ localparam int MLAB_MAX_PHYSICAL_DEPTH = (DEVICE_FAMILY_A10_OR_OLDER) ? 64 : 32;
+ localparam bit FAVOR_M20K = (DEPTH > MLAB_MAX_PHYSICAL_DEPTH) || USE_MEM_INIT_FILE; //m20k natively supports mem init inside a partial reconfig region
+ localparam HEURISTIC_RAM_BLOCK_TYPE = (MUST_CHOOSE_M20K) ? "M20K" : (MUST_CHOOSE_MLAB) ? "MLAB" : (FAVOR_M20K) ? "M20K" : "MLAB";
+ localparam CHOSEN_RAM_BLOCK_TYPE = (RAM_BLOCK_TYPE == "DLA_HLD_RAM_TO_CHOOSE") ? HEURISTIC_RAM_BLOCK_TYPE : RAM_BLOCK_TYPE;
+
+ //depth quantization - round up the depth to the nearest multiple of the minimum physical depth
+ //simulation may reduce the minimum physical depth to gain more test coverage of read-during-write behavior and depth stitching
+ localparam int MIN_PHYSICAL_DEPTH = (SIM_ONLY_MIN_PHYSICAL_DEPTH) ? SIM_ONLY_MIN_PHYSICAL_DEPTH : (CHOSEN_RAM_BLOCK_TYPE=="MLAB") ? 32 : 512;
+ localparam int MIN_USABLE_DEPTH = (RAM_OPERATION_MODE == "TRUE_DUAL_PORT") ? 2*MIN_PHYSICAL_DEPTH : MIN_PHYSICAL_DEPTH;
+ localparam int QUANTIZED_DEPTH = ((DEPTH + MIN_USABLE_DEPTH - 1) / MIN_USABLE_DEPTH) * MIN_USABLE_DEPTH;
+
+ //sanity check that the hardware is using the memory initialization files produced by the software model
+ localparam DLA_HLD_RAM_MEM_INIT_NAME = {MEM_INIT_NAME, ".hldram"};
+
+ //optionally skip all the upper layers of dla_hld_ram, intended to reduce the module count for Modelsim AE
+ //this can only be done when no features are used: no ECC, do whatever altera_syncram is going to do in terms of width/depth stitching, and no byte enables (therefore no bits per enable resizing)
+ localparam bit BYPASS_ALL_UPPER_LAYERS = !ENABLE_ECC && !MINIMIZE_MEMORY_USAGE && (BE_WIDTH == 1);
+
+
+
+ ////////////////////////
+ // Address resizing //
+ ////////////////////////
+
+ //if the address signal in this module is narrower than the port width of the instance, modelsim puts Z on the upper bits which is ambigious in terms of what address to access
+ localparam int QUANTIZED_ADDR = $clog2(QUANTIZED_DEPTH);
+ logic [QUANTIZED_ADDR-1:0] quantized_a_address, quantized_b_address;
+ generate
+ if (QUANTIZED_ADDR > ADDR) begin : ZERO_EXTEND_ADDRESS
+ assign quantized_a_address = {'0, a_address};
+ assign quantized_b_address = {'0, b_address};
+ end
+ else begin : ORIGINAL_ADDRESS //since QUANTIZED_DEPTH >= DEPTH therefore QUANTIZED_ADDR >= ADDR, strictly greater than was handled above, so this must be QUANTIZED_ADDR == ADDR
+ assign quantized_a_address = a_address;
+ assign quantized_b_address = b_address;
+ end
+ endgenerate
+
+
+
+ /////////////////////////////////////////////////
+ // Next layer in the instantiation hierarchy //
+ /////////////////////////////////////////////////
+
+ //imitate the query functions in the software model
+ // synthesis translate_off
+ int NUM_PHYSICAL_M20K, NUM_PHYSICAL_MLAB;
+ // synthesis translate_on
+
+ generate
+ if (BYPASS_ALL_UPPER_LAYERS) begin : GEN_LOWER
+ dla_hld_ram_lower
+ #(
+ .DEPTH (QUANTIZED_DEPTH), //changed
+ .WIDTH (WIDTH),
+ .BE_WIDTH (BE_WIDTH),
+ .UTILIZED_WIDTH (WIDTH), //assuming all bits of the data are used -- this is only for modelling the number of physical memories needed
+ .MINIMIZE_MEMORY_USAGE (MINIMIZE_MEMORY_USAGE),
+ .MIN_PHYSICAL_DEPTH (MIN_PHYSICAL_DEPTH),
+ .USE_MEM_INIT_FILE (USE_MEM_INIT_FILE),
+ .ZERO_INITIALIZE_MEM (ZERO_INITIALIZE_MEM),
+ .MEM_INIT_NAME (DLA_HLD_RAM_MEM_INIT_NAME), //changed
+ .RAM_BLOCK_TYPE (CHOSEN_RAM_BLOCK_TYPE), //changed
+ .RAM_OPERATION_MODE (RAM_OPERATION_MODE),
+ .DEVICE_FAMILY (DEVICE_FAMILY_STR),
+ .READ_DURING_WRITE (READ_DURING_WRITE),
+ .REGISTER_A_READDATA (REGISTER_A_READDATA),
+ .REGISTER_B_ADDRESS (REGISTER_B_ADDRESS),
+ .REGISTER_B_READDATA (REGISTER_B_READDATA),
+ .USE_ENABLE (USE_ENABLE),
+ .COMMON_IN_CLOCK_EN (COMMON_IN_CLOCK_EN),
+ .COMMON_OUT_CLOCK_EN (COMMON_OUT_CLOCK_EN)
+ )
+ dla_hld_ram_lower_inst
+ (
+
+ .clock (clock),
+ .a_address (quantized_a_address), //changed
+ .a_read_enable (a_read_enable),
+ .a_write (a_write),
+ .a_writedata (a_writedata),
+ .a_byteenable (a_byteenable),
+ .a_readdata (a_readdata),
+ .a_in_clock_en (a_in_clock_en),
+ .a_out_clock_en (a_out_clock_en),
+ .b_address (quantized_b_address), //changed
+ .b_read_enable (b_read_enable),
+ .b_write (b_write),
+ .b_writedata (b_writedata),
+ .b_byteenable (b_byteenable),
+ .b_readdata (b_readdata),
+ .b_in_clock_en (b_in_clock_en),
+ .b_out_clock_en (b_out_clock_en)
+ );
+
+ // synthesis translate_off
+ assign NUM_PHYSICAL_M20K = dla_hld_ram_lower_inst.NUM_PHYSICAL_M20K;
+ assign NUM_PHYSICAL_MLAB = dla_hld_ram_lower_inst.NUM_PHYSICAL_MLAB;
+ // synthesis translate_on
+ end
+ else begin : GEN_ECC
+ dla_hld_ram_ecc
+ #(
+ .DEPTH (QUANTIZED_DEPTH), //changed
+ .WIDTH (WIDTH),
+ .BE_WIDTH (BE_WIDTH),
+ .MINIMIZE_MEMORY_USAGE (MINIMIZE_MEMORY_USAGE),
+ .MIN_PHYSICAL_DEPTH (MIN_PHYSICAL_DEPTH),
+ .USE_MEM_INIT_FILE (USE_MEM_INIT_FILE),
+ .ZERO_INITIALIZE_MEM (ZERO_INITIALIZE_MEM),
+ .MEM_INIT_NAME (DLA_HLD_RAM_MEM_INIT_NAME), //changed
+ .ENABLE_ECC (ENABLE_ECC),
+ .ECC_STATUS_TIME_STRETCH(ECC_STATUS_TIME_STRETCH),
+ .ASYNC_RESET (ASYNC_RESET),
+ .SYNCHRONIZE_RESET (SYNCHRONIZE_RESET),
+ .RAM_BLOCK_TYPE (CHOSEN_RAM_BLOCK_TYPE), //changed
+ .RAM_OPERATION_MODE (RAM_OPERATION_MODE),
+ .DEVICE_FAMILY (DEVICE_FAMILY_STR),
+ .READ_DURING_WRITE (READ_DURING_WRITE),
+ .REGISTER_A_READDATA (REGISTER_A_READDATA),
+ .REGISTER_B_ADDRESS (REGISTER_B_ADDRESS),
+ .REGISTER_B_READDATA (REGISTER_B_READDATA),
+ .USE_ENABLE (USE_ENABLE),
+ .COMMON_IN_CLOCK_EN (COMMON_IN_CLOCK_EN),
+ .COMMON_OUT_CLOCK_EN (COMMON_OUT_CLOCK_EN)
+ )
+ dla_hld_ram_ecc_inst
+ (
+
+ .clock (clock),
+ .a_address (quantized_a_address), //changed
+ .a_write (a_write),
+ .a_writedata (a_writedata),
+ .a_byteenable (a_byteenable),
+ .a_readdata (a_readdata),
+ .a_in_clock_en (a_in_clock_en),
+ .a_out_clock_en (a_out_clock_en),
+ .a_read_enable (a_read_enable),
+ .b_address (quantized_b_address), //changed
+ .b_write (b_write),
+ .b_writedata (b_writedata),
+ .b_byteenable (b_byteenable),
+ .b_readdata (b_readdata),
+ .b_in_clock_en (b_in_clock_en),
+ .b_out_clock_en (b_out_clock_en),
+ .b_read_enable (b_read_enable),
+ .resetn (resetn),
+ .ecc_err_status (ecc_err_status)
+ );
+
+ // synthesis translate_off
+ assign NUM_PHYSICAL_M20K = dla_hld_ram_ecc_inst.NUM_PHYSICAL_M20K;
+ assign NUM_PHYSICAL_MLAB = dla_hld_ram_ecc_inst.NUM_PHYSICAL_MLAB;
+ // synthesis translate_on
+ end
+ endgenerate
+
+ logic a_read_enable_d1;
+ dla_delay #(
+ .WIDTH (1),
+ .DELAY (1),
+ .DEVICE (DEVICE_FAMILY)
+ ) a_delay_read_enable (
+ .clk (clock),
+ .clk_en (USE_ENABLE ? a_in_clock_en : 1'b1),
+ .i_data (a_read_enable),
+ .o_data (a_read_enable_d1)
+ );
+ dla_delay #(
+ .WIDTH (1),
+ .DELAY (REGISTER_A_READDATA),
+ .DEVICE (DEVICE_FAMILY)
+ ) a_delay_read_valid (
+ .clk (clock),
+ .clk_en (USE_ENABLE ? a_out_clock_en : 1'b1),
+ .i_data (a_read_enable_d1),
+ .o_data (a_read_valid)
+ );
+
+ logic b_read_enable_d1;
+ dla_delay #(
+ .WIDTH (1),
+ .DELAY (1),
+ .DEVICE (DEVICE_FAMILY)
+ ) b_delay_read_enable (
+ .clk (clock),
+ .clk_en (USE_ENABLE ? (COMMON_IN_CLOCK_EN ? a_in_clock_en : b_in_clock_en) : 1'b1),
+ .i_data (b_read_enable),
+ .o_data (b_read_enable_d1)
+ );
+ dla_delay #(
+ .WIDTH (1),
+ .DELAY (REGISTER_A_READDATA),
+ .DEVICE (DEVICE_FAMILY)
+ ) b_delay_read_valid (
+ .clk (clock),
+ .clk_en (USE_ENABLE ? (COMMON_OUT_CLOCK_EN ? a_out_clock_en : b_out_clock_en) : 1'b1),
+ .i_data (b_read_enable_d1),
+ .o_data (b_read_valid)
+ );
+
+endmodule
+
+`default_nettype wire