// Source path: root/python/openvino/demo/ip/intel_ai_ip/verilog/dla_hld_ram.sv
// Blob: 3e4391cee2a178e87bf236c3b5440c59416f5dc6
// Copyright 2020 Intel Corporation.
//
// This software and the related documents are Intel copyrighted materials,
// and your use of them is governed by the express license under which they
// were provided to you ("License"). Unless the License provides otherwise,
// you may not use, modify, copy, publish, distribute, disclose or transmit
// this software or the related documents without Intel's prior written
// permission.
//
// This software and the related documents are provided as is, with no express
// or implied warranties, other than those that are expressly stated in the
// License.

/**
dla_hld_ram is a project to simplify the usage of on-chip FPGA memory. Firstly, many of the complexities of Quartus IP such as altera_syncram and altdpram are hidden from the user.
Secondly, there are many value-added features such as using depth/width stitching to minimize the number of physical RAMs needed to create one logical RAM, which Quartus IP does
not handle well for non-power-of-2 sizes.

A very detailed powerpoint slide deck describing the internals can be found at: //depot/docs/hld/ip/dla_hld_ram introduction algorithm convergence.pptx

Here is a summary of the responsibility of each layer, note that the functionality in each layer can be optionally bypassed:

Level | Layer                                         | Description summary                                | Description details
------+-----------------------------------------------+----------------------------------------------------+------------------------------------------------------------------------------------
  1   | dla_hld_ram                                   | choose M20K/MLAB if user did not specify it
------+-----------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------
  2   | dla_hld_ram_ecc                               | adds error correction codes
------+-----------------------------------------------+----------------------------------------------------+------------------------------------------------------------------------------------
  3   | dla_hld_ram_tall_depth_stitch                 | minimize the number of physical memories needed to | - depth stitch a multiple of 16k depth, 8k depth, and leftover depth
  4   | dla_hld_ram_remaining_width                   | construct one logical memory where the width and   | - extract remaining width, e.g. if width=32 may want to handle 2 bits separately
  5   | dla_hld_ram_bits_per_enable                   | depth are not "nice" values (e.g. depth not a      | - logical to physical mapping of byte enables and data
  6   | dla_hld_ram_short_depth_stitch                | power of two), among configurations of minimal     | - split depth into top/bottom, top depth is multiple of max physical depth
  7   | dla_hld_ram_bottom_width_stitch               | memory try to minimize glue logic (e.g. size of    | - split bottom width into left/right, large width will depth stitch in next layer
  8   | dla_hld_ram_bottom_depth_stitch               | read data mux)                                     | - implement the depth stitch for large width groups extracted in previous layer
------+-----------------------------------------------+----------------------------------------------------+------------------------------------------------------------------------------------
  9   | dla_hld_ram_lower                             | unified interface for M20K and MLAB, also models the number of physical memories used
------+-----------------------------------------------+----------------------------------------------------+------------------------------------------------------------------------------------
 10   | dla_hld_ram_lower_m20k_simple_dual_port,      | wrappers around altera_syncram or altdpram,
      | dla_hld_ram_lower_m20k_true_dual_port,        | add soft logic when hardened logic lacks functionality
      | dla_hld_ram_lower_mlab_simple_dual_port       |
*/

`default_nettype none

`include "dla_acl_parameter_assert.svh"

// Top layer of the dla_hld_ram hierarchy.
//
// Responsibilities visible in this module:
//   - validate the parameter combination (elaboration-time assertions)
//   - choose M20K or MLAB when RAM_BLOCK_TYPE == "DLA_HLD_RAM_TO_CHOOSE"
//   - round DEPTH up to a multiple of the minimum usable physical depth and zero-extend the addresses to match
//   - instantiate either dla_hld_ram_lower directly (no ECC, no memory minimization, BE_WIDTH == 1) or dla_hld_ram_ecc
//   - produce a_read_valid / b_read_valid by delaying the read enables by the documented read latency of each port
module dla_hld_ram import dla_common_pkg::*; #(
    //geometry configuration
    parameter  int DEPTH,                           //number of words of memory
    parameter  int WIDTH,                           //width of the data bus, both read and write data
    parameter  int BE_WIDTH,                        //width of the byte enable signal, beware that WIDTH / BE_WIDTH must divide evenly

    //geometry constants
    parameter  bit MINIMIZE_MEMORY_USAGE,           //0 = minimize glue logic between memories (may require more physical memories), 1 = minimize number of physical memories needed (may require more glue logic)
    parameter  int SIM_ONLY_MIN_PHYSICAL_DEPTH,     //set to 0 to use real physical values, set to nonzero to allow simulation to use shallower memories to better test mixed port read during write

    //memory initialization
    parameter  bit USE_MEM_INIT_FILE,               //0 = do not use memory initialization file, 1 = use memory initialization file
    parameter  bit ZERO_INITIALIZE_MEM,             //only when USE_MEM_INIT_FILE = 0, choose whether the memory contents power up to zero or don't care (MLAB can only power up to zero)
    parameter      MEM_INIT_NAME,                   //only when USE_MEM_INIT_FILE = 1, specify the original file name including the .mif extension, this file name is modified as memory contents are sliced

    //error correction codes
    parameter  bit ENABLE_ECC,                      //whether to enable error correction codes
    parameter  bit ECC_STATUS_TIME_STRETCH,         //0 = report the instantaneous ecc status, 1 = pulse stretch any single error corrected and latch (until reset) any double error detected
    parameter  bit ASYNC_RESET,                     //how do ecc status registers CONSUME reset, 1 = asynchronously, 0 = synchronously, no other registers consume reset
    parameter  bit SYNCHRONIZE_RESET,               //should reset be synchronized BEFORE it is consumed by the ecc status registers, 1 = synchronize it, 0 = no change to reset before consumption

    //memory configuration
    parameter      RAM_BLOCK_TYPE,                  //legal values are: "M20K", "MLAB", "DLA_HLD_RAM_TO_CHOOSE"
    parameter      RAM_OPERATION_MODE,              //legal values are: "SIMPLE_DUAL_PORT", "TRUE_DUAL_PORT"
    parameter device_family_t DEVICE_FAMILY,        //legal values are the enum members DEVICE_C10, DEVICE_A10, DEVICE_S10, DEVICE_AGX7, DEVICE_AGX5 (mapped to a family string below)
    parameter      READ_DURING_WRITE,               //legal values are: "DONT_CARE", "OLD_DATA", "NEW_DATA"
    parameter  bit REGISTER_A_READDATA,             //latency from a_address to a_readdata is 1 (if unregistered) or 2 (if registered)
    parameter  bit REGISTER_B_ADDRESS,              //for mlab only choose whether to register or unregister the read address, for m20k this parameter is ignored since all inputs must be registered
    parameter  bit REGISTER_B_READDATA,             //latency from b_address to b_readdata is REGISTER_B_ADDRESS + REGISTER_B_READDATA, can be from 0 to 2 inclusive for mlab, can be from 1 to 2 for m20k

    //try to use memory hardened logic
    parameter  bit USE_ENABLE,                      // set to 0 if no clock enables are used
    parameter  bit COMMON_IN_CLOCK_EN,              //set to 1 if a_in_clock_en and b_in_clock_en are driven by the same source
    parameter  bit COMMON_OUT_CLOCK_EN,             //set to 1 if a_out_clock_en and b_out_clock_en are driven by the same source

    //derived parameters that affect the module interface
    localparam int ADDR = $clog2(DEPTH)
) (
    input  wire                 clock,

    //port a
    input  wire      [ADDR-1:0] a_address,          //address for read or write
    input  wire                 a_read_enable,      //read enable signal
    input  wire                 a_write,            //write enable, 1 = write, 0 = read
    input  wire     [WIDTH-1:0] a_writedata,        //data to write to memory
    input  wire  [BE_WIDTH-1:0] a_byteenable,       //which bytes of write data to commit to memory
    output logic    [WIDTH-1:0] a_readdata,         //data read from memory
    output wire                 a_read_valid,       //latency matched read enable
    input  wire                 a_in_clock_en,      //applies to all inputs: address, write enable, write data, byte enable
    input  wire                 a_out_clock_en,     //applies to all outputs: read data

    //port b
    input  wire      [ADDR-1:0] b_address,          //signals have the same meaning as port a
    input  wire                 b_read_enable,
    input  wire                 b_write,
    input  wire     [WIDTH-1:0] b_writedata,
    input  wire  [BE_WIDTH-1:0] b_byteenable,
    output logic    [WIDTH-1:0] b_readdata,
    output wire                 b_read_valid,
    input  wire                 b_in_clock_en,
    input  wire                 b_out_clock_en,

    //error correction code
    input  wire                 resetn,             //reset is only used if ENABLE_ECC = 1 and ECC_STATUS_TIME_STRETCH = 1, and it is only used by the ecc status not the ram itself
    output logic          [1:0] ecc_err_status      //bit 0 = double bit error detected (uncorrectable), bit 1 = single bit error corrected
);

    ///////////////////////
    //  Legality checks  //
    ///////////////////////

    generate
    //check for non-trivial dimensions
    `DLA_ACL_PARAMETER_ASSERT(DEPTH >= 2)
    `DLA_ACL_PARAMETER_ASSERT(WIDTH >= 1)
    `DLA_ACL_PARAMETER_ASSERT(BE_WIDTH >= 1)

    //width / be_width must divide evenly with no remainder
    `DLA_ACL_PARAMETER_ASSERT(WIDTH % BE_WIDTH == 0)

    //check for a legal value of ram block type
    localparam bit RAM_BLOCK_TYPE_IS_M20K = RAM_BLOCK_TYPE == "M20K";
    localparam bit RAM_BLOCK_TYPE_IS_MLAB = RAM_BLOCK_TYPE == "MLAB";
    localparam bit RAM_BLOCK_TYPE_IS_HLD_RAM_TO_CHOOSE = RAM_BLOCK_TYPE == "DLA_HLD_RAM_TO_CHOOSE";
    `DLA_ACL_PARAMETER_ASSERT(RAM_BLOCK_TYPE_IS_M20K || RAM_BLOCK_TYPE_IS_MLAB || RAM_BLOCK_TYPE_IS_HLD_RAM_TO_CHOOSE)

    //check for a legal value of ram operation mode
    localparam bit RAM_OPERATION_MODE_IS_SIMPLE_DUAL_PORT = RAM_OPERATION_MODE == "SIMPLE_DUAL_PORT";
    localparam bit RAM_OPERATION_MODE_IS_TRUE_DUAL_PORT   = RAM_OPERATION_MODE == "TRUE_DUAL_PORT";
    `DLA_ACL_PARAMETER_ASSERT(RAM_OPERATION_MODE_IS_SIMPLE_DUAL_PORT || RAM_OPERATION_MODE_IS_TRUE_DUAL_PORT)

    //check for a legal value of device family
    //the enum is translated to the family string that the lower layers take as their DEVICE_FAMILY parameter,
    //both Agilex enum members map to the same "Agilex" string, anything else becomes "Unknown" and fails the assertion below
    localparam     DEVICE_FAMILY_STR =
      (DEVICE_FAMILY == DEVICE_C10) ? "Cyclone 10 GX" :
      (DEVICE_FAMILY == DEVICE_A10) ? "Arria 10" :
      (DEVICE_FAMILY == DEVICE_S10) ? "Stratix 10" :
      (DEVICE_FAMILY == DEVICE_AGX7) ? "Agilex" :
      (DEVICE_FAMILY == DEVICE_AGX5) ? "Agilex" :
      "Unknown";
    localparam bit DEVICE_FAMILY_IS_C10 = DEVICE_FAMILY_STR == "Cyclone 10 GX";
    localparam bit DEVICE_FAMILY_IS_A10 = DEVICE_FAMILY_STR == "Arria 10";
    localparam bit DEVICE_FAMILY_IS_S10 = DEVICE_FAMILY_STR == "Stratix 10";
    localparam bit DEVICE_FAMILY_IS_AGX = DEVICE_FAMILY_STR == "Agilex";
    `DLA_ACL_PARAMETER_ASSERT(DEVICE_FAMILY_IS_C10 || DEVICE_FAMILY_IS_A10 || DEVICE_FAMILY_IS_S10 || DEVICE_FAMILY_IS_AGX)

    //check for a legal value of mixed port read during write mode
    localparam bit READ_DURING_WRITE_IS_DONT_CARE = READ_DURING_WRITE == "DONT_CARE";
    localparam bit READ_DURING_WRITE_IS_OLD_DATA  = READ_DURING_WRITE == "OLD_DATA";
    localparam bit READ_DURING_WRITE_IS_NEW_DATA  = READ_DURING_WRITE == "NEW_DATA";
    `DLA_ACL_PARAMETER_ASSERT(READ_DURING_WRITE_IS_DONT_CARE || READ_DURING_WRITE_IS_OLD_DATA || READ_DURING_WRITE_IS_NEW_DATA)

    //mlab and true dual port is illegal
    `DLA_ACL_PARAMETER_ASSERT(!RAM_BLOCK_TYPE_IS_MLAB || !RAM_OPERATION_MODE_IS_TRUE_DUAL_PORT)

    //unregistered address and true dual port is illegal
    `DLA_ACL_PARAMETER_ASSERT(REGISTER_B_ADDRESS || !RAM_OPERATION_MODE_IS_TRUE_DUAL_PORT)

    //m20k with unregistered address is illegal
    `DLA_ACL_PARAMETER_ASSERT(!RAM_BLOCK_TYPE_IS_M20K || REGISTER_B_ADDRESS)
    endgenerate



    //////////////////////////
    //  Derived parameters  //
    //////////////////////////

    //choose ram block type if user did not explicitly set it
    //priority: true dual port forces M20K, unregistered read address forces MLAB, otherwise prefer M20K for anything
    //deeper than one MLAB or anything that uses a memory initialization file
    localparam bit MUST_CHOOSE_M20K           = (RAM_OPERATION_MODE == "TRUE_DUAL_PORT");
    localparam bit MUST_CHOOSE_MLAB           = (REGISTER_B_ADDRESS == 0);
    localparam bit DEVICE_FAMILY_A10_OR_OLDER = (DEVICE_FAMILY_STR == "Cyclone 10 GX") || (DEVICE_FAMILY_STR == "Arria 10");
    localparam int MLAB_MAX_PHYSICAL_DEPTH    = (DEVICE_FAMILY_A10_OR_OLDER) ? 64 : 32;
    localparam bit FAVOR_M20K                 = (DEPTH > MLAB_MAX_PHYSICAL_DEPTH) || USE_MEM_INIT_FILE; //m20k natively supports mem init inside a partial reconfig region
    localparam     HEURISTIC_RAM_BLOCK_TYPE   = (MUST_CHOOSE_M20K) ? "M20K" : (MUST_CHOOSE_MLAB) ? "MLAB" : (FAVOR_M20K) ? "M20K" : "MLAB";
    localparam     CHOSEN_RAM_BLOCK_TYPE      = (RAM_BLOCK_TYPE == "DLA_HLD_RAM_TO_CHOOSE") ? HEURISTIC_RAM_BLOCK_TYPE : RAM_BLOCK_TYPE;

    //depth quantization - round up the depth to the nearest multiple of the minimum physical depth
    //simulation may reduce the minimum physical depth to gain more test coverage of read-during-write behavior and depth stitching
    localparam int MIN_PHYSICAL_DEPTH = (SIM_ONLY_MIN_PHYSICAL_DEPTH) ? SIM_ONLY_MIN_PHYSICAL_DEPTH : (CHOSEN_RAM_BLOCK_TYPE=="MLAB") ? 32 : 512;
    localparam int MIN_USABLE_DEPTH   = (RAM_OPERATION_MODE == "TRUE_DUAL_PORT") ? 2*MIN_PHYSICAL_DEPTH : MIN_PHYSICAL_DEPTH;
    localparam int QUANTIZED_DEPTH    = ((DEPTH + MIN_USABLE_DEPTH - 1) / MIN_USABLE_DEPTH) * MIN_USABLE_DEPTH;

    //sanity check that the hardware is using the memory initialization files produced by the software model
    localparam     DLA_HLD_RAM_MEM_INIT_NAME = {MEM_INIT_NAME, ".hldram"};

    //optionally skip all the upper layers of dla_hld_ram, intended to reduce the module count for Modelsim AE
    //this can only be done when no features are used: no ECC, do whatever altera_syncram is going to do in terms of width/depth stitching, and no byte enables (therefore no bits per enable resizing)
    localparam bit BYPASS_ALL_UPPER_LAYERS = !ENABLE_ECC && !MINIMIZE_MEMORY_USAGE && (BE_WIDTH == 1);



    ////////////////////////
    //  Address resizing  //
    ////////////////////////

    //if the address signal in this module is narrower than the port width of the instance, modelsim puts Z on the upper bits which is ambiguous in terms of what address to access
    localparam int QUANTIZED_ADDR = $clog2(QUANTIZED_DEPTH);
    logic [QUANTIZED_ADDR-1:0] quantized_a_address, quantized_b_address;
    generate
    if (QUANTIZED_ADDR > ADDR) begin : ZERO_EXTEND_ADDRESS
        //explicitly sized zero padding, the replication count is strictly positive inside this branch
        //(an unsized '0 inside a concatenation is not portable across tools)
        assign quantized_a_address = {{(QUANTIZED_ADDR-ADDR){1'b0}}, a_address};
        assign quantized_b_address = {{(QUANTIZED_ADDR-ADDR){1'b0}}, b_address};
    end
    else begin : ORIGINAL_ADDRESS   //since QUANTIZED_DEPTH >= DEPTH therefore QUANTIZED_ADDR >= ADDR, strictly greater than was handled above, so this must be QUANTIZED_ADDR == ADDR
        assign quantized_a_address = a_address;
        assign quantized_b_address = b_address;
    end
    endgenerate



    /////////////////////////////////////////////////
    //  Next layer in the instantiation hierarchy  //
    /////////////////////////////////////////////////

    //imitate the query functions in the software model
    // synthesis translate_off
    int NUM_PHYSICAL_M20K, NUM_PHYSICAL_MLAB;
    // synthesis translate_on

    generate
    if (BYPASS_ALL_UPPER_LAYERS) begin : GEN_LOWER
        dla_hld_ram_lower
        #(
            .DEPTH                  (QUANTIZED_DEPTH),          //changed
            .WIDTH                  (WIDTH),
            .BE_WIDTH               (BE_WIDTH),
            .UTILIZED_WIDTH         (WIDTH),                    //assuming all bits of the data are used -- this is only for modelling the number of physical memories needed
            .MINIMIZE_MEMORY_USAGE  (MINIMIZE_MEMORY_USAGE),
            .MIN_PHYSICAL_DEPTH     (MIN_PHYSICAL_DEPTH),
            .USE_MEM_INIT_FILE      (USE_MEM_INIT_FILE),
            .ZERO_INITIALIZE_MEM    (ZERO_INITIALIZE_MEM),
            .MEM_INIT_NAME          (DLA_HLD_RAM_MEM_INIT_NAME),    //changed
            .RAM_BLOCK_TYPE         (CHOSEN_RAM_BLOCK_TYPE),    //changed
            .RAM_OPERATION_MODE     (RAM_OPERATION_MODE),
            .DEVICE_FAMILY          (DEVICE_FAMILY_STR),
            .READ_DURING_WRITE      (READ_DURING_WRITE),
            .REGISTER_A_READDATA    (REGISTER_A_READDATA),
            .REGISTER_B_ADDRESS     (REGISTER_B_ADDRESS),
            .REGISTER_B_READDATA    (REGISTER_B_READDATA),
            .USE_ENABLE             (USE_ENABLE),
            .COMMON_IN_CLOCK_EN     (COMMON_IN_CLOCK_EN),
            .COMMON_OUT_CLOCK_EN    (COMMON_OUT_CLOCK_EN)
        )
        dla_hld_ram_lower_inst
        (

            .clock                  (clock),
            .a_address              (quantized_a_address),      //changed
            .a_read_enable          (a_read_enable),
            .a_write                (a_write),
            .a_writedata            (a_writedata),
            .a_byteenable           (a_byteenable),
            .a_readdata             (a_readdata),
            .a_in_clock_en          (a_in_clock_en),
            .a_out_clock_en         (a_out_clock_en),
            .b_address              (quantized_b_address),      //changed
            .b_read_enable          (b_read_enable),
            .b_write                (b_write),
            .b_writedata            (b_writedata),
            .b_byteenable           (b_byteenable),
            .b_readdata             (b_readdata),
            .b_in_clock_en          (b_in_clock_en),
            .b_out_clock_en         (b_out_clock_en)
        );

        //this branch is only taken when ENABLE_ECC = 0 and nothing below drives the ecc status,
        //so report "no error" instead of leaving the output undriven (X in simulation)
        assign ecc_err_status = 2'b00;

        // synthesis translate_off
        assign NUM_PHYSICAL_M20K = dla_hld_ram_lower_inst.NUM_PHYSICAL_M20K;
        assign NUM_PHYSICAL_MLAB = dla_hld_ram_lower_inst.NUM_PHYSICAL_MLAB;
        // synthesis translate_on
    end
    else begin : GEN_ECC
        dla_hld_ram_ecc
        #(
            .DEPTH                  (QUANTIZED_DEPTH),          //changed
            .WIDTH                  (WIDTH),
            .BE_WIDTH               (BE_WIDTH),
            .MINIMIZE_MEMORY_USAGE  (MINIMIZE_MEMORY_USAGE),
            .MIN_PHYSICAL_DEPTH     (MIN_PHYSICAL_DEPTH),
            .USE_MEM_INIT_FILE      (USE_MEM_INIT_FILE),
            .ZERO_INITIALIZE_MEM    (ZERO_INITIALIZE_MEM),
            .MEM_INIT_NAME          (DLA_HLD_RAM_MEM_INIT_NAME),    //changed
            .ENABLE_ECC             (ENABLE_ECC),
            .ECC_STATUS_TIME_STRETCH(ECC_STATUS_TIME_STRETCH),
            .ASYNC_RESET            (ASYNC_RESET),
            .SYNCHRONIZE_RESET      (SYNCHRONIZE_RESET),
            .RAM_BLOCK_TYPE         (CHOSEN_RAM_BLOCK_TYPE),    //changed
            .RAM_OPERATION_MODE     (RAM_OPERATION_MODE),
            .DEVICE_FAMILY          (DEVICE_FAMILY_STR),
            .READ_DURING_WRITE      (READ_DURING_WRITE),
            .REGISTER_A_READDATA    (REGISTER_A_READDATA),
            .REGISTER_B_ADDRESS     (REGISTER_B_ADDRESS),
            .REGISTER_B_READDATA    (REGISTER_B_READDATA),
            .USE_ENABLE             (USE_ENABLE),
            .COMMON_IN_CLOCK_EN     (COMMON_IN_CLOCK_EN),
            .COMMON_OUT_CLOCK_EN    (COMMON_OUT_CLOCK_EN)
        )
        dla_hld_ram_ecc_inst
        (

            .clock                  (clock),
            .a_address              (quantized_a_address),      //changed
            .a_write                (a_write),
            .a_writedata            (a_writedata),
            .a_byteenable           (a_byteenable),
            .a_readdata             (a_readdata),
            .a_in_clock_en          (a_in_clock_en),
            .a_out_clock_en         (a_out_clock_en),
            .a_read_enable          (a_read_enable),
            .b_address              (quantized_b_address),      //changed
            .b_write                (b_write),
            .b_writedata            (b_writedata),
            .b_byteenable           (b_byteenable),
            .b_readdata             (b_readdata),
            .b_in_clock_en          (b_in_clock_en),
            .b_out_clock_en         (b_out_clock_en),
            .b_read_enable          (b_read_enable),
            .resetn                 (resetn),
            .ecc_err_status         (ecc_err_status)
        );

        // synthesis translate_off
        assign NUM_PHYSICAL_M20K = dla_hld_ram_ecc_inst.NUM_PHYSICAL_M20K;
        assign NUM_PHYSICAL_MLAB = dla_hld_ram_ecc_inst.NUM_PHYSICAL_MLAB;
        // synthesis translate_on
    end
    endgenerate



    //////////////////////////////////
    //  Latency matched read valid  //
    //////////////////////////////////

    //each read valid is the read enable delayed by the documented address-to-readdata latency of its own port,
    //the first stage follows the input clock enable and the second stage follows the output clock enable
    //NOTE(review): dla_delay is defined elsewhere, it is assumed to delay i_data by DELAY enabled clock cycles and to
    //pass i_data straight through when DELAY = 0 (port a already depended on that for REGISTER_A_READDATA = 0)

    //port a: latency is 1 + REGISTER_A_READDATA
    logic a_read_enable_d1;
    dla_delay #(
      .WIDTH  (1),
      .DELAY  (1),
      .DEVICE (DEVICE_FAMILY)
    ) a_delay_read_enable (
      .clk    (clock),
      .clk_en (USE_ENABLE ? a_in_clock_en : 1'b1),
      .i_data (a_read_enable),
      .o_data (a_read_enable_d1)
    );
    dla_delay #(
      .WIDTH  (1),
      .DELAY  (REGISTER_A_READDATA),
      .DEVICE (DEVICE_FAMILY)
    ) a_delay_read_valid (
      .clk    (clock),
      .clk_en (USE_ENABLE ? a_out_clock_en : 1'b1),
      .i_data (a_read_enable_d1),
      .o_data (a_read_valid)
    );

    //port b: latency is REGISTER_B_ADDRESS + REGISTER_B_READDATA
    //the address stage only exists when the read address is registered (always the case for m20k and true dual port,
    //optional for mlab), and the output stage must follow port b's own read data register setting, not port a's
    logic b_read_enable_d1;
    dla_delay #(
      .WIDTH  (1),
      .DELAY  (REGISTER_B_ADDRESS),
      .DEVICE (DEVICE_FAMILY)
    ) b_delay_read_enable (
      .clk    (clock),
      .clk_en (USE_ENABLE ? (COMMON_IN_CLOCK_EN ? a_in_clock_en : b_in_clock_en) : 1'b1),
      .i_data (b_read_enable),
      .o_data (b_read_enable_d1)
    );
    dla_delay #(
      .WIDTH  (1),
      .DELAY  (REGISTER_B_READDATA),
      .DEVICE (DEVICE_FAMILY)
    ) b_delay_read_valid (
      .clk    (clock),
      .clk_en (USE_ENABLE ? (COMMON_OUT_CLOCK_EN ? a_out_clock_en : b_out_clock_en) : 1'b1),
      .i_data (b_read_enable_d1),
      .o_data (b_read_valid)
    );

endmodule

`default_nettype wire