summaryrefslogtreecommitdiff
path: root/python/openvino/demo/ip/intel_ai_ip/verilog/dla_hld_ram_lower.sv
blob: 1a8323859cac980f35f5c709bd2c4f99d4d3404d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
// Copyright 2020 Intel Corporation.
//
// This software and the related documents are Intel copyrighted materials,
// and your use of them is governed by the express license under which they
// were provided to you ("License"). Unless the License provides otherwise,
// you may not use, modify, copy, publish, distribute, disclose or transmit
// this software or the related documents without Intel's prior written
// permission.
//
// This software and the related documents are provided as is, with no express
// or implied warranties, other than those that are expressly stated in the
// License.

//see dla_hld_ram.sv for a description of the parameters, ports, and general functionality of all the dla_hld_ram layers

//this is the top level for the bottom layers of dla_hld_ram
//the upper layers deal with value-added features like width/depth stitching to minimize physical memory usage
//the lower layers deal with hiding the complexity of Quartus IP and adding soft logic when hardened logic lacks functionality
//this layer selects which of the specific Quartus IP wrappers to use, and it models how much physical memory will be used

`default_nettype none

`include "dla_acl_parameter_assert.svh"

module dla_hld_ram_lower #(
    //geometry configuration
    parameter  int DEPTH,
    parameter  int WIDTH,
    parameter  int BE_WIDTH,
    parameter  int UTILIZED_WIDTH,  //this is for modelling the number of physical memories used, created at the bits per enable layer, adjusted as fewer geometries are allowed as we go down the layers

    //geometry constants
    parameter  bit MINIMIZE_MEMORY_USAGE,
    parameter  int MIN_PHYSICAL_DEPTH,

    //memory initialization
    parameter  bit USE_MEM_INIT_FILE,
    parameter  bit ZERO_INITIALIZE_MEM,
    parameter      MEM_INIT_NAME,

    //memory configuration
    parameter      RAM_BLOCK_TYPE,
    parameter      RAM_OPERATION_MODE,
    parameter      DEVICE_FAMILY,
    parameter      READ_DURING_WRITE,
    parameter  bit REGISTER_A_READDATA,
    parameter  bit REGISTER_B_ADDRESS,
    parameter  bit REGISTER_B_READDATA,

    //try to use memory hardened logic
    parameter  bit USE_ENABLE,
    parameter  bit COMMON_IN_CLOCK_EN,
    parameter  bit COMMON_OUT_CLOCK_EN,

    //derived parameters
    localparam int ADDR = $clog2(DEPTH)
) (
    input  wire                 clock,
    //no reset

    //port a
    input  wire      [ADDR-1:0] a_address,
    input  wire                 a_read_enable,
    input  wire                 a_write,
    input  wire     [WIDTH-1:0] a_writedata,
    input  wire  [BE_WIDTH-1:0] a_byteenable,
    output logic    [WIDTH-1:0] a_readdata,
    input  wire                 a_in_clock_en,
    input  wire                 a_out_clock_en,

    //port b
    input  wire      [ADDR-1:0] b_address,
    input  wire                 b_read_enable,
    input  wire                 b_write,
    input  wire     [WIDTH-1:0] b_writedata,
    input  wire  [BE_WIDTH-1:0] b_byteenable,
    output logic    [WIDTH-1:0] b_readdata,
    input  wire                 b_in_clock_en,
    input  wire                 b_out_clock_en
);

    ///////////////////////
    //  Legality checks  //
    ///////////////////////

    generate
    //check for non-trivial dimensions
    `DLA_ACL_PARAMETER_ASSERT(WIDTH >= 1)
    `DLA_ACL_PARAMETER_ASSERT(DEPTH >= 2)
    `DLA_ACL_PARAMETER_ASSERT(BE_WIDTH >= 1)

    //width / be_width must divide evenly with no remainder
    `DLA_ACL_PARAMETER_ASSERT(WIDTH % BE_WIDTH == 0)

    //if using byte enables, bits per enable must be physically supported
    `DLA_ACL_PARAMETER_ASSERT(BE_WIDTH == 1 || (WIDTH/BE_WIDTH) == 10)

    //depth must be a multiple of min physical depth
    `DLA_ACL_PARAMETER_ASSERT((DEPTH / MIN_PHYSICAL_DEPTH) * MIN_PHYSICAL_DEPTH == DEPTH);

    //check for a legal value of ram block type
    localparam bit RAM_BLOCK_TYPE_IS_M20K = RAM_BLOCK_TYPE == "M20K";
    localparam bit RAM_BLOCK_TYPE_IS_MLAB = RAM_BLOCK_TYPE == "MLAB";
    `DLA_ACL_PARAMETER_ASSERT(RAM_BLOCK_TYPE_IS_M20K || RAM_BLOCK_TYPE_IS_MLAB)

    //check for a legal value of ram operation mode
    localparam bit RAM_OPERATION_MODE_IS_SIMPLE_DUAL_PORT = RAM_OPERATION_MODE == "SIMPLE_DUAL_PORT";
    localparam bit RAM_OPERATION_MODE_IS_TRUE_DUAL_PORT   = RAM_OPERATION_MODE == "TRUE_DUAL_PORT";
    `DLA_ACL_PARAMETER_ASSERT(RAM_OPERATION_MODE_IS_SIMPLE_DUAL_PORT || RAM_OPERATION_MODE_IS_TRUE_DUAL_PORT)

    //check for a legal value of device family
    localparam bit DEVICE_FAMILY_IS_C10 = DEVICE_FAMILY == "Cyclone 10 GX";
    localparam bit DEVICE_FAMILY_IS_A10 = DEVICE_FAMILY == "Arria 10";
    localparam bit DEVICE_FAMILY_IS_S10 = DEVICE_FAMILY == "Stratix 10";
    localparam bit DEVICE_FAMILY_IS_AGX = DEVICE_FAMILY == "Agilex";
    `DLA_ACL_PARAMETER_ASSERT(DEVICE_FAMILY_IS_C10 || DEVICE_FAMILY_IS_A10 || DEVICE_FAMILY_IS_S10 || DEVICE_FAMILY_IS_AGX)

    //check for a legal value of mixed port read during write mode
    localparam bit READ_DURING_WRITE_IS_DONT_CARE = READ_DURING_WRITE == "DONT_CARE";
    localparam bit READ_DURING_WRITE_IS_OLD_DATA  = READ_DURING_WRITE == "OLD_DATA";
    localparam bit READ_DURING_WRITE_IS_NEW_DATA  = READ_DURING_WRITE == "NEW_DATA";
    `DLA_ACL_PARAMETER_ASSERT(READ_DURING_WRITE_IS_DONT_CARE || READ_DURING_WRITE_IS_OLD_DATA || READ_DURING_WRITE_IS_NEW_DATA)

    //mlab and true dual port is illegal
    `DLA_ACL_PARAMETER_ASSERT(!RAM_BLOCK_TYPE_IS_MLAB || !RAM_OPERATION_MODE_IS_TRUE_DUAL_PORT)

    //m20k with unregistered address is illegal
    `DLA_ACL_PARAMETER_ASSERT(!RAM_BLOCK_TYPE_IS_M20K || REGISTER_B_ADDRESS)
    endgenerate



    //////////////////////////
    //  Derived parameters  //
    //////////////////////////

    //finish constructing the memory initialization file name by appending the .mif extension after the name modification done by upper layers
    localparam     MEM_INIT_FILE_NAME = {MEM_INIT_NAME, ".mif"};

    //limit the max physical depth used by altera_syncram, e.g. if we want 8k x 10, better to build it from 4k x 5 (tiled as 2x2) instead of 8k x 2 (tiled as 1x5)
    localparam bit DEVICE_FAMILY_A10_OR_OLDER = (DEVICE_FAMILY == "Cyclone 10 GX") || (DEVICE_FAMILY == "Arria 10");
    localparam int MAXIMUM_DEPTH = (MINIMIZE_MEMORY_USAGE && DEVICE_FAMILY_A10_OR_OLDER) ? 8*MIN_PHYSICAL_DEPTH : 0;  //if trying to minimize memory usage, altera_syncram physical depth should not exceed 4k



    ///////////////////////////////////////////////////
    //  Model how many physical memories are needed  //
    ///////////////////////////////////////////////////

    //determine the physical depth of the underlying hardened memory
    localparam int M20K_MAX_PHYSICAL_DEPTH = (MAXIMUM_DEPTH) ? MAXIMUM_DEPTH : (DEVICE_FAMILY_A10_OR_OLDER) ? 32*MIN_PHYSICAL_DEPTH : 4*MIN_PHYSICAL_DEPTH;
    localparam int MLAB_MAX_PHYSICAL_DEPTH = (DEVICE_FAMILY_A10_OR_OLDER) ? 2*MIN_PHYSICAL_DEPTH : MIN_PHYSICAL_DEPTH;
    localparam int MAX_PHYSICAL_DEPTH = (RAM_BLOCK_TYPE == "M20K") ? M20K_MAX_PHYSICAL_DEPTH : MLAB_MAX_PHYSICAL_DEPTH;
    localparam int DEPTH_ROUNDED_UP_TO_NEAREST_POWER_OF_TWO = 1 << $clog2(DEPTH);
    localparam int PHYSICAL_DEPTH = (DEPTH_ROUNDED_UP_TO_NEAREST_POWER_OF_TWO > MAX_PHYSICAL_DEPTH) ? MAX_PHYSICAL_DEPTH : DEPTH_ROUNDED_UP_TO_NEAREST_POWER_OF_TWO;

    //determine the physical width of the underlying hardened memory
    localparam int M = MIN_PHYSICAL_DEPTH;  //shorten the names to make enumerating the cases more compact, min physical depth is a power of 2 (either 512 or 32 for m20k or mlab, or overriden by simulation)
    localparam int D = PHYSICAL_DEPTH;      //guaranteed this is a power of 2, depth was rounded up to nearest power of 2, and max physical depth must be a power of 2
    localparam int M20K_PHYSICAL_WIDTH = (D==M) ? 40 : (D==2*M) ? 20 : (D==4*M) ? 10 : (D==8*M) ? 5 : (D==16*M)? 2 : 1; //if true dual port, depth was quantized to 2 * min physical depth, so width limited to 20
    localparam int MLAB_PHYSICAL_WIDTH = (D==M) ? 20 : 10;
    localparam int PHYSICAL_WIDTH = (RAM_BLOCK_TYPE == "M20K") ? M20K_PHYSICAL_WIDTH : MLAB_PHYSICAL_WIDTH;

    //how many physical copies are tiled in the x and y directions to cover the width and depth
    //using the raw width can be misleading, for example at depth 4k the physical width is 5, altera_syncram does not allow 5 bits per enable, so pad the data to 10 bits per enable as a workaround
    //if there were 2 byte enable signals (WIDTH=20 whereas UTILIZED_WIDTH=10), the width makes it look like 4 M20K are needed but actually only 2 M20K are synthesized
    localparam int DEPTH_PHYSICAL_TILING = (DEPTH + PHYSICAL_DEPTH - 1) / PHYSICAL_DEPTH;
    localparam int WIDTH_PHYSICAL_TILING = (UTILIZED_WIDTH + PHYSICAL_WIDTH - 1) / PHYSICAL_WIDTH;

    //resource usage
    localparam int NUM_PHYSICAL_M20K = (RAM_BLOCK_TYPE != "M20K") ? 0 : DEPTH_PHYSICAL_TILING * WIDTH_PHYSICAL_TILING;
    localparam int NUM_PHYSICAL_MLAB = (RAM_BLOCK_TYPE != "MLAB") ? 0 : DEPTH_PHYSICAL_TILING * WIDTH_PHYSICAL_TILING;

    //the layers above consume these localparam values by assigning them to an integer, intended for simulation only



    /////////////////////////////////////////////////
    //  Next layer in the instantiation hierarchy  //
    /////////////////////////////////////////////////

    generate
    if (RAM_BLOCK_TYPE_IS_M20K && RAM_OPERATION_MODE_IS_TRUE_DUAL_PORT) begin : M20K_TDP
        dla_hld_ram_lower_m20k_true_dual_port
        #(
            .DEPTH                  (DEPTH),
            .WIDTH                  (WIDTH),
            .BE_WIDTH               (BE_WIDTH),
            .MAXIMUM_DEPTH          (MAXIMUM_DEPTH),
            .DEVICE_FAMILY          (DEVICE_FAMILY),
            .READ_DURING_WRITE      (READ_DURING_WRITE),
            .USE_ENABLE             (USE_ENABLE),
            .COMMON_IN_CLOCK_EN     (COMMON_IN_CLOCK_EN),
            .COMMON_OUT_CLOCK_EN    (COMMON_OUT_CLOCK_EN),
            .REGISTER_A_READDATA    (REGISTER_A_READDATA),
            .REGISTER_B_READDATA    (REGISTER_B_READDATA),
            .USE_MEM_INIT_FILE      (USE_MEM_INIT_FILE),
            .ZERO_INITIALIZE_MEM    (ZERO_INITIALIZE_MEM),
            .MEM_INIT_FILE_NAME     (MEM_INIT_FILE_NAME)
        )
        dla_hld_ram_lower_m20k_true_dual_port_inst
        (
            .clock                  (clock),
            .a_address              (a_address),
            .a_read_enable          (a_read_enable),
            .a_write                (a_write),
            .a_writedata            (a_writedata),
            .a_byteenable           (a_byteenable),
            .a_readdata             (a_readdata),
            .a_in_clock_en          (a_in_clock_en),
            .a_out_clock_en         (a_out_clock_en),
            .b_address              (b_address),
            .b_read_enable          (b_read_enable),
            .b_write                (b_write),
            .b_writedata            (b_writedata),
            .b_byteenable           (b_byteenable),
            .b_readdata             (b_readdata),
            .b_in_clock_en          (b_in_clock_en),
            .b_out_clock_en         (b_out_clock_en)
        );
    end
    if (RAM_BLOCK_TYPE_IS_M20K && RAM_OPERATION_MODE_IS_SIMPLE_DUAL_PORT) begin : M20K_SDP
        dla_hld_ram_lower_m20k_simple_dual_port
        #(
            .DEPTH                  (DEPTH),
            .WIDTH                  (WIDTH),
            .BE_WIDTH               (BE_WIDTH),
            .MAXIMUM_DEPTH          (MAXIMUM_DEPTH),
            .DEVICE_FAMILY          (DEVICE_FAMILY),
            .READ_DURING_WRITE      (READ_DURING_WRITE),
            .USE_ENABLE             (USE_ENABLE),
            .COMMON_IN_CLOCK_EN     (COMMON_IN_CLOCK_EN),
            .REGISTER_B_READDATA    (REGISTER_B_READDATA),
            .USE_MEM_INIT_FILE      (USE_MEM_INIT_FILE),
            .ZERO_INITIALIZE_MEM    (ZERO_INITIALIZE_MEM),
            .MEM_INIT_FILE_NAME     (MEM_INIT_FILE_NAME)
        )
        dla_hld_ram_lower_m20k_simple_dual_port_inst
        (
            .clock                  (clock),
            .a_address              (a_address),
            .a_write                (a_write),
            .a_writedata            (a_writedata),
            .a_byteenable           (a_byteenable),
            .a_in_clock_en          (a_in_clock_en),
            .b_address              (b_address),
            .b_read_enable          (b_read_enable),
            .b_readdata             (b_readdata),
            .b_in_clock_en          (b_in_clock_en),
            .b_out_clock_en         (b_out_clock_en)
        );
    end
    if (RAM_BLOCK_TYPE_IS_MLAB && RAM_OPERATION_MODE_IS_SIMPLE_DUAL_PORT) begin : MLAB
        dla_hld_ram_lower_mlab_simple_dual_port
        #(
            .DEPTH                  (DEPTH),
            .WIDTH                  (WIDTH),
            .BE_WIDTH               (BE_WIDTH),
            .DEVICE_FAMILY          (DEVICE_FAMILY),
            .READ_DURING_WRITE      (READ_DURING_WRITE),
            .REGISTER_B_ADDRESS     (REGISTER_B_ADDRESS),
            .REGISTER_B_READDATA    (REGISTER_B_READDATA),
            .USE_MEM_INIT_FILE      (USE_MEM_INIT_FILE),
            .MEM_INIT_FILE_NAME     (MEM_INIT_FILE_NAME)
        )
        dla_hld_ram_lower_mlab_simple_dual_port_inst
        (
            .clock                  (clock),
            .a_address              (a_address),
            .a_write                (a_write),
            .a_writedata            (a_writedata),
            .a_byteenable           (a_byteenable),
            .a_in_clock_en          (a_in_clock_en),
            .b_address              (b_address),
            .b_read_enable          (b_read_enable),
            .b_readdata             (b_readdata),
            .b_in_clock_en          (b_in_clock_en),
            .b_out_clock_en         (b_out_clock_en)
        );
    end
    endgenerate

endmodule

`default_nettype wire