1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
|
// Copyright 2020 Intel Corporation.
//
// This software and the related documents are Intel copyrighted materials,
// and your use of them is governed by the express license under which they
// were provided to you ("License"). Unless the License provides otherwise,
// you may not use, modify, copy, publish, distribute, disclose or transmit
// this software or the related documents without Intel's prior written
// permission.
//
// This software and the related documents are provided as is, with no express
// or implied warranties, other than those that are expressly stated in the
// License.
//see dla_hld_ram.sv for a description of the parameters, ports, and general functionality of all the dla_hld_ram layers
//this is the top level for the bottom layers of dla_hld_ram
//the upper layers deal with value-added features like width/depth stitching to minimize physical memory usage
//the lower layers deal with hiding the complexity of Quartus IP and adding soft logic when hardened logic lacks functionality
//this layer selects which of the specific Quartus IP wrappers to use, and it models how much physical memory will be used
`default_nettype none
`include "dla_acl_parameter_assert.svh"
module dla_hld_ram_lower #(
    // ---- geometry configuration ----
    parameter int DEPTH,
    parameter int WIDTH,
    parameter int BE_WIDTH,
    parameter int UTILIZED_WIDTH,           // feeds only the physical memory count model in this module; produced at the bits per enable layer and adjusted on the way down as fewer geometries remain legal

    // ---- geometry constants ----
    parameter bit MINIMIZE_MEMORY_USAGE,
    parameter int MIN_PHYSICAL_DEPTH,

    // ---- memory initialization ----
    parameter bit USE_MEM_INIT_FILE,
    parameter bit ZERO_INITIALIZE_MEM,
    parameter     MEM_INIT_NAME,            // base name, the ".mif" extension is appended in this layer

    // ---- memory configuration ----
    parameter     RAM_BLOCK_TYPE,           // "M20K" or "MLAB"
    parameter     RAM_OPERATION_MODE,       // "SIMPLE_DUAL_PORT" or "TRUE_DUAL_PORT"
    parameter     DEVICE_FAMILY,            // "Cyclone 10 GX", "Arria 10", "Stratix 10" or "Agilex"
    parameter     READ_DURING_WRITE,        // "DONT_CARE", "OLD_DATA" or "NEW_DATA"
    parameter bit REGISTER_A_READDATA,
    parameter bit REGISTER_B_ADDRESS,
    parameter bit REGISTER_B_READDATA,

    // ---- try to use memory hardened logic ----
    parameter bit USE_ENABLE,
    parameter bit COMMON_IN_CLOCK_EN,
    parameter bit COMMON_OUT_CLOCK_EN,

    // ---- derived parameters ----
    localparam int ADDR = $clog2(DEPTH)
) (
    input  wire                 clock,
    // no reset

    // port a
    input  wire      [ADDR-1:0] a_address,
    input  wire                 a_read_enable,
    input  wire                 a_write,
    input  wire     [WIDTH-1:0] a_writedata,
    input  wire  [BE_WIDTH-1:0] a_byteenable,
    output logic    [WIDTH-1:0] a_readdata,
    input  wire                 a_in_clock_en,
    input  wire                 a_out_clock_en,

    // port b
    input  wire      [ADDR-1:0] b_address,
    input  wire                 b_read_enable,
    input  wire                 b_write,
    input  wire     [WIDTH-1:0] b_writedata,
    input  wire  [BE_WIDTH-1:0] b_byteenable,
    output logic    [WIDTH-1:0] b_readdata,
    input  wire                 b_in_clock_en,
    input  wire                 b_out_clock_en
);

    // Structure of this module:
    //   1. parameter legality checks (DLA_ACL_PARAMETER_ASSERT comes from dla_acl_parameter_assert.svh)
    //   2. derived parameters: memory initialization file name and the cap on physical depth
    //   3. a localparam-only model of how many physical M20K / MLAB blocks are used (no logic is generated for it)
    //   4. instantiation of one of three Quartus IP wrappers, selected by RAM_BLOCK_TYPE and RAM_OPERATION_MODE
    // All localparam names, generate block labels and instance names are kept stable since they are visible through the hierarchy.


    ///////////////////////
    //  Legality checks  //
    ///////////////////////

    generate
        // dimensions must be non-trivial
        `DLA_ACL_PARAMETER_ASSERT(WIDTH >= 1)
        `DLA_ACL_PARAMETER_ASSERT(DEPTH >= 2)
        `DLA_ACL_PARAMETER_ASSERT(BE_WIDTH >= 1)

        // every byte enable must cover the same whole number of data bits
        `DLA_ACL_PARAMETER_ASSERT(WIDTH % BE_WIDTH == 0)

        // byte enables are only accepted here when each enable covers 10 bits (a single enable covering the whole word is always fine)
        `DLA_ACL_PARAMETER_ASSERT(BE_WIDTH == 1 || (WIDTH/BE_WIDTH) == 10)

        // depth must be a whole multiple of the min physical depth
        `DLA_ACL_PARAMETER_ASSERT((DEPTH / MIN_PHYSICAL_DEPTH) * MIN_PHYSICAL_DEPTH == DEPTH)

        // ram block type must be one of the known values
        localparam bit RAM_BLOCK_TYPE_IS_M20K = RAM_BLOCK_TYPE == "M20K";
        localparam bit RAM_BLOCK_TYPE_IS_MLAB = RAM_BLOCK_TYPE == "MLAB";
        `DLA_ACL_PARAMETER_ASSERT(RAM_BLOCK_TYPE_IS_M20K || RAM_BLOCK_TYPE_IS_MLAB)

        // ram operation mode must be one of the known values
        localparam bit RAM_OPERATION_MODE_IS_SIMPLE_DUAL_PORT = RAM_OPERATION_MODE == "SIMPLE_DUAL_PORT";
        localparam bit RAM_OPERATION_MODE_IS_TRUE_DUAL_PORT   = RAM_OPERATION_MODE == "TRUE_DUAL_PORT";
        `DLA_ACL_PARAMETER_ASSERT(RAM_OPERATION_MODE_IS_SIMPLE_DUAL_PORT || RAM_OPERATION_MODE_IS_TRUE_DUAL_PORT)

        // device family must be one of the known values
        localparam bit DEVICE_FAMILY_IS_C10 = DEVICE_FAMILY == "Cyclone 10 GX";
        localparam bit DEVICE_FAMILY_IS_A10 = DEVICE_FAMILY == "Arria 10";
        localparam bit DEVICE_FAMILY_IS_S10 = DEVICE_FAMILY == "Stratix 10";
        localparam bit DEVICE_FAMILY_IS_AGX = DEVICE_FAMILY == "Agilex";
        `DLA_ACL_PARAMETER_ASSERT(DEVICE_FAMILY_IS_C10 || DEVICE_FAMILY_IS_A10 || DEVICE_FAMILY_IS_S10 || DEVICE_FAMILY_IS_AGX)

        // mixed port read during write mode must be one of the known values
        localparam bit READ_DURING_WRITE_IS_DONT_CARE = READ_DURING_WRITE == "DONT_CARE";
        localparam bit READ_DURING_WRITE_IS_OLD_DATA  = READ_DURING_WRITE == "OLD_DATA";
        localparam bit READ_DURING_WRITE_IS_NEW_DATA  = READ_DURING_WRITE == "NEW_DATA";
        `DLA_ACL_PARAMETER_ASSERT(READ_DURING_WRITE_IS_DONT_CARE || READ_DURING_WRITE_IS_OLD_DATA || READ_DURING_WRITE_IS_NEW_DATA)

        // an mlab cannot be used in true dual port mode
        `DLA_ACL_PARAMETER_ASSERT(!RAM_BLOCK_TYPE_IS_MLAB || !RAM_OPERATION_MODE_IS_TRUE_DUAL_PORT)

        // an m20k requires the address to be registered
        `DLA_ACL_PARAMETER_ASSERT(!RAM_BLOCK_TYPE_IS_M20K || REGISTER_B_ADDRESS)
    endgenerate


    //////////////////////////
    //  Derived parameters  //
    //////////////////////////

    // upper layers have already modified the name as needed, the only thing left to do is append the .mif extension
    localparam MEM_INIT_FILE_NAME = {MEM_INIT_NAME, ".mif"};

    // cap on the physical depth that altera_syncram may use, 0 means no cap is imposed by this layer
    // example of why a cap helps: for 8k x 10 it is better to tile 4k x 5 as 2x2 than to tile 8k x 2 as 1x5
    // with a min physical depth of 512 the cap of 8x works out to 4k, and it only applies to Arria 10 / Cyclone 10 GX when minimizing memory usage
    localparam bit DEVICE_FAMILY_A10_OR_OLDER = DEVICE_FAMILY_IS_C10 || DEVICE_FAMILY_IS_A10;
    localparam int MAXIMUM_DEPTH              = (MINIMIZE_MEMORY_USAGE && DEVICE_FAMILY_A10_OR_OLDER) ? 8*MIN_PHYSICAL_DEPTH : 0;


    ///////////////////////////////////////////////////
    //  Model how many physical memories are needed  //
    ///////////////////////////////////////////////////

    // physical depth of the underlying hardened memory: the depth rounded up to a power of 2, clipped to the deepest allowed configuration
    localparam int M20K_MAX_PHYSICAL_DEPTH =
        (MAXIMUM_DEPTH != 0)       ? MAXIMUM_DEPTH         :    // an explicit cap takes priority
        DEVICE_FAMILY_A10_OR_OLDER ? 32*MIN_PHYSICAL_DEPTH :
                                     4*MIN_PHYSICAL_DEPTH;
    localparam int MLAB_MAX_PHYSICAL_DEPTH = DEVICE_FAMILY_A10_OR_OLDER ? 2*MIN_PHYSICAL_DEPTH : MIN_PHYSICAL_DEPTH;
    localparam int MAX_PHYSICAL_DEPTH      = RAM_BLOCK_TYPE_IS_M20K ? M20K_MAX_PHYSICAL_DEPTH : MLAB_MAX_PHYSICAL_DEPTH;

    localparam int DEPTH_ROUNDED_UP_TO_NEAREST_POWER_OF_TWO = 1 << ADDR;    // ADDR is $clog2(DEPTH)
    localparam int PHYSICAL_DEPTH =
        (DEPTH_ROUNDED_UP_TO_NEAREST_POWER_OF_TWO <= MAX_PHYSICAL_DEPTH) ? DEPTH_ROUNDED_UP_TO_NEAREST_POWER_OF_TWO : MAX_PHYSICAL_DEPTH;

    // physical width of the underlying hardened memory, looked up from the ratio of physical depth to min physical depth
    // M and D are short aliases that keep the lookup compact
    // min physical depth is a power of 2 (512 for m20k, 32 for mlab, or overridden by simulation)
    // physical depth is a power of 2 as well, provided that max physical depth is one, since depth was rounded up to a power of 2
    localparam int M = MIN_PHYSICAL_DEPTH;
    localparam int D = PHYSICAL_DEPTH;

    // note carried over from the original: in true dual port mode the depth was quantized to 2 * min physical depth, so the width is limited to 20
    localparam int M20K_PHYSICAL_WIDTH =
        (D ==    M) ? 40 :
        (D ==  2*M) ? 20 :
        (D ==  4*M) ? 10 :
        (D ==  8*M) ?  5 :
        (D == 16*M) ?  2 :
                       1;
    localparam int MLAB_PHYSICAL_WIDTH = (D == M) ? 20 : 10;
    localparam int PHYSICAL_WIDTH      = RAM_BLOCK_TYPE_IS_M20K ? M20K_PHYSICAL_WIDTH : MLAB_PHYSICAL_WIDTH;

    // number of physical copies tiled in each direction, both are ceiling divisions
    // the width tiling uses UTILIZED_WIDTH instead of WIDTH because the raw width can overstate the usage
    // e.g. at depth 4k the physical width is 5, but altera_syncram does not allow 5 bits per enable, so the data gets padded to 10 bits per enable
    // with 2 byte enables (WIDTH=20 whereas UTILIZED_WIDTH=10) the raw width suggests 4 M20K even though only 2 M20K are synthesized
    localparam int DEPTH_PHYSICAL_TILING = (DEPTH + PHYSICAL_DEPTH - 1) / PHYSICAL_DEPTH;
    localparam int WIDTH_PHYSICAL_TILING = (UTILIZED_WIDTH + PHYSICAL_WIDTH - 1) / PHYSICAL_WIDTH;

    // resource usage, the count for the block type that is not in use is 0
    // the layers above read these localparam values by assigning them to an integer, this is intended for simulation only
    localparam int NUM_PHYSICAL_M20K = RAM_BLOCK_TYPE_IS_M20K ? DEPTH_PHYSICAL_TILING * WIDTH_PHYSICAL_TILING : 0;
    localparam int NUM_PHYSICAL_MLAB = RAM_BLOCK_TYPE_IS_MLAB ? DEPTH_PHYSICAL_TILING * WIDTH_PHYSICAL_TILING : 0;


    /////////////////////////////////////////////////
    //  Next layer in the instantiation hierarchy  //
    /////////////////////////////////////////////////

    // the three conditions are mutually exclusive, the remaining combination (mlab with true dual port) is rejected by the legality checks
    generate
        if (RAM_BLOCK_TYPE_IS_M20K && RAM_OPERATION_MODE_IS_TRUE_DUAL_PORT) begin : M20K_TDP
            // m20k, true dual port: every port of this module is wired through
            dla_hld_ram_lower_m20k_true_dual_port
            #(
                .DEPTH                  (DEPTH),
                .WIDTH                  (WIDTH),
                .BE_WIDTH               (BE_WIDTH),
                .MAXIMUM_DEPTH          (MAXIMUM_DEPTH),
                .DEVICE_FAMILY          (DEVICE_FAMILY),
                .READ_DURING_WRITE      (READ_DURING_WRITE),
                .USE_ENABLE             (USE_ENABLE),
                .COMMON_IN_CLOCK_EN     (COMMON_IN_CLOCK_EN),
                .COMMON_OUT_CLOCK_EN    (COMMON_OUT_CLOCK_EN),
                .REGISTER_A_READDATA    (REGISTER_A_READDATA),
                .REGISTER_B_READDATA    (REGISTER_B_READDATA),
                .USE_MEM_INIT_FILE      (USE_MEM_INIT_FILE),
                .ZERO_INITIALIZE_MEM    (ZERO_INITIALIZE_MEM),
                .MEM_INIT_FILE_NAME     (MEM_INIT_FILE_NAME)
            )
            dla_hld_ram_lower_m20k_true_dual_port_inst
            (
                .clock                  (clock),
                .a_address              (a_address),
                .a_read_enable          (a_read_enable),
                .a_write                (a_write),
                .a_writedata            (a_writedata),
                .a_byteenable           (a_byteenable),
                .a_readdata             (a_readdata),
                .a_in_clock_en          (a_in_clock_en),
                .a_out_clock_en         (a_out_clock_en),
                .b_address              (b_address),
                .b_read_enable          (b_read_enable),
                .b_write                (b_write),
                .b_writedata            (b_writedata),
                .b_byteenable           (b_byteenable),
                .b_readdata             (b_readdata),
                .b_in_clock_en          (b_in_clock_en),
                .b_out_clock_en         (b_out_clock_en)
            );
        end
        else if (RAM_BLOCK_TYPE_IS_M20K && RAM_OPERATION_MODE_IS_SIMPLE_DUAL_PORT) begin : M20K_SDP
            // m20k, simple dual port: only the port a write signals and the port b read signals are wired through
            // a_read_enable, a_out_clock_en, a_readdata, b_write, b_writedata and b_byteenable are not connected in this branch
            dla_hld_ram_lower_m20k_simple_dual_port
            #(
                .DEPTH                  (DEPTH),
                .WIDTH                  (WIDTH),
                .BE_WIDTH               (BE_WIDTH),
                .MAXIMUM_DEPTH          (MAXIMUM_DEPTH),
                .DEVICE_FAMILY          (DEVICE_FAMILY),
                .READ_DURING_WRITE      (READ_DURING_WRITE),
                .USE_ENABLE             (USE_ENABLE),
                .COMMON_IN_CLOCK_EN     (COMMON_IN_CLOCK_EN),
                .REGISTER_B_READDATA    (REGISTER_B_READDATA),
                .USE_MEM_INIT_FILE      (USE_MEM_INIT_FILE),
                .ZERO_INITIALIZE_MEM    (ZERO_INITIALIZE_MEM),
                .MEM_INIT_FILE_NAME     (MEM_INIT_FILE_NAME)
            )
            dla_hld_ram_lower_m20k_simple_dual_port_inst
            (
                .clock                  (clock),
                .a_address              (a_address),
                .a_write                (a_write),
                .a_writedata            (a_writedata),
                .a_byteenable           (a_byteenable),
                .a_in_clock_en          (a_in_clock_en),
                .b_address              (b_address),
                .b_read_enable          (b_read_enable),
                .b_readdata             (b_readdata),
                .b_in_clock_en          (b_in_clock_en),
                .b_out_clock_en         (b_out_clock_en)
            );
        end
        else if (RAM_BLOCK_TYPE_IS_MLAB && RAM_OPERATION_MODE_IS_SIMPLE_DUAL_PORT) begin : MLAB
            // mlab, simple dual port: same port wiring as the m20k simple dual port branch
            // this wrapper is the only one that receives REGISTER_B_ADDRESS
            // it is not given MAXIMUM_DEPTH, USE_ENABLE, COMMON_IN_CLOCK_EN or ZERO_INITIALIZE_MEM
            dla_hld_ram_lower_mlab_simple_dual_port
            #(
                .DEPTH                  (DEPTH),
                .WIDTH                  (WIDTH),
                .BE_WIDTH               (BE_WIDTH),
                .DEVICE_FAMILY          (DEVICE_FAMILY),
                .READ_DURING_WRITE      (READ_DURING_WRITE),
                .REGISTER_B_ADDRESS     (REGISTER_B_ADDRESS),
                .REGISTER_B_READDATA    (REGISTER_B_READDATA),
                .USE_MEM_INIT_FILE      (USE_MEM_INIT_FILE),
                .MEM_INIT_FILE_NAME     (MEM_INIT_FILE_NAME)
            )
            dla_hld_ram_lower_mlab_simple_dual_port_inst
            (
                .clock                  (clock),
                .a_address              (a_address),
                .a_write                (a_write),
                .a_writedata            (a_writedata),
                .a_byteenable           (a_byteenable),
                .a_in_clock_en          (a_in_clock_en),
                .b_address              (b_address),
                .b_read_enable          (b_read_enable),
                .b_readdata             (b_readdata),
                .b_in_clock_en          (b_in_clock_en),
                .b_out_clock_en         (b_out_clock_en)
            );
        end
    endgenerate

endmodule
`default_nettype wire
|