summaryrefslogtreecommitdiff
path: root/python/openvino/demo/ip/intel_ai_ip/verilog/dla_dma.sv
blob: a3a133e1cb5a91f60e43d0f05cf0b1580999100e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
// Copyright 2020-2022 Intel Corporation.
//
// This software and the related documents are Intel copyrighted materials,
// and your use of them is governed by the express license under which they
// were provided to you ("License"). Unless the License provides otherwise,
// you may not use, modify, copy, publish, distribute, disclose or transmit
// this software or the related documents without Intel's prior written
// permission.
//
// This software and the related documents are provided as is, with no express
// or implied warranties, other than those that are expressly stated in the
// License.


// This is the top level module for coreDLA DMA.
//
// This module includes the following:
// - CSR (which contains interrupt generating logic as well as descriptor queue)
// - config reader (which provides input data to config network)
// - filter reader (which provides input data to filter caches)
// - feature reader (which provides input data to stream buffers)
// - DDR arbitration of the readers
// - feature writer (which accepts output data to write to DDR)
//
// The FD and a highly illustrative design review are available at:
// https://sharepoint.amr.ith.intel.com/sites/DLA/Shared%20Documents/Forms/AllItems.aspx?RootFolder=%2Fsites%2FDLA%2FShared%20Documents%2FcoreDLA%2FFDs%5FTest%5FPlans%2FDMA%5FFD
//
// Outside of coreDLA DMA, one still needs to arbitrate DDR between PCIe and DLA.
// All interfaces of coreDLA DMA are AXI, which means if the outside world favors
// AvalonMM then the conversion of interfaces must be done outside of coreDLA.
// The testbench uses AXI BFMs for DDR and PCIe, inside these bus functional models
// are synthesizable code that converts between AXI and Avalon and then uses an
// Avalon BFM.
//
// DMA IP depends on other IP blocks, which are listed below:
// - dla/fpga/hld_lsu/rtl
// - dla/fpga/hld_fifo/rtl
// - dla/fpga/acl_dcfifo/rtl
// - dla/fpga/acl_reset_handler/rtl
//
// Not all AXI ports have been implemented. The following assumptions have been made:
// - id can be assumed to be 0 if not provided (only arid on the ddr interface is provided because there are 3 readers)
// - burst length is limited to 2**DDR_BURST_WIDTH words in a burst even through the signal width is 8 (as per the axi4 spec)
// - lock is not used (no atomic access), encoded as 2'b00
// - cache is not used, device non-bufferable is encoded as 4'b0000
// - prot is not used, no need to protect against illegal transactions, encode as 3'b000
// - qos is not used, encode as 4'b0000 to indicate no participation in any quality of service scheme
// - region is not used, no need for multiple regions, encode as 4'b0000
// - user sideband signals are not used
// - if wstrb (byte enable) is not provided, assume all bytes are updated during a write, encode as all ones
// - response value is ignored for both reads and writes, status assumed to be okay, encoded as 2'h00
// - read last (last word in a burst) value is ignored since number of words in each burst is tracked internally
// - low power interface signals are not used
//
// Unrelated to the AXI spec itself, beware of the following restrictions on the AXI interfaces:
// - all addresses are word aligned (required by AvalonMM, not required by AXI)
//   - csr is a slave, it expects the master to produce such transactions
//   - ddr is a master, this simplification can be exploited by the slave e.g. an AXI to AvalonMM converter
// - bursts will never cross a burst boundary, e.g. if max burst size is 1024 bytes, no burst will ever cross a 1024 byte boundary
//
// This module will internally bias in favour of writes to DDR rather than reads.  This is a performance optimization
// for the IP core.  The readers in the core will happily continue reading even if the compute engine is stalled due
// to a full output fifo, so it is believed that keeping the (expensive) output fifo empty is more important than
// pre-loading the input buffers (which tend to be large anyways in order to avoid DDR reads entirely).  (Limited
// anecdotal evidence supports this hypothesis, as well).
//
// Note that the CoreDLA IP has never been tested without the write-over-read priority.  It is not impossible that
// there is a subtle hidden functional requirement for this prioritization (but it seems unlikely except in the
// case of an extreme imbalance against writes).
//

`resetall
`undefineall
`default_nettype none
`include "dla_acl_parameter_assert.svh"

module dla_dma import dla_dma_pkg::*; #(
    parameter int CSR_ADDR_WIDTH,               //width of the byte address signal, determines CSR address space size, e.g. 11 bit address = 2048 bytes, the largest size that uses only 1 M20K
    parameter int CSR_DATA_BYTES,               //width of the CSR data path, typically 4 bytes
    parameter int CONFIG_DATA_BYTES,            //data width of the config network output port, typically 4 bytes
    parameter int CONFIG_READER_DATA_BYTES,     //data width of the config network input port, typically 8 bytes
    parameter int FILTER_READER_DATA_BYTES,     //data width of the filter reader, typically a whole DDR word (assuming block floating point, C_VECTOR=16 so 4 filter words packed into 1 DDR word)
    parameter int FEATURE_READER_DATA_BYTES,    //data width of the feature reader, typically half of a DDR word for C_VECTOR=16 (assuming FP16 or smaller)
    parameter int FEATURE_WRITER_DATA_BYTES,    //data width of the feature writer, typically half of a DDR word for C_VECTOR=16 (assuming FP16 or smaller)
    parameter int DDR_ADDR_WIDTH,               //width of all byte address signals to global memory, 32 would allow 4 GB of addressable memory
    parameter int DDR_BURST_WIDTH,              //internal width of the axi burst length signal, typically 4, max number of words in a burst = 2**DDR_BURST_WIDTH
    parameter int DDR_DATA_BYTES,               //width of the global memory data path, must be a power of 2, typically 64 bytes
    parameter int DDR_READ_ID_WIDTH,            //width of the AXI ID signal for DDR reads, must be 2 since there are 3 read masters

    parameter bit ENABLE_INPUT_STREAMING,
    parameter bit ENABLE_OUTPUT_STREAMING,

    parameter dla_common_pkg::device_family_t DEVICE, //enumerated device value, required for dma writer
    parameter dla_lt_pkg::lt_arch_t LT_ARCH = '{default:0},        // the arch for the dedicated layout transform (if it exists)
    parameter bit ENABLE_ON_CHIP_PARAMETERS,    // Whether configs and filters are on-chip, meaning config reader and filter reader can be disabled

    //derived parameters and constants
    localparam int AXI_BURST_LENGTH_WIDTH = 8,  //width of the axi burst length signal as per the axi4 spec
    localparam int AXI_BURST_SIZE_WIDTH = 3,    //width of the axi burst size signal as per the axi4 spec
    localparam int AXI_BURST_TYPE_WIDTH = 2     //width of the axi burst type signal as per the axi4 spec
) (
    input  wire                                     clk_ddr,
    input  wire                                     clk_dla,
    input  wire                                     clk_pcie,
    input  wire                                     i_resetn_async,     //active low reset that has NOT been synchronized to any clock

    //interrupt request, AXI4 stream master without data, runs on pcie clock
    output logic                                    o_interrupt_level,

    //dla can report an error to dma csr by asserting this for one clock cycle, runs on ddr clock
    input  wire                                     i_token_error,

    //CSR can request a reset of the DLA IP by asserting this signal (held until reset), runs on ddr clock
    output logic                                    o_request_ip_reset,

    input wire                                      i_stream_started, //indicates that the first word of the input stream is being read this cycle
    input wire                                      i_stream_done, //indcates that the output streamer is done writing the output feature

    // Indicates when input feeder received the first word (aka input streamer sent the first word)
    // and xbar sent the last word (aka output streamer received the last word)
    input wire                                      i_stream_received_first_word,
    input wire                                      i_stream_sent_last_word,

    //CSR, AXI4 lite slave, runs on ddr clock
    input  wire                                     i_csr_arvalid,
    input  wire                [CSR_ADDR_WIDTH-1:0] i_csr_araddr,
    output logic                                    o_csr_arready,
    output logic                                    o_csr_rvalid,
    output logic             [8*CSR_DATA_BYTES-1:0] o_csr_rdata,
    input  wire                                     i_csr_rready,
    input  wire                                     i_csr_awvalid,
    input  wire                [CSR_ADDR_WIDTH-1:0] i_csr_awaddr,
    output logic                                    o_csr_awready,
    input  wire                                     i_csr_wvalid,
    input  wire              [8*CSR_DATA_BYTES-1:0] i_csr_wdata,
    output logic                                    o_csr_wready,
    output logic                                    o_csr_bvalid,
    input  wire                                     i_csr_bready,

    //config reader data, AXI4 stream master, runs on dla clock
    output logic                                    o_config_reader_valid,
    output logic   [8*CONFIG_READER_DATA_BYTES-1:0] o_config_reader_data,
    input  wire                                     i_config_reader_ready,

    //config for filter reader, AXI4 stream slave, runs on ddr clock
    input  wire                                     i_config_filter_reader_valid,
    input  wire           [8*CONFIG_DATA_BYTES-1:0] i_config_filter_reader_data,
    output logic                                    o_config_filter_reader_ready,

    //filter reader data, AXI4 stream master, runs on dla clock
    output logic                                    o_filter_reader_valid,
    output logic   [8*FILTER_READER_DATA_BYTES-1:0] o_filter_reader_data,
    input  wire                                     i_filter_reader_ready,

    //config for feature reader, AXI4 stream slave, runs on ddr clock
    input  wire                                     i_config_feature_reader_valid,
    input  wire           [8*CONFIG_DATA_BYTES-1:0] i_config_feature_reader_data,
    output logic                                    o_config_feature_reader_ready,

    //config for layout transform
    input wire                                      i_config_lt_reader_valid,
    input wire            [8*CONFIG_DATA_BYTES-1:0] i_config_lt_reader_data,
    output logic                                    o_config_lt_reader_ready,

    //feature reader data, AXI4 stream master, runs on dla clock
    output logic                                    o_feature_reader_valid,
    output logic  [8*FEATURE_READER_DATA_BYTES-1:0] o_feature_reader_data,
    input  wire                                     i_feature_reader_ready,

    //config for feature writer, AXI4 stream slave, runs on ddr clock
    input  wire                                     i_config_feature_writer_valid,
    input  wire           [8*CONFIG_DATA_BYTES-1:0] i_config_feature_writer_data,
    output logic                                    o_config_feature_writer_ready,

    //feature writer data, AXI4 stream slave, runs on ddr clock
    input  wire                                     i_feature_writer_valid,
    input  wire   [8*FEATURE_WRITER_DATA_BYTES-1:0] i_feature_writer_data,
    output logic                                    o_feature_writer_ready,

    //debug network AXI-4 lite interface, read request and read response channels, runs on dla_clock
    output logic                                    o_debug_network_arvalid,
    output logic             [8*CSR_DATA_BYTES-1:0] o_debug_network_araddr,
    input  wire                                     i_debug_network_arready,
    input  wire                                     i_debug_network_rvalid,
    input  wire              [8*CSR_DATA_BYTES-1:0] i_debug_network_rdata,
    output logic                                    o_debug_network_rready,

    //global memory, AXI4 master, runs on ddr clock
    output logic                                    o_ddr_arvalid,
    output logic               [DDR_ADDR_WIDTH-1:0] o_ddr_araddr,
    output logic       [AXI_BURST_LENGTH_WIDTH-1:0] o_ddr_arlen,
    output logic         [AXI_BURST_SIZE_WIDTH-1:0] o_ddr_arsize,
    output logic         [AXI_BURST_TYPE_WIDTH-1:0] o_ddr_arburst,
    output logic            [DDR_READ_ID_WIDTH-1:0] o_ddr_arid,
    input  wire                                     i_ddr_arready,
    input  wire                                     i_ddr_rvalid,
    input  wire              [8*DDR_DATA_BYTES-1:0] i_ddr_rdata,
    input  wire             [DDR_READ_ID_WIDTH-1:0] i_ddr_rid,
    output logic                                    o_ddr_rready,
    output logic                                    o_ddr_awvalid,
    output logic               [DDR_ADDR_WIDTH-1:0] o_ddr_awaddr,
    output logic       [AXI_BURST_LENGTH_WIDTH-1:0] o_ddr_awlen,
    output logic         [AXI_BURST_SIZE_WIDTH-1:0] o_ddr_awsize,
    output logic         [AXI_BURST_TYPE_WIDTH-1:0] o_ddr_awburst,
    input  wire                                     i_ddr_awready,
    output logic                                    o_ddr_wvalid,
    output logic             [8*DDR_DATA_BYTES-1:0] o_ddr_wdata,
    output logic               [DDR_DATA_BYTES-1:0] o_ddr_wstrb,
    output logic                                    o_ddr_wlast,
    input  wire                                     i_ddr_wready,
    input  wire                                     i_ddr_bvalid,
    output logic                                    o_ddr_bready,
    output logic                                    o_streaming_active
);

    /////////////////////////////////
    //  Parameter legality checks  //
    /////////////////////////////////
    //do not allow number of words per burst to exceed the axi spec (even through the LSU will behave just fine)
    `DLA_ACL_PARAMETER_ASSERT(DDR_BURST_WIDTH <= AXI_BURST_LENGTH_WIDTH)

    //id width on the ddr interface is a parameter instead of localparam only so that if the value changes,
    //then it can be changed in one place instead all everywhere the signal width is used
    //3 readers requires
    `DLA_ACL_PARAMETER_ASSERT(DDR_READ_ID_WIDTH == 2)

    //data width is limited by the axi spec
    `DLA_ACL_PARAMETER_ASSERT(DDR_DATA_BYTES >= 1 && DDR_DATA_BYTES <= 128)

    //load-store units require a power of 2 width for the global memory interface
    `DLA_ACL_PARAMETER_ASSERT(DDR_DATA_BYTES == 2**$clog2(DDR_DATA_BYTES))


    ///////////////
    //  Signals  //
    ///////////////

    //reset
    logic                           ddr_sclrn;

    //feature writer reports it is done, goes to csr and feature reader
    logic                           token_done_csr, token_done_reader;

    logic                           license_flag;
    logic                           writer_error;

    //csr to config reader or to ddrfree config network (langsu: latter is not implemented yet)
    logic [8*CONFIG_DATA_BYTES-1:0] csr_config_data;
    logic                           csr_config_valid, csr_config_for_intercept, csr_config_ready;

    //lsu to read arb
    logic                           lsu_ddr_arvalid [NUM_READERS-1:0];
    logic      [DDR_ADDR_WIDTH-1:0] lsu_ddr_araddr  [NUM_READERS-1:0];
    logic     [DDR_BURST_WIDTH-1:0] lsu_ddr_arlen   [NUM_READERS-1:0];
    logic                           lsu_ddr_arready [NUM_READERS-1:0];
    logic                           lsu_ddr_rvalid  [NUM_READERS-1:0];
    logic    [8*DDR_DATA_BYTES-1:0] lsu_ddr_rdata   [NUM_READERS-1:0];
    logic                           lsu_ddr_rready  [NUM_READERS-1:0];

    //favor writes over reads for ddr; see the comment block by the
    //combinational logic associated with these signals for the explanation
    //of their naming.
    logic                           dma_prevcycle_read_not_acknowledged;
    logic                           write_overrides_read;
    logic                           rawp_ddr_awvalid, rawp_ddr_wvalid;
    logic                           rawf_ddr_awready, rawf_ddr_wready;
    logic                           rawp_ddr_arvalid;
    logic                           rawf_ddr_arready;

    //axi spec requires a signal width of 8 for burst length
    logic     [DDR_BURST_WIDTH-1:0] raw_ddr_arlen;
    logic     [DDR_BURST_WIDTH-1:0] raw_ddr_awlen;

    //used to backpressure ddrfree config network read
    logic                           streaming_reload;
    logic                           lt_param_error;

    /////////////////////////////
    //  Reset Synchronization  //
    /////////////////////////////

    dla_reset_handler_simple #(
        .USE_SYNCHRONIZER   (RESET_USE_SYNCHRONIZER),
        .PIPE_DEPTH         (RESET_PIPE_DEPTH),
        .NUM_COPIES         (RESET_NUM_COPIES)
    )
    ddr_reset_synchronizer
    (
        .clk                (clk_ddr),
        .i_resetn           (i_resetn_async),
        .o_sclrn            (ddr_sclrn)
    );



    ///////////
    //  CSR  //
    ///////////

    //includes register interface for host control as well as interrupt
    //contains the descriptor queue for providing work to the config reader

    dla_dma_csr #(
        .CSR_ADDR_WIDTH             (CSR_ADDR_WIDTH),
        .CSR_DATA_BYTES             (CSR_DATA_BYTES),
        .CONFIG_DATA_BYTES          (CONFIG_DATA_BYTES),
        .CONFIG_READER_DATA_BYTES   (CONFIG_READER_DATA_BYTES),
        .ENABLE_INPUT_STREAMING     (ENABLE_INPUT_STREAMING),
        .ENABLE_OUTPUT_STREAMING    (ENABLE_OUTPUT_STREAMING),
        .ENABLE_ON_CHIP_PARAMETERS  (ENABLE_ON_CHIP_PARAMETERS)
    )
    csr
    (
        .clk_ddr                    (clk_ddr),
        .clk_pcie                   (clk_pcie),
        .clk_dla                    (clk_dla),
        .i_sclrn_ddr                (ddr_sclrn),
        .i_resetn_async             (i_resetn_async),
        .i_token_done               (token_done_csr | (i_stream_done & ENABLE_OUTPUT_STREAMING)),
        .i_token_stream_started     (i_stream_started),
        .i_token_error              (i_token_error | lt_param_error),
        .i_stream_received_first_word (i_stream_received_first_word),
        .i_stream_sent_last_word      (i_stream_sent_last_word),
        .i_license_flag             (license_flag),
        .i_token_out_of_inferences  (writer_error),
        .i_input_feature_rvalid    (lsu_ddr_rvalid[FEATURE_READER_ID]),
        .i_input_feature_rready    (lsu_ddr_rready[FEATURE_READER_ID]),
        .i_input_filter_rvalid     (lsu_ddr_rvalid[FILTER_READER_ID]),
        .i_input_filter_rready     (lsu_ddr_rready[FILTER_READER_ID]),
        .i_output_feature_wvalid   (rawp_ddr_wvalid),
        .i_output_feature_wready   (rawf_ddr_wready),
        .o_interrupt_level          (o_interrupt_level),
        .o_config_valid             (csr_config_valid),
        .o_config_data              (csr_config_data),
        .o_config_for_intercept     (csr_config_for_intercept),
        .i_config_ready             (csr_config_ready),
        .o_debug_network_arvalid    (o_debug_network_arvalid),
        .o_debug_network_araddr     (o_debug_network_araddr),
        .i_debug_network_arready    (i_debug_network_arready),
        .i_debug_network_rvalid     (i_debug_network_rvalid),
        .i_debug_network_rdata      (i_debug_network_rdata),
        .o_debug_network_rready     (o_debug_network_rready),
        .i_csr_arvalid              (i_csr_arvalid),
        .i_csr_araddr               (i_csr_araddr),
        .o_csr_arready              (o_csr_arready),
        .o_csr_rvalid               (o_csr_rvalid),
        .o_csr_rdata                (o_csr_rdata),
        .i_csr_rready               (i_csr_rready),
        .i_csr_awvalid              (i_csr_awvalid),
        .i_csr_awaddr               (i_csr_awaddr),
        .o_csr_awready              (o_csr_awready),
        .i_csr_wvalid               (i_csr_wvalid),
        .i_csr_wdata                (i_csr_wdata),
        .o_csr_wready               (o_csr_wready),
        .o_csr_bvalid               (o_csr_bvalid),
        .i_csr_bready               (i_csr_bready),
        .o_request_ip_reset         (o_request_ip_reset),
        .o_streaming_active         (o_streaming_active)
    );



    /////////////////////
    //  Config reader  //
    /////////////////////

    //the config interface of the generic dma reader comes from the descriptor queue inside the csr
    //output data interface of the generic dma reader serves as the input for the config network

    if (~ENABLE_ON_CHIP_PARAMETERS) begin
        dla_dma_reader #(
            .READER_WRITER_SEL          (CONFIG_READER_ID),
            .IS_CONFIG_READER           (1),
            .NUM_DIMENSIONS             (CONFIG_READER_NUM_DIMENSIONS),
            .CONFIG_DATA_BYTES          (CONFIG_DATA_BYTES),
            .READER_DATA_BYTES          (CONFIG_READER_DATA_BYTES),
            .DDR_ADDR_WIDTH             (DDR_ADDR_WIDTH),
            .DDR_DATA_BYTES             (DDR_DATA_BYTES),
            .DDR_BURST_WIDTH            (DDR_BURST_WIDTH),
            .LT_ARCH                    (LT_ARCH)
        )
        config_reader
        (
            .clk_ddr                    (clk_ddr),
            .clk_dla                    (clk_dla),
            .i_sclrn_ddr                (ddr_sclrn),
            .i_resetn_async             (i_resetn_async),
            .i_config_valid             (csr_config_valid),
            .i_config_data              (csr_config_data),
            .i_config_for_intercept     (csr_config_for_intercept),
            .o_config_ready             (csr_config_ready),  // config reader is ready to receive data from csr
            .i_token_can_start          (1'b0), //config data is read only, there is no data dependency that would prevent the config reader from starting
            .o_reader_valid             (o_config_reader_valid),  // config data read is valid
            .o_reader_data              (o_config_reader_data),   // config data read from reader
            .i_reader_ready             (i_config_reader_ready),  // config network is ready to receive config data
            .o_ddr_arvalid              (lsu_ddr_arvalid[CONFIG_READER_ID]),
            .o_ddr_araddr               (lsu_ddr_araddr [CONFIG_READER_ID]),
            .o_ddr_arlen                (lsu_ddr_arlen  [CONFIG_READER_ID]),
            .i_ddr_arready              (lsu_ddr_arready[CONFIG_READER_ID]),
            .i_ddr_rvalid               (lsu_ddr_rvalid [CONFIG_READER_ID]),
            .i_ddr_rdata                (lsu_ddr_rdata  [CONFIG_READER_ID]),
            .o_ddr_rready               (lsu_ddr_rready [CONFIG_READER_ID])
        );
    end else begin
        // Indicate config_reader is ready to receive data, but we don't care.
        assign lsu_ddr_rready [CONFIG_READER_ID] = 1'b0;
        // we don't care if read addr to arbitar is valid or not
        assign lsu_ddr_arvalid[CONFIG_READER_ID] = 1'b0;
        // Don't care. config_network doesn't check valid to start ddrfree config read
        assign o_config_reader_valid = 1'b0;
        // Set to 1 to not stall descriptor_queue
        assign csr_config_ready = 1'b1;
    end


    /////////////////////
    //  Filter reader  //
    /////////////////////

    if (~ENABLE_ON_CHIP_PARAMETERS) begin
        dla_dma_reader #(
            .READER_WRITER_SEL          (FILTER_READER_ID),
            .IS_CONFIG_READER           (0),
            .NUM_DIMENSIONS             (FILTER_READER_NUM_DIMENSIONS),
            .CONFIG_DATA_BYTES          (CONFIG_DATA_BYTES),
            .READER_DATA_BYTES          (FILTER_READER_DATA_BYTES),
            .DDR_ADDR_WIDTH             (DDR_ADDR_WIDTH),
            .DDR_DATA_BYTES             (DDR_DATA_BYTES),
            .DDR_BURST_WIDTH            (DDR_BURST_WIDTH),
            .LT_ARCH                    (LT_ARCH)
        )
        filter_reader
        (
            .clk_ddr                    (clk_ddr),
            .clk_dla                    (clk_dla),
            .i_sclrn_ddr                (ddr_sclrn),
            .i_resetn_async             (i_resetn_async),
            .i_config_valid             (i_config_filter_reader_valid),
            .i_config_data              (i_config_filter_reader_data),
            .i_config_for_intercept     (1'b0), //since this is not the config reader, all config data goes to the address generator
            .o_config_ready             (o_config_filter_reader_ready), // filter reader module is ready to receive config data from config network
            .i_token_can_start          (1'b0), //filter data is read only, there is no data dependency that would prevent the filter reader from starting
            .o_reader_valid             (o_filter_reader_valid),
            .o_reader_data              (o_filter_reader_data),
            .i_reader_ready             (i_filter_reader_ready),
            .o_ddr_arvalid              (lsu_ddr_arvalid[FILTER_READER_ID]),
            .o_ddr_araddr               (lsu_ddr_araddr [FILTER_READER_ID]),
            .o_ddr_arlen                (lsu_ddr_arlen  [FILTER_READER_ID]),
            .i_ddr_arready              (lsu_ddr_arready[FILTER_READER_ID]),
            .i_ddr_rvalid               (lsu_ddr_rvalid [FILTER_READER_ID]),
            .i_ddr_rdata                (lsu_ddr_rdata  [FILTER_READER_ID]),
            .o_ddr_rready               (lsu_ddr_rready [FILTER_READER_ID])
        );
    end else begin
        // Indicate filter_reader is ready to receive data, but we don't care.
        assign lsu_ddr_rready [FILTER_READER_ID] = 1'b0;
        // we don't care if read addr to arbitar is valid or not
        assign lsu_ddr_arvalid[FILTER_READER_ID] = 1'b0;
        // Don't care. Sequencer ignores this
        assign o_filter_reader_valid = 1'b0;
        // Indicate filter reader is ready to receive configs,
        // so that config network fifo pops filter reader configs out.
        // We don't really use them because we don't have any filter_reader
        assign o_config_filter_reader_ready = 1'b1;
    end


    //////////////////////
    //  Feature reader  //
    //////////////////////

    dla_dma_reader #(
        .READER_WRITER_SEL          (FEATURE_READER_ID),
        .IS_CONFIG_READER           (0),
        .DO_LAYOUT_TRANSFORM        (LT_ARCH.ENABLE_LT & ~ENABLE_INPUT_STREAMING),
        .NUM_DIMENSIONS             (FEATURE_READER_NUM_DIMENSIONS),
        .CONFIG_DATA_BYTES          (CONFIG_DATA_BYTES),
        .READER_DATA_BYTES          (FEATURE_READER_DATA_BYTES),
        .DDR_ADDR_WIDTH             (DDR_ADDR_WIDTH),
        .DDR_DATA_BYTES             (DDR_DATA_BYTES),
        .DDR_BURST_WIDTH            (DDR_BURST_WIDTH),
        .LT_ARCH                    (LT_ARCH)
    )
    feature_reader
    (
        .clk_ddr                    (clk_ddr),
        .clk_dla                    (clk_dla),
        .i_sclrn_ddr                (ddr_sclrn),
        .i_resetn_async             (i_resetn_async),
        .i_config_valid             (i_config_feature_reader_valid),
        .i_config_data              (i_config_feature_reader_data),
        .i_config_for_intercept     (1'b0), //since this is not the config reader, all config data goes to the address generator
        .o_config_ready             (o_config_feature_reader_ready),
        .i_config_lt_valid          (i_config_lt_reader_valid),
        .i_config_lt_data           (i_config_lt_reader_data),
        .o_config_lt_ready          (o_config_lt_reader_ready),
        .i_token_can_start          (token_done_reader),
        .o_reader_valid             (o_feature_reader_valid),
        .o_reader_data              (o_feature_reader_data),
        .i_reader_ready             (i_feature_reader_ready),
        .o_ddr_arvalid              (lsu_ddr_arvalid[FEATURE_READER_ID]),
        .o_ddr_araddr               (lsu_ddr_araddr [FEATURE_READER_ID]),
        .o_ddr_arlen                (lsu_ddr_arlen  [FEATURE_READER_ID]),
        .i_ddr_arready              (lsu_ddr_arready[FEATURE_READER_ID]),
        .i_ddr_rvalid               (lsu_ddr_rvalid [FEATURE_READER_ID]),
        .i_ddr_rdata                (lsu_ddr_rdata  [FEATURE_READER_ID]),
        .o_ddr_rready               (lsu_ddr_rready [FEATURE_READER_ID]),
        .o_param_error              (lt_param_error)
    );



    ///////////////////////////////////////////////////////////////////////////
    //  Arbitrate read requests and steer read data for all DLA DMA readers  //
    ///////////////////////////////////////////////////////////////////////////

    //note there is another DDR arbiter between PCIe and DLA that lives outside of DLA

    dla_dma_read_arb #(
        .NUM_PORTS                  (NUM_READERS),
        .DDR_ADDR_WIDTH             (DDR_ADDR_WIDTH),
        .DDR_BURST_WIDTH            (DDR_BURST_WIDTH),
        .DDR_DATA_BYTES             (DDR_DATA_BYTES)
    )
    read_arb
    (
        .clk                        (clk_ddr),
        .i_sclrn                    (ddr_sclrn),

        // Read requests from config, filter, and feature readers
        .i_lsu_arvalid              (lsu_ddr_arvalid),
        .i_lsu_araddr               (lsu_ddr_araddr),
        .i_lsu_arlen                (lsu_ddr_arlen),
        .o_lsu_arready              (lsu_ddr_arready),

        // Read address to the external world; tagged with an arid
        .o_ddr_arvalid              (rawp_ddr_arvalid),
        .o_ddr_araddr               (o_ddr_araddr),
        .o_ddr_arlen                (raw_ddr_arlen),
        .o_ddr_arid                 (o_ddr_arid),
        .i_ddr_arready              (rawf_ddr_arready),

        // Read data from external world
        .i_ddr_rvalid               (i_ddr_rvalid),
        .i_ddr_rdata                (i_ddr_rdata),
        .i_ddr_rid                  (i_ddr_rid),
        .o_ddr_rready               (o_ddr_rready),

        // Read data config, filter, and feature readers
        .o_lsu_rvalid               (lsu_ddr_rvalid),
        .o_lsu_rdata                (lsu_ddr_rdata),
        .i_lsu_rready               (lsu_ddr_rready)
    );

    // Prioritize writes over reads; believed to improve performance.
    //
    // We do a couple things here:
    // 1) If the prev cycle was an unacknowledged read (arvalid HIGH *and* arready LOW), then
    //    we must keep the read request outstanding.  In this case, block any writes (by
    //    forcing awvalid, wvalid, awready, and wready to LOW).
    // 2) Otherwise, if there is a write request, then block a read request if a new request
    //    has been made (do this by forcing arready and arvalid to LOW).  (Note that we
    //    do not touch rready or rvalid, since if data happens to arrive, then there is
    //    no reason not to accept it if we can).
    // 3) Lastly, allow a read request through (if any).
    //
    // rawp_ -- these are the internal signals prior to our forcing logic.
    // rawf_ -- these are the internal signals after our forcing logic.
    //
    // Where the forcing logic drives directly to the external world, we simply use the
    // external signal name.
    //

    // 1: Block writes, if necessary.
    //
    // If the previous cycle was a read (whether an acknowledged read or an unacknowledged read),
    // then neither o_ddr_awvalid nor o_ddr_wvalid were HIGH.
    // Therefore it is okay to keep them forced to LOW, regardless of rawp_ddr_awvalid/rawp_ddr_wvalid.
    assign o_ddr_awvalid = rawp_ddr_awvalid & ~dma_prevcycle_read_not_acknowledged;
    assign o_ddr_wvalid = rawp_ddr_wvalid & ~dma_prevcycle_read_not_acknowledged;
    assign rawf_ddr_awready = i_ddr_awready & ~dma_prevcycle_read_not_acknowledged;
    assign rawf_ddr_wready = i_ddr_wready & ~dma_prevcycle_read_not_acknowledged;

    // 2: Is a write going to over-ride today's read request?
    //
    // Note that if dma_prevcycle_read_not_acknowledged is HIGH, then both o_ddr_awvalid
    // and o_ddr_wvalid will already be forced LOW by the logic above, so
    // "write_overrides_read" can never happen if the previous cycle was an unacknowledged
    // read.
    assign write_overrides_read = o_ddr_awvalid | o_ddr_wvalid;

    assign o_ddr_arvalid = rawp_ddr_arvalid & ~write_overrides_read;
    assign rawf_ddr_arready = i_ddr_arready & ~write_overrides_read;

    //axi spec requires a signal width of 8 for burst length
    assign o_ddr_arlen = { {(AXI_BURST_LENGTH_WIDTH-DDR_BURST_WIDTH){1'h0}}, raw_ddr_arlen };

    //tie off constant axi signals
    assign o_ddr_arsize = $clog2(DDR_DATA_BYTES);   //burst size is always maximal, e.g. all bytes within a word should be transferred
    assign o_ddr_arburst = 2'h1;                    //burst type is incrementing, this value comes from the axi spec

    // If the previous cycle was an unacknowledged read, then we must continue to assert arvalid
    // until it is acknowledged by arready.  We are not allowed to randomly deassert arvalid.  Use
    // dma_prevcycle_read_not_acknowledged to track this condition.
    always_ff @(posedge clk_ddr) begin
        dma_prevcycle_read_not_acknowledged <= o_ddr_arvalid & ~i_ddr_arready;

        if (~ddr_sclrn) begin
            // If we are in reset, then o_ddr_arvalid will be LOW since the read_arb
            // is in reset as well (forcing rawp_ddr_arvalid to LOW).  The read_arb
            // shares our reset, namely ddr_sclrn.  This means that we can safely
            // reset to ~dma_prevcycle_read_not_acknowledged.
            //
            // We can make no assumption about i_ddr_arready while in reset.
            dma_prevcycle_read_not_acknowledged <= 1'b0;
        end
    end

    //////////////////////
    //  Feature writer  //
    //////////////////////

    dla_dma_writer #(
        .READER_WRITER_SEL          (FEATURE_WRITER_ID),
        .NUM_DIMENSIONS             (FEATURE_WRITER_NUM_DIMENSIONS),
        .CONFIG_DATA_BYTES          (CONFIG_DATA_BYTES),
        .WRITER_DATA_BYTES          (FEATURE_WRITER_DATA_BYTES),
        .DDR_ADDR_WIDTH             (DDR_ADDR_WIDTH),
        .DDR_DATA_BYTES             (DDR_DATA_BYTES),
        .DDR_BURST_WIDTH            (DDR_BURST_WIDTH),
        .DEVICE                     (DEVICE)
    )
    feature_writer
    (
        .clk_ddr                    (clk_ddr),
        .i_sclrn_ddr                (ddr_sclrn),
        .i_resetn_async             (i_resetn_async),
        .i_config_valid             (i_config_feature_writer_valid),
        .i_config_data              (i_config_feature_writer_data),
        .o_config_ready             (o_config_feature_writer_ready),
        .o_token_done_csr           (token_done_csr),
        .o_token_done_reader        (token_done_reader),
        .o_license_flag             (license_flag),
        .o_writer_err               (writer_error),
        .i_writer_valid             (i_feature_writer_valid),
        .i_writer_data              (i_feature_writer_data),
        .o_writer_ready             (o_feature_writer_ready),
        .o_ddr_awvalid              (rawp_ddr_awvalid),
        .o_ddr_awaddr               (o_ddr_awaddr),
        .o_ddr_awlen                (raw_ddr_awlen),
        .i_ddr_awready              (rawf_ddr_awready),
        .o_ddr_wvalid               (rawp_ddr_wvalid),
        .o_ddr_wdata                (o_ddr_wdata),
        .o_ddr_wstrb                (o_ddr_wstrb),
        .o_ddr_wlast                (o_ddr_wlast),
        .i_ddr_wready               (rawf_ddr_wready),
        .i_ddr_bvalid               (i_ddr_bvalid),
        .o_ddr_bready               (o_ddr_bready)
    );

    //axi spec requires a signal width of 8 for burst length
    assign o_ddr_awlen = { {(AXI_BURST_LENGTH_WIDTH-DDR_BURST_WIDTH){1'h0}}, raw_ddr_awlen };

    //tie off constant axi signals, use the same settings as read address channel
    assign o_ddr_awsize =  o_ddr_arsize;
    assign o_ddr_awburst = o_ddr_arburst;

endmodule