summaryrefslogtreecommitdiff
path: root/python/openvino/runtime/streaming/ed0_streaming_example/system_console_script.tcl
blob: ab78d2ec14b0b45061f7191b2f4f0b7fe0a7b480 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
# This design example only supports an AXI Width of 128 bits = 16 bytes.
# validate_args uses this to check input-file alignment and
# calulate_egress_axi_transfers uses it to check the egress transfer size.
variable AXI_STREAM_DATA_WIDTH_BYTES 16
# This design example has a limit to ingress on-chip memory size in bytes
# (524288 bytes = 512 KiB). main streams larger inputs in chunks of this size.
variable INGRESS_ON_CHIP_MEMORY_SIZE_BYTES 524288
# This design example has a limit to egress on-chip memory size in bytes
# (131072 bytes = 128 KiB). main reads larger outputs back in chunks of this size.
variable EGRESS_ON_CHIP_MEMORY_SIZE_BYTES 131072

# DDR-Free ED Address Map Constants
# Base addresses that the procedures below add register offsets to before
# calling master_write_32 / master_read_32.
variable DLA_IP_0_CSR_ADDR 0x00038000
variable INGRESS_SGDMA_CSR_ADDR 0x00030000
variable INGRESS_SGDMA_DESCRIPTOR_ADDR 0x00030020
variable EGRESS_SGDMA_CSR_ADDR 0x00030040
variable EGRESS_SGDMA_DESCRIPTOR_ADDR 0x00030060


# Validate the script arguments and report the input file size.
#
# Arguments:
#   input_file      - path of the binary input file to stream to the FPGA
#   num_inferences  - requested number of inferences; must be an integer >= 1
#
# Returns:
#   Size of input_file in bytes.
#
# Prints a message and exits with status 1 when num_inferences is not a
# positive integer, when input_file does not exist or is empty, or when its
# size is not a multiple of AXI_STREAM_DATA_WIDTH_BYTES. A file larger than
# INGRESS_ON_CHIP_MEMORY_SIZE_BYTES is accepted with an informational message.
proc validate_args {input_file num_inferences} {
  global INGRESS_ON_CHIP_MEMORY_SIZE_BYTES
  global AXI_STREAM_DATA_WIDTH_BYTES

  # Make sure user requested number of inferences is valid.
  # A non-numeric value must be rejected explicitly: a bare "<" would fall back
  # to string comparison and let it through. Zero is rejected as well, matching
  # the message below.
  if {![string is integer -strict $num_inferences] || $num_inferences < 1} {
    puts "Number of inferences must be greater than 0."
    exit 1
  }

  # Check if the file exists
  if {![file exists $input_file]} {
      puts "Error: The file '$input_file' does not exist."
      exit 1
  }

  # Get the size of the file in bytes
  set file_size [file size $input_file]

  # An empty file would pass the alignment check below but gives nothing to stream
  if {$file_size == 0} {
      puts "Error: The file '$input_file' is empty."
      exit 1
  }

  # Inputs larger than the ingress on-chip memory are allowed; just tell the user
  # that the data will be sent in several partitions.
  if {$file_size > $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES} {
      puts "Input file '$input_file' is too large to fully fit into on-chip memory of size \n      $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES bytes. Input file will be partitioned and transferred partially.\n"
  }

  # Make sure the input file is aligned to the mSGDMA/FPGA AI Suite stream width
  if {$file_size % $AXI_STREAM_DATA_WIDTH_BYTES != 0} {
      puts "Error: this design example only supports input sizes aligned to 128 bits. Please pad accordingly."
      exit 1
  }

  return $file_size
}


# Calculate the number of bytes the output streamer will send for one inference.
# The output streamer sends a number of AXI transfers based on the output shape
# (C, H, W):
#   bytes = H * W * ceil(C/8) * 16
#
# Arguments:
#   C - number of output channels (positive integer)
#   H - output height (positive integer)
#   W - output width (positive integer)
#
# Returns:
#   The number of bytes expected from the output streamer.
#
# Prints a message and exits with status 1 when any dimension is not a positive
# integer or when the computed size is not a positive multiple of
# AXI_STREAM_DATA_WIDTH_BYTES. A result larger than
# EGRESS_ON_CHIP_MEMORY_SIZE_BYTES is accepted with an informational message.
proc calulate_egress_axi_transfers {C H W} {
  global EGRESS_ON_CHIP_MEMORY_SIZE_BYTES
  global AXI_STREAM_DATA_WIDTH_BYTES

  # Validate each dimension on its own. Checking only the product would accept
  # two negative dimensions, and a fractional C would be truncated instead of
  # rounded up by the integer ceil below.
  foreach dim [list $C $H $W] {
    if {![string is integer -strict $dim] || $dim <= 0} {
      puts "Error with egress AXI transfer calculation. Please check your output shape size arguments (C H W)"
      exit 1
    }
  }

  # Calculation for # of AXI transfers from output streamer
  # # of transfers in bytes = H * W * ceil(C/8)*16
  # ($C + 7) / 8 is integer division here, i.e. ceil(C/8).
  set output_streamer_transfers_bytes [expr {
    $H * $W * ((($C + 7) / 8) * 16)
  }]

  # Make sure output streamer # of transfer bytes is aligned to AXI_STREAM_DATA_WIDTH
  if {$output_streamer_transfers_bytes <= 0 || $output_streamer_transfers_bytes % $AXI_STREAM_DATA_WIDTH_BYTES != 0} {
    puts "Error with egress AXI transfer calculation. Please check your output shape size arguments (C H W)"
    exit 1
  }

  # Outputs larger than the egress on-chip memory are allowed; just tell the user
  # that the result will be read back in several partitions.
  if {$output_streamer_transfers_bytes > $EGRESS_ON_CHIP_MEMORY_SIZE_BYTES} {
      puts "Output inference results is too large to fully fit into on-chip memory of size \n      $EGRESS_ON_CHIP_MEMORY_SIZE_BYTES bytes. Output inference results will be partitioned and transferred partially.\n"
  }

  # Format the expected output size into hex representation for the log
  set output_streamer_transfers_hex [format "0x%X" $output_streamer_transfers_bytes]
  puts "Expecting $output_streamer_transfers_hex bytes to be transferred by FPGA AI Suite output streamer"

  return $output_streamer_transfers_bytes
}


# Initiate reset via source/probe IP.
#
# Claims the first In-System Sources and Probes (issp) service reported by
# System Console, drives its source data to 0x0 and then to 0x1, and releases
# the service again.
# NOTE(review): presumably the source bit drives an active-low reset (main
# refers to it as "resetn") - confirm against the hardware design.
#
# Prints a message and exits with status 1 when no issp service is available.
proc assert_reset {} {
  set issp_paths [get_service_paths issp]
  if {[llength $issp_paths] == 0} {
    puts "Error: no issp (source/probe) service found. Cannot reset the design."
    exit 1
  }

  set claimed_issp [claim_service issp [lindex $issp_paths 0] mylib]

  # Write 0 then 1 to pulse the source output
  issp_write_source_data $claimed_issp 0x0
  issp_write_source_data $claimed_issp 0x1

  # Release the claim so the service is not left held once the reset is done
  close_service issp $claimed_issp
}


# Bring the coreDLA CSRs and both SGDMA engines into their starting state
# (register map: fpga/csr/rtl/inc/dla_csr_constants.svh).
#
# Arguments:
#   master_path - claimed System Console master service used for the writes
#
# The writes are issued in a fixed order: four DLA CSR writes, then the egress
# SGDMA reset, then the ingress SGDMA reset.
proc initialize_coredla {master_path} {
  global DLA_IP_0_CSR_ADDR
  global INGRESS_SGDMA_CSR_ADDR
  global EGRESS_SGDMA_CSR_ADDR

  # DLA CSR offset/value pairs, written top to bottom.
  # The last pair (0x22c <- 1) instructs DLA to accept input until the register
  # is cleared. NOTE(review): the meaning of offsets 0x220, 0x204 and 0x200 is
  # not visible in this script - see the register map referenced above.
  set dla_csr_writes {
    0x220 0
    0x204 0
    0x200 3
    0x22c 1
  }
  foreach {csr_offset csr_value} $dla_csr_writes {
    master_write_32 $master_path [expr {$DLA_IP_0_CSR_ADDR + $csr_offset}] $csr_value
  }

  # Reset egress SGDMA, then ingress SGDMA (0x2 written at CSR offset 0x4)
  master_write_32 $master_path [expr {$EGRESS_SGDMA_CSR_ADDR + 0x4}] 0x2
  master_write_32 $master_path [expr {$INGRESS_SGDMA_CSR_ADDR + 0x4}] 0x2
}


# Load the contents of input_file into memory at 0x00200000 through the given
# master service. This is the same address the ingress descriptor uses as its
# source (see queue_ingress_descriptor).
#
# Arguments:
#   input_file  - path of the binary file to load
#   master_path - claimed System Console master service
proc stage_input {input_file master_path} {
  set ingress_buffer_addr 0x00200000
  master_write_from_file $master_path $input_file $ingress_buffer_addr
}


# Queue one descriptor on the egress streaming mSGDMA.
#
# Arguments:
#   master_path - claimed System Console master service
#   size        - transfer length in bytes; should be 128 bit aligned
#
# Three descriptor registers are written in order: destination address,
# length, and finally the word that queues the descriptor.
proc queue_egress_descriptor {master_path size} {
  global EGRESS_SGDMA_DESCRIPTOR_ADDR

  # Register addresses relative to the egress descriptor base
  set dest_addr_reg [expr {$EGRESS_SGDMA_DESCRIPTOR_ADDR + 0x4}]
  set length_reg    [expr {$EGRESS_SGDMA_DESCRIPTOR_ADDR + 0x8}]
  set queue_reg     [expr {$EGRESS_SGDMA_DESCRIPTOR_ADDR + 0xc}]

  # Destination addr
  master_write_32 $master_path $dest_addr_reg 0x00280000
  # Length in bytes
  master_write_32 $master_path $length_reg $size
  # Queue descriptor (Writing 0x8000_0000)
  master_write_32 $master_path $queue_reg 0x80000000
}


# Queue one descriptor on the ingress streaming mSGDMA.
#
# Arguments:
#   master_path - claimed System Console master service
#   size        - transfer length in bytes (input size)
#
# Three descriptor registers are written in order: source address, length,
# and finally the word that queues the descriptor.
proc queue_ingress_descriptor {master_path size} {
  global INGRESS_SGDMA_DESCRIPTOR_ADDR

  # Register addresses relative to the ingress descriptor base; the source
  # address register sits at the base itself.
  set length_reg [expr {$INGRESS_SGDMA_DESCRIPTOR_ADDR + 0x8}]
  set queue_reg  [expr {$INGRESS_SGDMA_DESCRIPTOR_ADDR + 0xc}]

  # Source addr
  master_write_32 $master_path $INGRESS_SGDMA_DESCRIPTOR_ADDR 0x00200000
  # Transfer length in bytes
  master_write_32 $master_path $length_reg $size
  # Queue descriptor
  master_write_32 $master_path $queue_reg 0x80000000
}


# Dump inference output from on-chip memory into a file.
#
# Arguments:
#   master_path - claimed System Console master service
#   output_file - path of the file to write
#   size        - number of bytes to read, starting at 0x00280000 (the same
#                 address the egress descriptor uses as its destination)
proc read_output {master_path output_file size} {
  set egress_buffer_addr 0x00280000
  master_read_to_file $master_path $output_file $egress_buffer_addr $size
}


# Verify that the hardware completion counter matches the inference index.
#
# Arguments:
#   master_path - claimed System Console master service
#   iteration   - value the completion counter is expected to hold
#
# Reads the 32-bit CSR at DLA_IP_0_CSR_ADDR + 0x224, prints it, and raises a
# Tcl error when it does not equal iteration.
proc check_inference_count {master_path iteration} {
  global DLA_IP_0_CSR_ADDR

  # Expected counter value, rendered as an 8-digit hex word for the message
  set expected_count [format "0x%08X" [expr {0x00000000 + $iteration}]]

  # Check what completion counter CSR in HW is set to
  set counter_addr [expr {$DLA_IP_0_CSR_ADDR + 0x224}]
  set hw_count [master_read_32 $master_path $counter_addr 1]
  puts "Completion counter from HW: $hw_count"

  if {$hw_count == $expected_count} {
    return
  }
  error "Error: completion counter should be equal to $expected_count but instead is $hw_count"
}


# Write one input partition to its own binary file in the current directory.
#
# Arguments:
#   partition_data - raw bytes of the partition
#   index          - partition index, used to build the name chunk_<index>.bin
#
# Returns:
#   The name of the file that was written.
proc create_input_bin {partition_data index} {
  set chunk_path [format "chunk_%s.bin" $index]

  set chunk_chan [open $chunk_path "wb"]
  fconfigure $chunk_chan -translation binary
  # -nonewline: the file must contain exactly the partition bytes
  puts -nonewline $chunk_chan $partition_data
  close $chunk_chan

  return $chunk_path
}


# Poll a 32-bit register until it holds the expected value or a timeout expires.
#
# Arguments:
#   master_path          - claimed System Console master service
#   register_addr        - address of the register to read
#   register_val_assert  - value the register must reach
#
# Returns as soon as a read equals register_val_assert. After 3000 unsuccessful
# reads, each followed by a 10 ms wait (about 30 seconds in total), prints the
# last value read and exits with status 1.
proc poll_register {master_path register_addr register_val_assert} {
  for {set attempts_left 3000} {$attempts_left > 0} {incr attempts_left -1} {
    set register_val [master_read_32 $master_path $register_addr 1]
    if {$register_val == $register_val_assert} {
      return
    }
    # Wait 10 ms before the next read
    after 10
  }

  puts "Register polling timeout. CSR addr: $register_addr = $register_val \nRegister should be = $register_val_assert"
  exit 1
}


# Print the command-line usage text and terminate the script with status 1.
proc print_usage {} {
  # The line break and indentation inside the message are written out explicitly
  set usage_text "Usage: system-console --script system_console_script.tcl <input.bin file> <# of inferences> \n  <output channels> <output height> <output width>"
  puts $usage_text
  exit 1
}


# Main Function
#
# Runs the requested number of inferences through the FPGA AI Suite design.
#
# Arguments:
#   argc - number of command-line arguments
#   argv - argument list: <input.bin file> <# of inferences>
#          <output channels> <output height> <output width>
#
# For every inference the input file is streamed through the ingress SGDMA in
# chunks of at most INGRESS_ON_CHIP_MEMORY_SIZE_BYTES, the result is read back
# through the egress SGDMA in chunks of at most EGRESS_ON_CHIP_MEMORY_SIZE_BYTES
# and appended to output<i>.bin, and the hardware completion counter is checked.
#
# Prints usage and exits when the arguments are missing or help is requested;
# exits with status 1 on validation, seek, short-read or polling failures.
proc main {argc argv} {
  global INGRESS_ON_CHIP_MEMORY_SIZE_BYTES
  global EGRESS_ON_CHIP_MEMORY_SIZE_BYTES
  global INGRESS_SGDMA_CSR_ADDR
  global EGRESS_SGDMA_CSR_ADDR

  # Check if the script should display help information
  if {$argc > 0} {
      set firstArg [lindex $argv 0]
      if {[string equal $firstArg "help"] || [string equal $firstArg "--help"] || [string equal $firstArg "-help"]} {
          print_usage
      }
  }

  # Check the total number of arguments
  if {$argc != 5} {
      print_usage
  }

  # Setting script arguments to variables
  set input_file [lindex $argv 0]
  set num_inferences [lindex $argv 1]
  set C [lindex $argv 2]
  set H [lindex $argv 3]
  set W [lindex $argv 4]

  # Validating script arguments. Return input file size in bytes
  set file_size [validate_args $input_file $num_inferences]
  set file_size_hex [format "0x%X" $file_size]

  # Calculate # of AXI transfers from FPGA AI Suite IP output streamer in bytes
  set output_streamer_transfers [calulate_egress_axi_transfers $C $H $W]

  puts "\nInput file provided: $input_file and is of size $file_size_hex bytes"
  puts "Number of inferences: $num_inferences"

  # The partition counts depend only on the sizes computed above, so work them
  # out once instead of on every inference. Both are ceiling divisions by the
  # respective on-chip memory size.
  set num_input_partition [expr {int(($file_size + $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES - 1) / $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES)}]
  set num_output_partition [expr {int(($output_streamer_transfers + $EGRESS_ON_CHIP_MEMORY_SIZE_BYTES - 1) / $EGRESS_ON_CHIP_MEMORY_SIZE_BYTES)}]

  # SGDMA CSR value that the polling below waits for after queueing a descriptor
  set sgdma_csr_assert 0x00000002

  # Claim service path to System Console
  set mpath [lindex [get_service_paths master] 0]
  if {$mpath eq ""} {
    puts "Error: no master service found. Check the connection to the board."
    exit 1
  }
  set master_path [claim_service master $mpath ""]

  puts "\n________________________________________________________________________________"
  puts "                    STARTING FPGA AI SUITE INFERENCE                            "
  puts "________________________________________________________________________________\n"

  # Assert resetn using source/probe IP
  assert_reset
  # Initialize coreDLA's CSR registers
  initialize_coredla $master_path

  for {set i 1} {$i <= $num_inferences} {incr i} {
    # Open input file per iteration due to the potential partitioning in the case where input file > INGRESS_ON_CHIP_MEMORY_SIZE_BYTES.
    set input_fh [open $input_file "rb"]
    fconfigure $input_fh -translation binary

    # Create an output file every iteration of inferences
    set combined_fh [open "output$i.bin" "wb"]
    fconfigure $combined_fh -translation binary

    # Logic to ensure input image can fully fit into ingress on-chip memory
    # If not, must partition input data into chunks at a time. This allows us to queue
    # descriptors for partial input sizes.
    for {set j 0} {$j < $num_input_partition} {incr j} {
      set offset [expr {$j * $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES}]
      set remaining [expr {$file_size - $offset}]
      set size [expr {$remaining < $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES ? $remaining : $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES}]

      # Seek to the offset and read the chunk
      if {[catch {seek $input_fh $offset} err]} {
        puts "Error seeking to offset $offset: $err"
        close $input_fh
        close $combined_fh
        exit 1
      }

      # Begin partitioning the input data to INGRESS_ON_CHIP_MEMORY_SIZE_BYTES chunks
      set partition_data [read $input_fh $size]

      # A short read means the file changed after validation; the descriptor
      # below would otherwise stream bytes that were never staged.
      set bytes_read [string length $partition_data]
      if {$bytes_read != $size} {
        puts "Error: expected $size bytes at offset $offset of '$input_file' but read $bytes_read"
        close $input_fh
        close $combined_fh
        exit 1
      }

      set partition_data_file_name [create_input_bin $partition_data $j]
      stage_input $partition_data_file_name $master_path
      queue_ingress_descriptor $master_path $size
      file delete $partition_data_file_name

      # Poll SGDMA register to check if input data streaming is complete
      poll_register $master_path $INGRESS_SGDMA_CSR_ADDR $sgdma_csr_assert
    }

    close $input_fh

    # Logic to ensure output inference results can fully fit into egress on-chip memory
    # If not, must partition output data into chunks at a time. This allows us to queue
    # descriptors for partial output sizes.
    for {set j 0} {$j < $num_output_partition} {incr j} {
      set offset [expr {$j * $EGRESS_ON_CHIP_MEMORY_SIZE_BYTES}]
      set remaining [expr {$output_streamer_transfers - $offset}]
      set size [expr {$remaining < $EGRESS_ON_CHIP_MEMORY_SIZE_BYTES ? $remaining : $EGRESS_ON_CHIP_MEMORY_SIZE_BYTES}]

      # Queue chunks of EGRESS_ON_CHIP_MEMORY_SIZE_BYTES at a time to ensure a fit in egress on-chip memory
      queue_egress_descriptor $master_path $size

      # Poll SGDMA register to check if output data streaming is complete
      poll_register $master_path $EGRESS_SGDMA_CSR_ADDR $sgdma_csr_assert

      # Write a partition of the inference result to the partition file
      set output_file "partition_out_$j.bin"
      read_output $master_path $output_file $size

      # Read the partitioned output inference result back in
      set bin_fh [open $output_file "rb"]
      fconfigure $bin_fh -translation binary
      set bin_data [read $bin_fh]
      close $bin_fh

      # Append smaller partition of inference result to larger output$i.bin file for inference iteration
      puts -nonewline $combined_fh $bin_data
      file delete $output_file
    }

    # Close the result file before the counter check so it is complete on disk
    # even when the check raises an error.
    close $combined_fh

    # Ensure inference count has gone up
    check_inference_count $master_path $i
  }

  # Release the master service claimed above
  close_service master $master_path

  puts "\n$num_inferences inferences successfully completed"
}

# Script entry point: hand the command-line argument count and argument list
# (argc / argv) to main.
main $argc $argv