summaryrefslogtreecommitdiff
path: root/python/openvino/runtime/streaming/ed0_streaming_example
diff options
context:
space:
mode:
Diffstat (limited to 'python/openvino/runtime/streaming/ed0_streaming_example')
-rw-r--r--python/openvino/runtime/streaming/ed0_streaming_example/README.md14
-rw-r--r--python/openvino/runtime/streaming/ed0_streaming_example/system_console_script.tcl365
-rw-r--r--python/openvino/runtime/streaming/ed0_streaming_example/system_console_script_perf.tcl190
3 files changed, 569 insertions, 0 deletions
diff --git a/python/openvino/runtime/streaming/ed0_streaming_example/README.md b/python/openvino/runtime/streaming/ed0_streaming_example/README.md
new file mode 100644
index 0000000..1cc241a
--- /dev/null
+++ b/python/openvino/runtime/streaming/ed0_streaming_example/README.md
@@ -0,0 +1,14 @@
+This directory contains an example system-console tcl script for the hostless
+streaming example design on the Agilex 7 I-series Development Kit.
+
+The system-console tcl script does the following:
+ 1. Initializes the path to the JTAG Avalon Master IP
+ 2. Initiates a reset via the source/probe IP
+ 3. Writes to coreDLA's CSR registers to prime for inference
+ 4. Streams input data (img.bin) into on-chip memory via JTAG
+ 5. Writes a descriptor into egress DMA (coreDLA -> on-chip memory)
+ 6. Writes a descriptor into ingress DMA - beginning streaming process
+ from on-chip memory to DLA
+ 7. Streams output from on-chip memory to output.bin via JTAG
+
+This tcl script serves as an example for a specific CNN model. To understand how this "runtime" script can be extended to support your graph, please consult the Getting Started Guide.
diff --git a/python/openvino/runtime/streaming/ed0_streaming_example/system_console_script.tcl b/python/openvino/runtime/streaming/ed0_streaming_example/system_console_script.tcl
new file mode 100644
index 0000000..ab78d2e
--- /dev/null
+++ b/python/openvino/runtime/streaming/ed0_streaming_example/system_console_script.tcl
@@ -0,0 +1,365 @@
+# Script-wide constants. Procs below pull these in with `global`.
+#
+# This design example only supports an AXI Width of 128 bits = 16 bytes
+variable AXI_STREAM_DATA_WIDTH_BYTES 16
+# This design example has a limit to ingress on-chip memory size in bytes (524288 = 512 KiB)
+variable INGRESS_ON_CHIP_MEMORY_SIZE_BYTES 524288
+# This design example has a limit to egress on-chip memory size in bytes (131072 = 128 KiB)
+variable EGRESS_ON_CHIP_MEMORY_SIZE_BYTES 131072
+
+# DDR-Free ED Address Map Constants
+# Each SGDMA descriptor address below sits 0x20 above the matching SGDMA CSR address.
+variable DLA_IP_0_CSR_ADDR 0x00038000
+variable INGRESS_SGDMA_CSR_ADDR 0x00030000
+variable INGRESS_SGDMA_DESCRIPTOR_ADDR 0x00030020
+variable EGRESS_SGDMA_CSR_ADDR 0x00030040
+variable EGRESS_SGDMA_DESCRIPTOR_ADDR 0x00030060
+
+
+# Validate the script arguments and return the input file size in bytes.
+#
+# Arguments:
+#   input_file      - path to the binary input file that will be streamed
+#   num_inferences  - requested number of inferences; must be a positive integer
+#
+# Returns the size of input_file in bytes.
+#
+# Prints a message and exits with status 1 when the inference count is not a
+# positive integer, when the file is missing or empty, or when its size is not
+# a multiple of AXI_STREAM_DATA_WIDTH_BYTES. A file larger than
+# INGRESS_ON_CHIP_MEMORY_SIZE_BYTES only produces a notice, because the caller
+# transfers it in partitions.
+proc validate_args {input_file num_inferences} {
+    global INGRESS_ON_CHIP_MEMORY_SIZE_BYTES
+    global AXI_STREAM_DATA_WIDTH_BYTES
+
+    # Zero and non-numeric counts are rejected too: the message promises
+    # "greater than 0", and a count of 0 would otherwise report success
+    # without running a single inference.
+    if {![string is integer -strict $num_inferences] || $num_inferences <= 0} {
+        puts "Number of inferences must be greater than 0."
+        exit 1
+    }
+
+    # Check if the file exists
+    if {![file exists $input_file]} {
+        puts "Error: The file '$input_file' does not exist."
+        exit 1
+    }
+
+    # Get the size of the file in bytes
+    set file_size [file size $input_file]
+
+    # An empty file passes the alignment check below (0 % N == 0) but gives
+    # the caller nothing to stream, so reject it up front.
+    if {$file_size == 0} {
+        puts "Error: The file '$input_file' is empty."
+        exit 1
+    }
+
+    # Notify the user when the input does not fit into on-chip memory in one go
+    if {$file_size > $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES} {
+        puts "Input file '$input_file' is too large to fully fit into on-chip memory of size
+ $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES bytes. Input file will be partitioned and transferred partially.\n"
+    }
+
+    # Make sure the input file is aligned to the mSGDMA/FPGA AI Suite stream width
+    if {$file_size % $AXI_STREAM_DATA_WIDTH_BYTES != 0} {
+        puts "Error: this design example only supports input sizes aligned to 128 bits. Please pad accordingly."
+        exit 1
+    }
+
+    return $file_size
+}
+
+
+# Compute how many bytes the output streamer will send for one inference.
+#
+# Arguments:
+#   C, H, W - output channels, height and width
+#
+# The byte count is H * W * ceil(C/8) * 16. Returns that count. Prints an
+# error and exits with status 1 when the count is not positive or is not a
+# multiple of AXI_STREAM_DATA_WIDTH_BYTES. A count larger than
+# EGRESS_ON_CHIP_MEMORY_SIZE_BYTES only produces a notice.
+proc calulate_egress_axi_transfers {C H W} {
+    global EGRESS_ON_CHIP_MEMORY_SIZE_BYTES
+    global AXI_STREAM_DATA_WIDTH_BYTES
+
+    # ceil(C/8) channel groups, 16 bytes each, for every output pixel
+    set channel_groups [expr {int(($C + 7) / 8)}]
+    set output_streamer_transfers_bytes [expr {$H * $W * ($channel_groups * 16)}]
+
+    # The result must be positive and aligned to the AXI stream width
+    if {!($output_streamer_transfers_bytes > 0 && $output_streamer_transfers_bytes % $AXI_STREAM_DATA_WIDTH_BYTES == 0)} {
+        puts "Error with egress AXI transfer calculation. Please check your output shape size arguments (C H W)"
+        exit 1
+    }
+
+    # Notify the user when the result does not fit into egress on-chip memory
+    if {$output_streamer_transfers_bytes > $EGRESS_ON_CHIP_MEMORY_SIZE_BYTES} {
+        puts "Output inference results is too large to fully fit into on-chip memory of size
+ $EGRESS_ON_CHIP_MEMORY_SIZE_BYTES bytes. Output inference results will be partitioned and transferred partially.\n"
+    }
+
+    # Report the expected transfer size in hex
+    set output_streamer_transfers_hex [format "0x%X" $output_streamer_transfers_bytes]
+    puts "Expecting $output_streamer_transfers_hex bytes to be transferred by FPGA AI Suite output streamer"
+
+    return $output_streamer_transfers_bytes
+}
+
+
+# Initiate a reset via the source/probe IP.
+#
+# Claims the first ISSP service path reported by System Console and writes
+# 0x0 followed by 0x1 to its source data.
+#
+# Raises an error when System Console reports no ISSP service path, so the
+# failure is explained here instead of surfacing from claim_service with an
+# empty path.
+proc assert_reset {} {
+    set issp [lindex [get_service_paths issp] 0]
+    if {$issp eq ""} {
+        error "Error: no ISSP (source/probe) service path found; cannot drive reset."
+    }
+    set claimed_issp [claim_service issp $issp mylib]
+
+    # Write 0x0 then 0x1 to the source, in that order
+    foreach source_data {0x0 0x1} {
+        issp_write_source_data $claimed_issp $source_data
+    }
+}
+
+
+# Initialize coreDLA and both SGDMA engines
+# (register map: fpga/csr/rtl/inc/dla_csr_constants.svh).
+#
+# Arguments:
+#   master_path - claimed master service used for the register writes
+#
+# Writes, in order, DLA CSR offsets 0x220 <- 0, 0x204 <- 0, 0x200 <- 3 and
+# 0x22c <- 1, then writes 0x2 to offset 0x4 of the egress and ingress SGDMA CSRs.
+proc initialize_coredla {master_path} {
+    global DLA_IP_0_CSR_ADDR
+    global INGRESS_SGDMA_CSR_ADDR
+    global EGRESS_SGDMA_CSR_ADDR
+
+    # {offset value} pairs applied in order. The final pair (0x22c <- 1)
+    # instructs DLA to accept input until the register is cleared.
+    foreach {offset value} {0x220 0 0x204 0 0x200 3 0x22c 1} {
+        master_write_32 $master_path [expr {$DLA_IP_0_CSR_ADDR + $offset}] $value
+    }
+
+    # Reset the egress SGDMA first, then the ingress SGDMA
+    foreach sgdma_csr_base [list $EGRESS_SGDMA_CSR_ADDR $INGRESS_SGDMA_CSR_ADDR] {
+        master_write_32 $master_path [expr {$sgdma_csr_base + 0x4}] 0x2
+    }
+}
+
+
+# Write the contents of input_file to address 0x00200000 through master_path.
+proc stage_input {input_file master_path} {
+    set input_buffer_addr 0x00200000
+    master_write_from_file $master_path $input_file $input_buffer_addr
+}
+
+
+# Queue one descriptor on the egress streaming mSGDMA.
+#
+# Arguments:
+#   master_path - claimed master service used for the register writes
+#   size        - transfer length in bytes; should be 128-bit aligned
+#
+# Writes destination address 0x00280000 at descriptor offset 0x4, the length
+# at offset 0x8, and 0x8000_0000 at offset 0xc to queue the descriptor.
+proc queue_egress_descriptor {master_path size} {
+    global EGRESS_SGDMA_DESCRIPTOR_ADDR
+
+    set dest_addr_reg [expr {$EGRESS_SGDMA_DESCRIPTOR_ADDR + 0x4}]
+    set length_reg [expr {$EGRESS_SGDMA_DESCRIPTOR_ADDR + 0x8}]
+    set control_reg [expr {$EGRESS_SGDMA_DESCRIPTOR_ADDR + 0xc}]
+
+    master_write_32 $master_path $dest_addr_reg 0x00280000
+    master_write_32 $master_path $length_reg $size
+    master_write_32 $master_path $control_reg 0x80000000
+}
+
+
+# Queue one descriptor on the ingress streaming mSGDMA.
+#
+# Arguments:
+#   master_path - claimed master service used for the register writes
+#   size        - transfer length in bytes (input size)
+#
+# Writes source address 0x00200000 at descriptor offset 0x0, the length at
+# offset 0x8, and 0x80000000 at offset 0xc to queue the descriptor.
+proc queue_ingress_descriptor {master_path size} {
+    global INGRESS_SGDMA_DESCRIPTOR_ADDR
+
+    set length_reg [expr {$INGRESS_SGDMA_DESCRIPTOR_ADDR + 0x8}]
+    set control_reg [expr {$INGRESS_SGDMA_DESCRIPTOR_ADDR + 0xc}]
+
+    master_write_32 $master_path $INGRESS_SGDMA_DESCRIPTOR_ADDR 0x00200000
+    master_write_32 $master_path $length_reg $size
+    master_write_32 $master_path $control_reg 0x80000000
+}
+
+
+# Dump size bytes starting at address 0x00280000 into output_file.
+proc read_output {master_path output_file size} {
+    set output_buffer_addr 0x00280000
+    master_read_to_file $master_path $output_file $output_buffer_addr $size
+}
+
+
+# Check the hardware completion counter against the expected inference count.
+#
+# Arguments:
+#   master_path - claimed master service used for the register read
+#   iteration   - value the completion counter is expected to hold
+#
+# Reads the DLA CSR at offset 0x224, prints it, and raises an error when it
+# differs from iteration formatted as a zero-padded 8-digit hex value.
+proc check_inference_count {master_path iteration} {
+    global DLA_IP_0_CSR_ADDR
+
+    # Expected counter value, formatted as 0xXXXXXXXX
+    set formatted_counter_assert [format "0x%08X" [expr {0x00000000 + $iteration}]]
+
+    # Check what completion counter CSR in HW is set to
+    set counter_addr [expr {$DLA_IP_0_CSR_ADDR + 0x224}]
+    set completion_counter_result [master_read_32 $master_path $counter_addr 1]
+    puts "Completion counter from HW: $completion_counter_result"
+
+    if {$completion_counter_result == $formatted_counter_assert} {
+        return
+    }
+    error "Error: completion counter should be equal to $formatted_counter_assert but instead is $completion_counter_result"
+}
+
+
+# Write one input partition to a binary file and return the file name.
+#
+# Arguments:
+#   partition_data - raw bytes of the partition
+#   index          - partition index, used to build the name chunk_<index>.bin
+proc create_input_bin {partition_data index} {
+    set chunk_path "chunk_${index}.bin"
+    set chunk_fh [open $chunk_path "wb"]
+    fconfigure $chunk_fh -translation binary
+    puts -nonewline $chunk_fh $partition_data
+    close $chunk_fh
+    return $chunk_path
+}
+
+
+# Poll a register until it equals the expected value or a timeout elapses.
+#
+# Arguments:
+#   master_path         - claimed master service used for the register reads
+#   register_addr       - address of the register to poll
+#   register_val_assert - value the register must reach
+#
+# Makes up to 3000 reads, 10 ms apart (about 30 seconds). Returns as soon as
+# the value matches; otherwise prints a timeout message and exits with status 1.
+proc poll_register {master_path register_addr register_val_assert} {
+    for {set attempts_left 3000} {$attempts_left > 0} {incr attempts_left -1} {
+        set register_val [master_read_32 $master_path $register_addr 1]
+        if {$register_val == $register_val_assert} {
+            return
+        }
+        after 10
+    }
+
+    # Every attempt was used without seeing the expected value
+    puts "Register polling timeout. CSR addr: $register_addr = $register_val \nRegister should be = $register_val_assert"
+    exit 1
+}
+
+
+# Print the command-line usage text and exit with status 1.
+proc print_usage {} {
+    set usage_text "Usage: system-console --script system_console_script.tcl <input.bin file> <# of inferences>
+ <output channels> <output height> <output width>"
+    puts $usage_text
+    exit 1
+}
+
+
+# Main Function
+#
+# Arguments:
+#   argc - number of script arguments
+#   argv - list: <input.bin file> <# of inferences> <output channels> <output height> <output width>
+#
+# Flow: validate arguments, claim the first master service, reset and
+# initialize the design, then for each inference stream the input file in
+# partitions of at most INGRESS_ON_CHIP_MEMORY_SIZE_BYTES and collect the
+# output in partitions of at most EGRESS_ON_CHIP_MEMORY_SIZE_BYTES into
+# output<i>.bin, checking the completion counter after each inference.
+proc main {argc argv} {
+ global INGRESS_ON_CHIP_MEMORY_SIZE_BYTES
+ global EGRESS_ON_CHIP_MEMORY_SIZE_BYTES
+ global AXI_STREAM_DATA_WIDTH_BYTES
+ global INGRESS_SGDMA_DESCRIPTOR_ADDR
+ global EGRESS_SGDMA_DESCRIPTOR_ADDR
+ global INGRESS_SGDMA_CSR_ADDR
+ global EGRESS_SGDMA_CSR_ADDR
+
+ # Check if the script should display help information
+ if {$argc > 0} {
+ set firstArg [lindex $argv 0]
+ if {[string equal $firstArg "help"] || [string equal $firstArg "--help"] || [string equal $firstArg "-help"]} {
+ print_usage
+ }
+ }
+
+ # Check the total number of arguments
+ if {$argc != 5} {
+ print_usage
+ }
+
+ # Setting script arguments to variables
+ set input_file [lindex $argv 0]
+ set num_inferences [lindex $argv 1]
+ set C [lindex $argv 2]
+ set H [lindex $argv 3]
+ set W [lindex $argv 4]
+
+ # Validating script arguments. Return input file size in bytes
+ set file_size [validate_args $input_file $num_inferences]
+ set file_size_hex [format "0x%X" $file_size]
+
+ # Calculate # of AXI transfers from FPGA AI Suite IP output streamer in bytes
+ set output_streamer_transfers [calulate_egress_axi_transfers $C $H $W]
+
+ puts "\nInput file provided: $input_file and is of size $file_size_hex bytes"
+ puts "Number of inferences: $num_inferences"
+
+ # Claim service path to System Console
+ set mpath [lindex [get_service_paths master] 0]
+ set master_path [claim_service master $mpath ""]
+
+ puts "\n________________________________________________________________________________"
+ puts " STARTING FPGA AI SUITE INFERENCE "
+ puts "________________________________________________________________________________\n"
+
+ # Assert resetn using source/probe IP
+ assert_reset
+ # Initialize coreDLA's CSR registers
+ initialize_coredla $master_path
+
+ # Run the requested number of inferences; iteration i writes its result to output$i.bin
+
+ for {set i 1} {$i <= $num_inferences} {incr i} {
+ # Open input file per iteration due to the potential partitioning in the case where input file > INGRESS_ON_CHIP_MEMORY_SIZE_BYTES.
+ set input_fh [open $input_file "rb"]
+ fconfigure $input_fh -translation binary
+
+ # Create an output file every iteration of inferences
+ set combined_fh [open "output$i.bin" "wb"]
+ fconfigure $combined_fh -translation binary
+
+ # Logic to ensure input image can fully fit into ingress on-chip memory
+ # If not, must partition input data into chunks at a time. This allows us to queue
+ # descriptors for partial input sizes.
+ # Number of partitions = ceil(file_size / INGRESS_ON_CHIP_MEMORY_SIZE_BYTES)
+ set num_input_partition [expr {int(($file_size + $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES - 1) / $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES)}]
+ for {set j 0} {$j < $num_input_partition} {incr j} {
+ set offset [expr {$j * $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES}]
+ # Partition size: the remaining bytes, capped at the ingress memory size
+ set size [
+ expr {($file_size - $offset) < $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES ? ($file_size - $offset) : $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES}
+ ]
+
+ # Seek to the offset and read the chunk
+ # Need to catch an error if offset > file size
+ if {[catch {seek $input_fh $offset} err]} {
+ puts "Error seeking to offset $offset: $err"
+ close $input_fh
+ exit 1
+ }
+
+ # Begin partitioning the input data to INGRESS_ON_CHIP_MEMORY_SIZE_BYTES chunks
+ # The chunk is written to a temporary file, staged, queued, and the temporary file is deleted
+ set partition_data [read $input_fh $size]
+ set partition_data_file_name [create_input_bin $partition_data $j]
+ stage_input $partition_data_file_name $master_path
+ queue_ingress_descriptor $master_path $size
+ file delete $partition_data_file_name
+
+ # Poll SGDMA register to check if input data streaming is complete
+ set sgdma_csr_assert 0x00000002
+ poll_register $master_path $INGRESS_SGDMA_CSR_ADDR $sgdma_csr_assert
+ }
+
+ close $input_fh
+
+ # Logic to ensure output inference results can fully fit into egress on-chip memory
+ # If not, must partition output data into chunks at a time. This allows us to queue
+ # descriptors for partial output sizes.
+ # Number of partitions = ceil(output_streamer_transfers / EGRESS_ON_CHIP_MEMORY_SIZE_BYTES)
+ set num_output_partition [expr {int(($output_streamer_transfers + $EGRESS_ON_CHIP_MEMORY_SIZE_BYTES - 1) / $EGRESS_ON_CHIP_MEMORY_SIZE_BYTES)}]
+ for {set j 0} {$j < $num_output_partition} {incr j} {
+ set offset [expr {$j * $EGRESS_ON_CHIP_MEMORY_SIZE_BYTES}]
+ # Partition size: the remaining bytes, capped at the egress memory size
+ set size [
+ expr {($output_streamer_transfers - $offset) < $EGRESS_ON_CHIP_MEMORY_SIZE_BYTES ? ($output_streamer_transfers - $offset) : $EGRESS_ON_CHIP_MEMORY_SIZE_BYTES}
+ ]
+ # Queue chunks of EGRESS_ON_CHIP_MEMORY_SIZE_BYTES at a time to ensure a fit in egress on-chip memory
+ queue_egress_descriptor $master_path $size
+
+ # Poll SGDMA register to check if output data streaming is complete
+ set sgdma_csr_assert 0x00000002
+ poll_register $master_path $EGRESS_SGDMA_CSR_ADDR $sgdma_csr_assert
+
+ # Write a partition of the inference result to the partition file
+ set output_file "partition_out_$j.bin"
+ read_output $master_path $output_file $size
+
+ # Open partitioned output inference result
+ set bin_fh [open $output_file "rb"]
+ fconfigure $bin_fh -translation binary
+ set bin_data [read $bin_fh]
+
+ # Append smaller partition of inference result to larger output$i.bin file for inference iteration
+ puts -nonewline $combined_fh $bin_data
+ close $bin_fh
+ file delete $output_file
+ }
+ # Ensure inference count has gone up
+ check_inference_count $master_path $i
+ close $combined_fh
+ }
+
+ puts "\n$num_inferences inferences successfully completed"
+}
+
+# Main function call: run main with the script's argument count and argument list
+main $argc $argv \ No newline at end of file
diff --git a/python/openvino/runtime/streaming/ed0_streaming_example/system_console_script_perf.tcl b/python/openvino/runtime/streaming/ed0_streaming_example/system_console_script_perf.tcl
new file mode 100644
index 0000000..f0cd5f7
--- /dev/null
+++ b/python/openvino/runtime/streaming/ed0_streaming_example/system_console_script_perf.tcl
@@ -0,0 +1,190 @@
+# Initiate a reset via the source/probe IP.
+#
+# Claims the first ISSP service path reported by System Console and writes
+# 0x0 followed by 0x1 to its source data.
+#
+# Raises an error when System Console reports no ISSP service path, so the
+# failure is explained here instead of surfacing from claim_service with an
+# empty path.
+proc assert_reset {} {
+    set issp [lindex [get_service_paths issp] 0]
+    if {$issp eq ""} {
+        error "Error: no ISSP (source/probe) service path found; cannot drive reset."
+    }
+    set claimed_issp [claim_service issp $issp mylib]
+
+    # Write 0x0 then 0x1 to the source, in that order
+    foreach source_data {0x0 0x1} {
+        issp_write_source_data $claimed_issp $source_data
+    }
+}
+
+# Initialize coreDLA and both SGDMA engines
+# (register map: fpga/csr/rtl/inc/dla_csr_constants.svh).
+#
+# Arguments:
+#   master_path - claimed master service used for the register writes
+#
+# Writes 0x00038220 <- 0, 0x00038204 <- 0, 0x00038200 <- 3 and
+# 0x0003822c <- 1, then writes 0x2 (reset) followed by 0x20 (stop) to the
+# egress register 0x00030044 and to the ingress register 0x00030004.
+proc initialize_coredla {master_path} {
+    # {address value} pairs applied in order. The final pair (0x0003822c <- 1)
+    # instructs DLA to accept input until the register is cleared.
+    foreach {csr_addr csr_value} {0x00038220 0 0x00038204 0 0x00038200 3 0x0003822c 1} {
+        master_write_32 $master_path $csr_addr $csr_value
+    }
+
+    # Egress first, then ingress: reset the descriptor, then stop it
+    foreach control_reg {0x00030044 0x00030004} {
+        master_write_32 $master_path $control_reg 0x2
+        master_write_32 $master_path $control_reg 0x20
+    }
+}
+
+# Start both descriptors by writing 0x00 to the egress register (0x00030044)
+# and then to the ingress register (0x00030004).
+proc start_stream {master_path} {
+    foreach control_reg {0x00030044 0x00030004} {
+        master_write_32 $master_path $control_reg 0x00
+    }
+}
+
+# Raise an error when either descriptor buffer reports full.
+#
+# Reads the egress status (0x00030040) and the ingress status (0x00030000),
+# then tests bit 0x4 of each, egress first.
+proc check_descriptor_buffer_full {master_path} {
+    set egress_status [master_read_32 $master_path 0x00030040 1]
+    set ingress_status [master_read_32 $master_path 0x00030000 1]
+
+    if {($egress_status & 0x4) != 0} {
+        error "Egress descriptor is full."
+    }
+    if {($ingress_status & 0x4) != 0} {
+        error "Ingress descriptor is full."
+    }
+}
+
+# Write the contents of input_file to address 0x00200000 through master_path.
+proc stage_input {input_file master_path} {
+    set input_buffer_addr 0x00200000
+    master_write_from_file $master_path $input_file $input_buffer_addr
+}
+
+# Queue one fixed descriptor on the egress streaming mSGDMA.
+#
+# Writes destination address 0x00280000 to 0x00030064, length 0xA800 (should
+# be 128-bit aligned) to 0x00030068, and 0x80000000 to 0x0003006c to queue it.
+proc queue_egress_descriptor {master_path} {
+    set dest_addr_reg 0x00030064
+    set length_reg 0x00030068
+    set control_reg 0x0003006c
+
+    master_write_32 $master_path $dest_addr_reg 0x00280000
+    master_write_32 $master_path $length_reg 0xA800
+    master_write_32 $master_path $control_reg 0x80000000
+}
+
+# Queue one fixed descriptor on the ingress streaming mSGDMA.
+#
+# Writes source address 0x00200000 to 0x00030020, transfer length 0x17A00
+# bytes (input size) to 0x00030028, and 0x80000000 to 0x0003002c to queue it.
+proc queue_ingress_descriptor {master_path} {
+    set source_addr_reg 0x00030020
+    set length_reg 0x00030028
+    set control_reg 0x0003002c
+
+    master_write_32 $master_path $source_addr_reg 0x00200000
+    master_write_32 $master_path $length_reg 0x17A00
+    master_write_32 $master_path $control_reg 0x80000000
+}
+
+# Copy the staged input back to a file for validation: reads 0x17A00 bytes
+# starting at address 0x00200000 into input.bin.
+proc copy_input_for_validation {master_path} {
+    set input_buffer_addr 0x00200000
+    set input_size_bytes 0x17A00
+    master_read_to_file $master_path input.bin $input_buffer_addr $input_size_bytes
+}
+
+# Read inference counter values to get performance.
+# There is an assumption here that the clk_ddr is attached to 100MHz.
+#
+# Arguments:
+#   master_path    - claimed master service used for the register reads
+#   num_inferences - number of inferences the counters cover (must be non-zero)
+#
+# Each counter is assembled from a low word and a high word as lo | (hi << 32).
+# Prints the total active and core-active clock counts in hex and the
+# resulting throughput in frames per second.
+proc get_performance {master_path num_inferences} {
+    set active_clk_lo [master_read_32 $master_path 0x00038240 1]
+    set active_clk_hi [master_read_32 $master_path 0x00038244 1]
+    set total_active_clk_count [expr { $active_clk_lo | ($active_clk_hi << 32) }]
+    set active_clk_count_per_inference [expr {$total_active_clk_count / $num_inferences}]
+    # expr yields a decimal number, so format it as hex explicitly; printing
+    # it after a literal "0x" would display decimal digits as if they were hex.
+    set total_active_clk_hex [format "0x%llX" $total_active_clk_count]
+    puts "Total active clk cycles: $total_active_clk_hex"
+
+    # These two registers are still read so the hardware access sequence is
+    # unchanged; the script does not report their value.
+    set all_active_clk_lo [master_read_32 $master_path 0x00038248 1]
+    set all_active_clk_hi [master_read_32 $master_path 0x0003824c 1]
+
+    set core_active_clk_lo [master_read_32 $master_path 0x0003827c 1]
+    set core_active_clk_hi [master_read_32 $master_path 0x00038280 1]
+    set total_core_active_clk_count [expr { $core_active_clk_lo | ($core_active_clk_hi << 32) }]
+    set total_core_active_clk_hex [format "0x%llX" $total_core_active_clk_count]
+    puts "Total core active clk cycles (without input and output streamer): $total_core_active_clk_hex"
+
+    # Throughput = 1 / (clock period * active cycles per inference)
+    set clk_period [expr { 1.0 / 100000000.0 }]
+    set final_fps [expr { 1 / ($clk_period * $active_clk_count_per_inference) }]
+
+    puts "--------------------------------------------------------"
+    puts "Final Throughput: $final_fps fps assuming 100MHz clk_ddr"
+}
+
+# Wait until the completion counter equals the expected number of inferences.
+#
+# Arguments:
+#   master_path    - claimed master service used for the register reads
+#   num_inferences - counter value to wait for
+#
+# Reads register 0x00038224 up to 3000 times, 10 ms apart (about 30 seconds).
+# Returns as soon as the value matches; raises an error on timeout.
+proc wait_for_completion_counter {master_path num_inferences} {
+    for {set attempts_left 3000} {$attempts_left > 0} {incr attempts_left -1} {
+        set completion_counter_result [master_read_32 $master_path 0x00038224 1]
+        if {$completion_counter_result == $num_inferences} {
+            return
+        }
+        after 10
+    }
+    error "Timeout hit at 30 seconds. Increase the timeout if the inference is expected to take longer."
+}
+
+# Read the output from on-chip memory once the completion counter matches.
+#
+# Arguments:
+#   master_path   - claimed master service used for the reads
+#   num_inference - value the completion counter is expected to hold
+#
+# Reads register 0x00038224 and prints it. When it equals num_inference
+# (formatted as a zero-padded 8-digit hex value), dumps 0xA800 bytes from
+# address 0x00280000 into output0.bin; otherwise raises an error.
+proc read_last_output {master_path num_inference} {
+    # Expected counter value, formatted as 0xXXXXXXXX
+    set formatted_counter_assert [format "0x%08X" [expr {0x00000000 + $num_inference}]]
+
+    # Check what completion counter CSR in HW is set to
+    set completion_counter_result [master_read_32 $master_path 0x00038224 1]
+    puts "Completion counter from HW: $completion_counter_result"
+
+    if {$completion_counter_result != $formatted_counter_assert} {
+        error "Error: completion counter should be equal to $formatted_counter_assert but instead is $completion_counter_result"
+    }
+    master_read_to_file $master_path output0.bin 0x00280000 0xA800
+}
+# This design example has a limit to ingress on-chip memory size in bytes
+set INGRESS_ON_CHIP_MEMORY_SIZE_BYTES 524288
+
+# Main Function
+# Top-level flow: check the single <input.bin file> argument, stage the input,
+# initialize the design, queue descriptors for every inference, start the
+# stream, wait for completion, then report performance and read the output.
+if {$argc != 1} {
+ error "Usage: system-console --script system_console_script_perf.tcl <input.bin file>"
+}
+set input_file [lindex $argv 0]
+puts "Input file provided: $input_file"
+
+set file_size [file size $input_file]
+
+# Make sure the input file can fit into on-chip memory
+# (this script exits here instead of partitioning the input)
+if {$file_size > $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES} {
+ puts "Input file '$input_file' is too large to fully fit into on-chip memory of size
+ $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES bytes.\nThe `system_console_script.tcl` file will
+ partition the input file for partial transfers to solve this problem but it should not
+ be used for performance testing. Please increase the on-chip memory size for performance
+ testing.\n"
+ exit 1
+}
+
+# Claim the first master service path reported by System Console
+set mpath [lindex [get_service_paths master] 0]
+set master_path [claim_service master $mpath ""]
+
+# Assert resetn using source/probe IP
+assert_reset
+# Stage input file into on-chip memory
+stage_input $input_file $master_path
+# Initialize coreDLA's CSR registers
+initialize_coredla $master_path
+
+# Number of inferences cannot exceed the descriptor queue FIFO size
+set num_inferences 32
+for {set i 1} {$i <= $num_inferences} {incr i} {
+ # Errors out if either descriptor buffer reports full before queuing more
+ check_descriptor_buffer_full $master_path
+ # Queue egress descriptor into mSGDMA
+ queue_egress_descriptor $master_path
+ # Queue ingress descriptor into mSGDMA
+ queue_ingress_descriptor $master_path
+}
+
+start_stream $master_path
+wait_for_completion_counter $master_path $num_inferences
+get_performance $master_path $num_inferences
+read_last_output $master_path $num_inferences
+
+puts "\n$num_inferences inferences successfully completed"