# Initiate reset via source/probe IP
#
# Claims the first issp service, writes 0x0 and then 0x1 to its source
# data, and releases the claim again.
# Raises an error if no issp service path is available.
proc assert_reset {} {
    set issp [lindex [get_service_paths issp] 0]
    if {$issp eq ""} {
        error "No issp service path found; cannot assert reset."
    }
    set claimed_issp [claim_service issp $issp mylib]

    # Drive the source to 0x0 first, then to 0x1
    foreach source_data {0x0 0x1} {
        issp_write_source_data $claimed_issp $source_data
    }

    # Release the claim so the service is not left held by this script
    close_service issp $claimed_issp
}

# Initializing coreDLA (register map: fpga/csr/rtl/inc/dla_csr_constants.svh)
#
# master_path: claimed master service used for all CSR writes.
proc initialize_coredla {master_path} {
    master_write_32 $master_path 0x00038220 0
    master_write_32 $master_path 0x00038204 0
    master_write_32 $master_path 0x00038200 3
    # Writing 0x1 to this register will instruct DLA to accept input until register is cleared
    master_write_32 $master_path 0x0003822c 1

    # Reset egress descriptor
    master_write_32 $master_path 0x00030044 0x2
    # Stop the descriptor
    master_write_32 $master_path 0x00030044 0x20

    # Reset ingress descriptor
    master_write_32 $master_path 0x00030004 0x2
    # Stop the descriptor
    master_write_32 $master_path 0x00030004 0x20
}

# Start both streaming descriptors (egress first, then ingress).
#
# master_path: claimed master service used for the CSR writes.
proc start_stream {master_path} {
    # Start the egress descriptor
    master_write_32 $master_path 0x00030044 0x00
    # Start the ingress descriptor
    master_write_32 $master_path 0x00030004 0x00
}

# This checks if the descriptor buffers are full
#
# Reads the egress (0x00030040) and ingress (0x00030000) status words and
# raises an error if bit 0x4 is set in either of them.
proc check_descriptor_buffer_full {master_path} {
    set egress_descriptor_status [master_read_32 $master_path 0x00030040 1]
    set ingress_descriptor_status [master_read_32 $master_path 0x00030000 1]

    if {$egress_descriptor_status & 0x4} {
        error "Egress descriptor is full."
    }
    if {$ingress_descriptor_status & 0x4} {
        error "Ingress descriptor is full."
    }
}

# Copy the input file into memory at 0x00200000.
#
# input_file:  path of the file to load.
# master_path: claimed master service used for the transfer.
proc stage_input {input_file master_path} {
    # Initializing rom with input image
    master_write_from_file $master_path $input_file 0x00200000
}

# Adding descriptor to egress streaming mSGDMA
proc queue_egress_descriptor {master_path} {
    # Destination addr
    master_write_32 $master_path 0x00030064 0x00280000
    # Length should be 128 bit aligned
    master_write_32 $master_path 0x00030068 0xA800
    # Queue descriptor
    master_write_32 $master_path 0x0003006c 0x80000000
}

# Adding descriptor to ingress streaming mSGDMA
proc queue_ingress_descriptor {master_path} {
    # Source addr
    master_write_32 $master_path 0x00030020 0x00200000
    # Transfer length in bytes (input size)
    master_write_32 $master_path 0x00030028 0x17A00
    # Queue descriptor
    master_write_32 $master_path 0x0003002c 0x80000000
}

# Copy the staged input (0x17A00 bytes at 0x00200000) back out to input.bin
# so it can be compared against the original file.
proc copy_input_for_validation {master_path} {
    master_read_to_file $master_path input.bin 0x00200000 0x17A00
}

# Read inference counter values to get performance
# There is an assumption here that the clk_ddr is attached to 100MHz
#
# master_path:    claimed master service used for the CSR reads.
# num_inferences: number of inferences the counters were accumulated over.
#
# Prints the total active and core-active cycle counts (in hexadecimal) and
# the resulting throughput in frames per second.
proc get_performance {master_path num_inferences} {
    # Each counter is 64 bits wide, exposed as a low and a high 32-bit word
    set active_clk_lo [master_read_32 $master_path 0x00038240 1]
    set active_clk_hi [master_read_32 $master_path 0x00038244 1]
    set total_active_clk_count [expr { $active_clk_lo | ($active_clk_hi << 32) }]
    set active_clk_count_per_inference [expr {$total_active_clk_count / $num_inferences}]
    # expr yields a decimal integer, so format it explicitly to match the 0x prefix
    puts "Total active clk cycles: [format 0x%llX $total_active_clk_count]"

    # NOTE(review): this counter pair is still read as before but is not
    # reported; it previously only fed an unused, mislabeled "latency" value.
    master_read_32 $master_path 0x00038248 1
    master_read_32 $master_path 0x0003824c 1

    set core_active_clk_lo [master_read_32 $master_path 0x0003827c 1]
    set core_active_clk_hi [master_read_32 $master_path 0x00038280 1]
    set total_core_active_clk_count [expr { $core_active_clk_lo | ($core_active_clk_hi << 32) }]
    puts "Total core active clk cycles (without input and output streamer): [format 0x%llX $total_core_active_clk_count]"

    # Clock period in seconds for the assumed 100MHz clk_ddr
    set clk_period [expr { 1.0 / 100000000.0 }]
    set final_fps [expr { 1 / ($clk_period * $active_clk_count_per_inference) }]
    puts "--------------------------------------------------------"
    puts "Final Throughput: $final_fps fps assuming 100MHz clk_ddr"
}

# Poll the completion counter until it reaches the expected number of inferences
#
# Raises an error if the counter has not reached num_inferences after
# 3000 polls spaced 10 ms apart.
proc wait_for_completion_counter {master_path num_inferences} {
    # Set timeout to be 30 seconds (in centi-seconds)
    set timeout_count 3000
    while {$timeout_count > 0} {
        set completion_counter_result [master_read_32 $master_path 0x00038224 1]
        if {$completion_counter_result == $num_inferences} {
            break
        }
        set timeout_count [expr {$timeout_count - 1}]
        after 10
    }
    # The counter only reaches zero when every poll missed the expected value
    if {$timeout_count == 0} {
        error "Timeout hit at 30 seconds. Increase the timeout if the inference is expected to take longer."
    }
}

# Read output from on-chip memory
#
# Verifies that the completion counter CSR equals num_inference and, if so,
# dumps 0xA800 bytes from 0x00280000 into output0.bin. Raises an error when
# the counter does not match.
proc read_last_output {master_path num_inference} {
    # Expected completion counter value, formatted like the CSR read-back
    set formatted_counter_assert [format "0x%08X" $num_inference]

    # Check what completion counter CSR in HW is set to
    set completion_counter_result [master_read_32 $master_path 0x00038224 1]
    puts "Completion counter from HW: $completion_counter_result"
    if {$completion_counter_result == $formatted_counter_assert} {
        master_read_to_file $master_path output0.bin 0x00280000 0xA800
    } else {
        error "Error: completion counter should be equal to $formatted_counter_assert but instead is $completion_counter_result"
    }
}

# This design example has a limit to ingress on-chip memory size in bytes
set INGRESS_ON_CHIP_MEMORY_SIZE_BYTES 524288

# Main Function
if {$argc != 1} {
    error "Usage: system-console --script system_console_script_perf.tcl <input_file>"
}

set input_file [lindex $argv 0]
puts "Input file provided: $input_file"

set file_size [file size $input_file]

# Make sure the input file can fit into on-chip memory
if {$file_size > $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES} {
    puts "Input file '$input_file' is too large to fully fit into on-chip memory of size $INGRESS_ON_CHIP_MEMORY_SIZE_BYTES bytes.\nThe `system_console_script.tcl` file will partition the input file for partial transfers to solve this problem but it should not be used for performance testing. Please increase the on-chip memory size for performance testing.\n"
    exit 1
}

set mpath [lindex [get_service_paths master] 0]
if {$mpath eq ""} {
    error "No master service path found."
}
set master_path [claim_service master $mpath ""]

# Number of inferences cannot exceed the descriptor queue FIFO size
set num_inferences 32

# Run the whole flow under catch so the master claim is released on failure too
set run_status [catch {
    # Assert resetn using source/probe IP
    assert_reset
    # Stage input file into on-chip memory
    stage_input $input_file $master_path
    # Initialize coreDLA's CSR registers
    initialize_coredla $master_path

    for {set i 1} {$i <= $num_inferences} {incr i} {
        check_descriptor_buffer_full $master_path
        # Queue egress descriptor into mSGDMA
        queue_egress_descriptor $master_path
        # Queue ingress descriptor into mSGDMA
        queue_ingress_descriptor $master_path
    }

    start_stream $master_path
    wait_for_completion_counter $master_path $num_inferences
    get_performance $master_path $num_inferences
    read_last_output $master_path $num_inferences
} run_result]

# Capture the stack trace before close_service can overwrite it
if {$run_status != 0} {
    set run_error_info $::errorInfo
}

# Best-effort release; a failure here must not mask the original error
catch {close_service master $master_path}

if {$run_status != 0} {
    error $run_result $run_error_info
}

puts "\n$num_inferences inferences successfully completed"