diff options
| author | Eric Dao <eric@erickhangdao.com> | 2025-03-10 17:54:31 -0400 |
|---|---|---|
| committer | Eric Dao <eric@erickhangdao.com> | 2025-03-10 17:54:31 -0400 |
| commit | ab224e2e6ba65f5a369ec392f99cd8845ad06c98 (patch) | |
| tree | a1e757e9341863ed52b8ad4c5a1c45933aab9da4 /python/openvino/demo/ip/intel_ai_ip/verilog/dla_aux_depthwise_top.sv | |
| parent | 40da1752f2c8639186b72f6838aa415e854d0b1d (diff) | |
| download | thesis-master.tar.gz thesis-master.tar.bz2 thesis-master.zip | |
Diffstat (limited to 'python/openvino/demo/ip/intel_ai_ip/verilog/dla_aux_depthwise_top.sv')
| -rw-r--r-- | python/openvino/demo/ip/intel_ai_ip/verilog/dla_aux_depthwise_top.sv | 381 |
1 files changed, 381 insertions, 0 deletions
// Copyright 2020-2023 Intel Corporation.
//
// This software and the related documents are Intel copyrighted materials,
// and your use of them is governed by the express license under which they
// were provided to you ("License"). Unless the License provides otherwise,
// you may not use, modify, copy, publish, distribute, disclose or transmit
// this software or the related documents without Intel's prior written
// permission.
//
// This software and the related documents are provided as is, with no express
// or implied warranties, other than those that are expressly stated in the
// License.

/*
 * Module `dla_aux_depthwise_top`
 *
 * Top level of the aux block.
 *
 * WARNING! DO NOT EDIT THIS FILE FOR FUTURE-COMPATIBILITY
 *
 * This module is a template for all auxiliary blocks and the content should
 * not be modified for future compatibility.
 *
 * Do not modify any fields other than the module name and the included
 * package name. In case of a limitation or bug please contact the owner of
 * the Example Aux block.
 *
 * See README.md of the Example Aux block for more details.
 *
 * Structure (as wired in this file):
 *   - dla_reset_handler_simple       : produces RST_NUM_COPIES copies of the
 *                                      synchronized active-low reset `sclrn`.
 *   - dla_aux_depthwise_config_decoder: consumes the config stream and drives
 *                                      config_to_control[0].
 *   - dla_aux_depthwise_group (ID 0) : drives o_data, o_result_valid,
 *                                      control_to_config,
 *                                      o_config_filter_bias_ready and the
 *                                      group debug record.
 *   - dla_aux_depthwise_group (ID 1..GROUP_NUM-1, generate loop `gen_groups`):
 *                                      same module, fed from tap
 *                                      [i * GROUP_DELAY] of the delay chain;
 *                                      their o_data, o_result_valid,
 *                                      o_control_to_config,
 *                                      o_config_filter_bias_ready and o_debug
 *                                      outputs are left unconnected.
 *   - dla_aux_depthwise_filter_bias_cache: produces filter_data_buf and
 *                                      bias_data_buf, which are fed to every
 *                                      group.
 *   - dla_aux_depthwise_debug        : serves the debug read-address /
 *                                      read-data ports.
 */

`undefineall
`resetall
`default_nettype none

module dla_aux_depthwise_top
  import dla_aux_depthwise_pkg::*;
#(
  parameter aux_data_pack_params_t AUX_DATA_PACK_PARAMS,
  parameter stream_params_t        CONFIG_STREAM_PARAMS,
  parameter debug_axi_params_t     DEBUG_AXI_PARAMS,
  parameter aux_generic_params_t   AUX_GENERIC_PARAMS,
  parameter aux_special_params_t   AUX_SPECIAL_PARAMS,
  parameter vector_dot_arch_t      AUX_DEPTHWISE_VECTOR_ARCH,
  parameter vector_dot_arch_info_t DEPTHWISE_VECTOR_ARCH_INFO,
  //
  // Derived constants (localparams: not overridable by the instantiating module)
  localparam stream_params_t DATA_STREAM_PARAMS = '{ // Data stream parameterization
    DATA_WIDTH : aux_params_to_bus_width(AUX_DATA_PACK_PARAMS)},
  localparam int GROUP_DELAY  = AUX_DATA_PACK_PARAMS.GROUP_DELAY,
  localparam int GROUP_NUM    = AUX_DATA_PACK_PARAMS.GROUP_NUM,
  localparam int GROUP_SIZE   = AUX_DATA_PACK_PARAMS.GROUP_SIZE,
  localparam int VECTOR_SIZE  = AUX_DATA_PACK_PARAMS.VECTOR_SIZE,
  localparam int ELEMENT_BITS = AUX_DATA_PACK_PARAMS.ELEMENT_BITS
) (
  input  wire                                           clk                       , // Clock
  input  wire                                           i_aresetn                 , // Active-low async reset
  //
  input  var logic [GROUP_NUM   -1:0]
                   [GROUP_SIZE  -1:0]
                   [VECTOR_SIZE -1:0]
                   [ELEMENT_BITS-1:0]                   i_data                    , // Data input stream port
  input  var logic                                      i_data_valid              , // Data input stream port valid
  output generic_response_t                             o_data                    , // Data input stream port response
  //
  input  generic_response_t                             i_result                  , // Result output stream port response
  output logic [GROUP_NUM   -1:0]
               [GROUP_SIZE  -1:0]
               [VECTOR_SIZE -1:0]
               [ELEMENT_BITS-1:0]                       o_result                  , // Result output stream port
  output logic                                          o_result_valid            , // Result output stream port valid
  //
  input  var logic [CONFIG_STREAM_PARAMS.DATA_WIDTH-1:0] i_config                 , // Config stream port
  input  var logic                                      i_config_valid            , // Config stream port valid
  output generic_response_t                             o_config                  , // Config stream port response
  input  var logic                                      i_config_filter_bias_valid, // Config (actual data) for cache
  input  var logic [CONFIG_STREAM_PARAMS.DATA_WIDTH-1:0] i_config_filter_bias_data, // Filter/bias data word (also fed to the filter bias cache and every group)
  output var logic                                      o_config_filter_bias_ready, // Driven by group 0 only
  //
  input  var logic [DEBUG_AXI_PARAMS.ADDR_WIDTH-1:0]    i_debug_raddr             , // Debug AXI read-address port
  input  var logic                                      i_debug_raddr_valid       , // Debug AXI read-address port valid
  output generic_response_t                             o_debug_raddr             , // Debug AXI read-address port response
  input  generic_response_t                             i_debug_rdata             , // Debug AXI read-data port response
  output logic [DEBUG_AXI_PARAMS.DATA_WIDTH-1:0]        o_debug_rdata             , // Debug AXI read-data port
  output logic                                          o_debug_rdata_valid         // Debug AXI read-data port valid
);

  // Number of entries in each group-delay chain. Group i taps entry
  // [i * GROUP_DELAY], so the last group (i = GROUP_NUM-1) taps entry
  // [GROUP_SHIFT-1]. With GROUP_DELAY == 0 this is 1 and every group taps
  // entry [0].
  localparam int GROUP_SHIFT = (GROUP_NUM-1)*GROUP_DELAY+1;

  // Parameter checking
  // Each check aborts with $fatal when a parameter is outside its legal range.
  // The comment after `initial` is a synthesis directive and must be kept as is.
  initial /* synthesis enable_verilog_initial_construct */
  begin
    // check AUX_DATA_PACK_PARAMS
    if (AUX_DATA_PACK_PARAMS.ELEMENT_BITS > 32 || AUX_DATA_PACK_PARAMS.ELEMENT_BITS <= 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), ELEMENT_BITS = %0d, ",
                 "expected 0 < ELEMENT_BITS <= 32"}, AUX_DATA_PACK_PARAMS.ELEMENT_BITS);
    end
    if (AUX_DATA_PACK_PARAMS.VECTOR_SIZE <= 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), VECTOR_SIZE = %0d, ",
                 "expected VECTOR_SIZE > 0"}, AUX_DATA_PACK_PARAMS.VECTOR_SIZE);
    end
    if (AUX_DATA_PACK_PARAMS.NATIVE_VECTOR_SIZE <= 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), NATIVE_VECTOR_SIZE = %0d, ",
                 "expected NATIVE_VECTOR_SIZE > 0"}, AUX_DATA_PACK_PARAMS.NATIVE_VECTOR_SIZE);
    end
    if (AUX_DATA_PACK_PARAMS.GROUP_SIZE <= 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), GROUP_SIZE = %0d, ",
                 "expected GROUP_SIZE > 0"}, AUX_DATA_PACK_PARAMS.GROUP_SIZE);
    end
    if (AUX_DATA_PACK_PARAMS.GROUP_NUM <= 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), GROUP_NUM = %0d, ",
                 "expected GROUP_NUM > 0"}, AUX_DATA_PACK_PARAMS.GROUP_NUM);
    end
    if (AUX_DATA_PACK_PARAMS.GROUP_DELAY < 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), GROUP_DELAY = %0d, ",
                 "expected GROUP_DELAY >= 0"}, AUX_DATA_PACK_PARAMS.GROUP_DELAY);
    end

    // check CONFIG_STREAM_PARAMS
    if (CONFIG_STREAM_PARAMS.DATA_WIDTH <= 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), CONFIG_STREAM_PARAMS.DATA_WIDTH ",
                 "= %0d, expected CONFIG_STREAM_PARAMS.DATA_WIDTH > 0"}, CONFIG_STREAM_PARAMS.DATA_WIDTH);
    end

    // check DEBUG_AXI_PARAMS
    if (DEBUG_AXI_PARAMS.DATA_WIDTH <= 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), DEBUG_AXI_PARAMS.DATA_WIDTH = ",
                 "%0d, expected DEBUG_AXI_PARAMS.DATA_WIDTH > 0"}, DEBUG_AXI_PARAMS.DATA_WIDTH);
    end
    if (DEBUG_AXI_PARAMS.ADDR_WIDTH <= 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), DEBUG_AXI_PARAMS.ADDR_WIDTH = ",
                 "%0d, expected DEBUG_AXI_PARAMS.ADDR_WIDTH > 0"}, DEBUG_AXI_PARAMS.ADDR_WIDTH);
    end

    // check AUX_GENERIC_PARAMS
    if (AUX_GENERIC_PARAMS.INPUT_BUFFER_REG_STAGES < 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.",
                 "INPUT_BUFFER_REG_STAGES = %0d, expected AUX_GENERIC_PARAMS.INPUT_BUFFER_REG_STAGES >= 0"},
                 AUX_GENERIC_PARAMS.INPUT_BUFFER_REG_STAGES);
    end
    if (AUX_GENERIC_PARAMS.COMMAND_BUFFER_DEPTH < 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.",
                 "COMMAND_BUFFER_DEPTH = %0d, expected AUX_GENERIC_PARAMS.COMMAND_BUFFER_DEPTH >= 0"},
                 AUX_GENERIC_PARAMS.COMMAND_BUFFER_DEPTH);
    end
    if (AUX_GENERIC_PARAMS.PER_GROUP_CONTROL != 0 &&
        AUX_GENERIC_PARAMS.PER_GROUP_CONTROL != 1) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.",
                 "PER_GROUP_CONTROL = %0d, expected AUX_GENERIC_PARAMS.PER_GROUP_CONTROL = 0 or 1"},
                 AUX_GENERIC_PARAMS.PER_GROUP_CONTROL);
    end
    if (AUX_GENERIC_PARAMS.DEBUG_LEVEL < 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.DEBUG_LEVEL ",
                 "= %0d, expected AUX_GENERIC_PARAMS.DEBUG_LEVEL >= 0"}, AUX_GENERIC_PARAMS.DEBUG_LEVEL);
    end
    if (AUX_GENERIC_PARAMS.DEBUG_ID < 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.DEBUG_ID = ",
                 "%0d, expected AUX_GENERIC_PARAMS.DEBUG_ID >= 0"}, AUX_GENERIC_PARAMS.DEBUG_ID);
    end
    if (AUX_GENERIC_PARAMS.DEBUG_EVENT_DEPTH < 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.",
                 "DEBUG_EVENT_DEPTH = %0d, expected AUX_GENERIC_PARAMS.DEBUG_EVENT_DEPTH >= 0"},
                 AUX_GENERIC_PARAMS.DEBUG_EVENT_DEPTH);
    end

    // Note: AUX_SPECIAL_PARAMS must be checked in relevant files (config_decoder, control and core)
  end

  // Arch assignment
  // Bundles the five parameter structs into the single ARCH struct that is
  // passed to the config decoder, the groups and the debug block.
  localparam aux_depthwise_arch_params_t ARCH = '{
    AUX_DATA_PACK_PARAMS : AUX_DATA_PACK_PARAMS,
    CONFIG_STREAM_PARAMS : CONFIG_STREAM_PARAMS,
    DEBUG_AXI_PARAMS     : DEBUG_AXI_PARAMS,
    AUX_GENERIC_PARAMS   : AUX_GENERIC_PARAMS,
    AUX_SPECIAL_PARAMS   : AUX_SPECIAL_PARAMS
  };

  // Reset module constants
  localparam RST_USE_SYNCHRONIZER = 1; // use the synchronizer (clock domain crossings present)
  localparam RST_PIPE_DEPTH       = 3; // stages of synchronization registers
  localparam RST_NUM_COPIES       = 3; // number of reset signals

  // synchronized reset copies; usage in this file:
  //   sclrn[0]                -> group_sreset_n[0] (group 0, delay chain, filter bias cache)
  //   sclrn[RST_NUM_COPIES-2] -> config decoder
  //   sclrn[RST_NUM_COPIES-1] -> debug block
  logic [RST_NUM_COPIES-1:0] sclrn;

  /*------------------------------------------------------------------------------
  -- Reset handler
  ------------------------------------------------------------------------------*/
  dla_reset_handler_simple #(
    .USE_SYNCHRONIZER(RST_USE_SYNCHRONIZER),
    .PIPE_DEPTH      (RST_PIPE_DEPTH      ),
    .NUM_COPIES      (RST_NUM_COPIES      )
  ) dla_reset_handler_simple_inst (
    .clk     (clk      ),
    .i_resetn(i_aresetn), // active-low asynchronous reset input
    .o_sclrn (sclrn    )  // one or more copies of synchronized reset, 'dont_merge' constraints applied
  );

  // Group 0 only signals
  control_to_config_t control_to_config;

  // Systolic Group signals
  // config-control interface and related signals
  // (one interface instance per delay-chain entry; entry [0] is driven by the config decoder)
  depthwise_config_to_control_if #(
    .special_params  (AUX_SPECIAL_PARAMS  ),
    .data_pack_params(AUX_DATA_PACK_PARAMS)
  ) config_to_control[GROUP_SHIFT]();

  // control-lane interface
  // (one interface instance per delay-chain entry; entry [0] is connected to group 0)
  depthwise_control_to_lane_if #(
    .special_params  (AUX_SPECIAL_PARAMS  ),
    .data_pack_params(AUX_DATA_PACK_PARAMS)
  ) control_to_lane[GROUP_SHIFT]();

  // data and result handshaking signals
  logic              stream_data_valid     [GROUP_SHIFT-1:0];
  generic_response_t stream_result_request [GROUP_SHIFT-1:0];

  // synchronized reset
  logic group_sreset_n [GROUP_SHIFT-1:0];

  // Filter and bias buffers produced by the filter bias cache and fed to every group.
  // filter_data_buf dimensions: [VECTOR_SIZE][MAX_WINDOW_HEIGHT*MAX_WINDOW_WIDTH][ELEMENT_BITS]
  logic [ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE-1:0]
        [ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_HEIGHT*ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_WIDTH-1:0]
        [ARCH.AUX_DATA_PACK_PARAMS.ELEMENT_BITS-1:0] filter_data_buf;
  logic [ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE-1:0]
        [AUX_DEPTHWISE_VECTOR_ARCH.BIAS_WIDTH-1:0] bias_data_buf; // fp32 bias

  // debug connections
  debug_config_t debug_from_config;
  debug_group_t  debug_from_group ;

  // Assign group delay input signals
  // Entry [0] of each chain is combinational; data is counted as valid only
  // when the input is valid AND this block reports ready on o_data.
  assign group_sreset_n       [0]       = sclrn[0];
  assign stream_data_valid    [0]       = (i_data_valid & o_data.ready);
  assign stream_result_request[0].ready = i_result.ready;

  //
  // Group-delay chains of all group delayed signals
  //
  // Each chain entry i registers entry i-1 on the rising edge of clk, so entry
  // i lags entry 0 by i clock cycles. Nothing is generated when GROUP_DELAY is
  // 0 (GROUP_SHIFT is then 1 and only entry [0] exists).
  // NOTE(review): only element .data[0][0] of the two interfaces is forwarded
  // through the chain -- confirm against the interface definitions that no
  // other element needs to be delayed.
  if (GROUP_DELAY > 0) begin : gen_non_zero_delay
    for(genvar i = 1; i < GROUP_SHIFT; i++) begin : gen_group_delay
      always_ff @(posedge clk) begin : proc_group_delay
        group_sreset_n       [i]            <= group_sreset_n       [i-1];
        stream_result_request[i]            <= stream_result_request[i-1];
        stream_data_valid    [i]            <= stream_data_valid    [i-1];
        control_to_lane      [i].data[0][0] <= control_to_lane      [i-1].data[0][0];
        config_to_control    [i].data[0][0] <= config_to_control    [i-1].data[0][0];
      end : proc_group_delay
    end : gen_group_delay
  end : gen_non_zero_delay

  //
  // Config decoder
  //
  // Consumes the config stream, receives control_to_config from group 0 and
  // drives entry [0] of the config_to_control chain.
  dla_aux_depthwise_config_decoder #(
    .ARCH(ARCH)
  ) dla_aux_depthwise_config_decoder_inst (
    .clk                 ( clk                      ),
    .i_resetn            ( sclrn [RST_NUM_COPIES-2] ),
    .i_config            ( i_config                 ),
    .i_config_valid      ( i_config_valid           ),
    .o_config            ( o_config                 ),
    .i_control_to_config ( control_to_config        ),
    .o_config_to_control ( config_to_control[0]     ),
    .o_debug             ( debug_from_config        )
  );

  //
  // Multiple groups (phases) are generated
  //
  // First group is responsible for interacting with Config Decoder and generating debug information
  // It is the only group whose o_data, o_result_valid, o_control_to_config,
  // o_config_filter_bias_ready and o_debug outputs are used.
  // Both i_control_to_lane and o_control_to_lane_next are connected to
  // control_to_lane[0].
  // NOTE(review): o_data is connected through a streaming concatenation
  // ({>>{o_data}}); the reason is not visible in this file -- presumably a
  // type adaptation between the group port and generic_response_t, confirm.
  dla_aux_depthwise_group #(
    .ID  (0   ),
    .ARCH(ARCH),
    .AUX_DEPTHWISE_VECTOR_ARCH(AUX_DEPTHWISE_VECTOR_ARCH),
    .DEPTHWISE_VECTOR_ARCH_INFO(DEPTHWISE_VECTOR_ARCH_INFO)
  ) dla_aux_depthwise_group_inst (
    .clk                        ( clk                        ),
    .i_resetn                   ( group_sreset_n        [0]  ),
    .i_data                     ( i_data                [0]  ),
    .i_data_valid               ( stream_data_valid     [0]  ),
    .o_data                     ( {>>{o_data}}               ),
    .i_result                   ( stream_result_request [0]  ),
    .o_result                   ( o_result              [0]  ),
    .o_result_valid             ( o_result_valid             ),
    .i_filter                   ( filter_data_buf            ),
    .i_bias                     ( bias_data_buf              ),
    .i_config_to_control        ( config_to_control     [0]  ),
    .o_control_to_config        ( control_to_config          ),
    .i_control_to_lane          ( control_to_lane       [0]  ),
    .o_control_to_lane_next     ( control_to_lane       [0]  ),
    .i_config_filter_bias_valid ( i_config_filter_bias_valid ),
    .i_config_filter_bias_data  ( i_config_filter_bias_data  ),
    .o_config_filter_bias_ready ( o_config_filter_bias_ready ),
    .o_debug                    ( debug_from_group           )
  );

  // Other groups only consume decoded config
  // Group i is fed from delay-chain entry [i * GROUP_DELAY] and handles slice
  // [i] of i_data / o_result. Its handshake, config feedback and debug
  // outputs are intentionally left unconnected.
  for (genvar i = 1; i < GROUP_NUM; i++) begin : gen_groups
    // Must pass an interface to o_control_to_lane_next, so create a placeholder interface
    depthwise_control_to_lane_if #(.special_params(AUX_SPECIAL_PARAMS), .data_pack_params(AUX_DATA_PACK_PARAMS)) output_control_to_lane();

    dla_aux_depthwise_group #(
      .ID  (i   ),
      .ARCH(ARCH),
      .AUX_DEPTHWISE_VECTOR_ARCH(AUX_DEPTHWISE_VECTOR_ARCH),
      .DEPTHWISE_VECTOR_ARCH_INFO(DEPTHWISE_VECTOR_ARCH_INFO)
    ) dla_aux_depthwise_group_inst (
      .clk                        ( clk                                     ),
      .i_resetn                   ( group_sreset_n       [i * GROUP_DELAY]  ),
      .i_data                     ( i_data               [i              ]  ),
      .i_data_valid               ( stream_data_valid    [i * GROUP_DELAY]  ),
      .o_data                     (                                         ),
      .i_result                   ( stream_result_request[i * GROUP_DELAY]  ),
      .o_result                   ( o_result             [i              ]  ),
      .o_result_valid             (                                         ),
      .i_filter                   ( filter_data_buf                         ),
      .i_bias                     ( bias_data_buf                           ),
      .i_config_to_control        ( config_to_control    [i * GROUP_DELAY]  ),
      .o_control_to_config        (                                         ),
      .i_control_to_lane          ( control_to_lane      [i * GROUP_DELAY]  ),
      .o_control_to_lane_next     ( output_control_to_lane                  ),
      .i_config_filter_bias_valid ( i_config_filter_bias_valid              ),
      .i_config_filter_bias_data  ( i_config_filter_bias_data               ),
      .o_config_filter_bias_ready (                                         ),
      .o_debug                    (                                         )
    );
  end : gen_groups

  //
  // Filter bias cache
  //
  logic filter_cache_ready;   // ready output of the filter bias cache
  logic configured_and_ready; // high while configured_starting != configured_ending (see assign below)

  // Consistency check between the ready driven by group 0
  // (o_config_filter_bias_ready) and the locally derived equivalent.
  // NOTE(review): this is an `initial` block, so the comparison is evaluated
  // only once at the start of simulation; if a continuous check is intended
  // it would need a clocked process -- confirm with the block owner.
  initial begin
    if (o_config_filter_bias_ready != (filter_cache_ready && configured_and_ready)) begin
      $display("Double plumbed filters not matching ready %d %d %d", o_config_filter_bias_ready, filter_cache_ready, configured_and_ready);
    end
  end
  // Disabled alternative driver for o_config_filter_bias_ready (the port is driven by group 0 instead):
  //assign o_config_filter_bias_ready = filter_cache_ready && configured_and_ready;
  assign configured_and_ready = control_to_lane[0].data[0][0].configured_starting !=
                                control_to_lane[0].data[0][0].configured_ending;

  // NOTE(review): CONFIG_BIT_WIDTH is hard-coded to 32 while i_data is
  // CONFIG_STREAM_PARAMS.DATA_WIDTH bits wide -- presumably DATA_WIDTH is
  // expected to be 32, confirm.
  dla_aux_depthwise_filter_bias_cache #(
    .MAX_WINDOW_HEIGHT      (ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_HEIGHT             ),
    .MAX_WINDOW_WIDTH       (ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_WIDTH              ),
    .ELEMENT_BITS           (ARCH.AUX_DATA_PACK_PARAMS.ELEMENT_BITS                ),
    .CONFIG_BIT_WIDTH       (32                                                    ),
    .VECTOR_SIZE            (ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE                 ),
    .WINDOW_BITS_VERTICAL   ($clog2(ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_HEIGHT + 1) ),
    .WINDOW_BITS_HORIZONTAL ($clog2(ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_WIDTH + 1)  ),
    .BIAS_WIDTH             (AUX_DEPTHWISE_VECTOR_ARCH.BIAS_WIDTH                  )
  ) filter_bias_cache (
    .clk                        ( clk                                            ),
    .i_resetn                   ( group_sreset_n [0]                             ),
    .i_data_valid               ( i_config_filter_bias_valid                     ),
    .i_data                     ( i_config_filter_bias_data                      ),
    .o_config_filter_bias_ready ( filter_cache_ready                             ),
    .i_configured_and_ready     ( configured_and_ready                           ),
    .i_done                     ( control_to_lane[GROUP_SHIFT-1].data[0][0].done ), // use the last lane's data
    .i_window_width             ( control_to_lane[0].data[0][0].window_width     ),
    .i_window_height            ( control_to_lane[0].data[0][0].window_height    ),
    .o_filter_valid             (                                                ),
    .o_filter                   ( filter_data_buf                                ),
    .o_bias                     ( bias_data_buf                                  )
  );

  //
  // Debug
  //
  // Collects the debug records from the config decoder and from group 0 and
  // serves them over the debug read-address / read-data ports.
  dla_aux_depthwise_debug #(
    .ARCH(ARCH)
  ) dla_aux_depthwise_debug_inst (
    .clk     (clk                      ),
    .i_resetn(sclrn[RST_NUM_COPIES-1]  ),
    //
    .i_config(debug_from_config        ),
    .i_group (debug_from_group         ),
    //
    .i_raddr      (i_debug_raddr      ),
    .i_raddr_valid(i_debug_raddr_valid),
    .o_raddr      (o_debug_raddr      ),
    .i_rdata      (i_debug_rdata      ),
    .o_rdata      (o_debug_rdata      ),
    .o_rdata_valid(o_debug_rdata_valid)
  );

endmodule
