// Copyright 2020-2023 Intel Corporation.
//
// This software and the related documents are Intel copyrighted materials,
// and your use of them is governed by the express license under which they
// were provided to you ("License"). Unless the License provides otherwise,
// you may not use, modify, copy, publish, distribute, disclose or transmit
// this software or the related documents without Intel's prior written
// permission.
//
// This software and the related documents are provided as is, with no express
// or implied warranties, other than those that are expressly stated in the
// License.

/*
 * Module `dla_aux_depthwise_top`
 *
 * Top level of the aux block.
 *
 * WARNING! DO NOT EDIT THIS FILE FOR FUTURE-COMPATIBILITY
 *
 * This module is a template for all auxiliary blocks and the content should
 * not be modified for future compatibility.
 *
 * Do not modify any of fields other than the module name and included package
 * name. In case of a limitation or bug please contact owner of the Example
 * Aux block.
 *
 * See README.md of the Example Aux block for more details.
 *
 * Structure (as instantiated in this file):
 *   - dla_reset_handler_simple          : produces RST_NUM_COPIES copies of the reset (sclrn)
 *   - dla_aux_depthwise_config_decoder  : consumes the config stream, drives config_to_control[0]
 *   - dla_aux_depthwise_group (ID 0)    : drives o_data, o_result_valid, control_to_config,
 *                                         o_config_filter_bias_ready and the group debug record
 *   - dla_aux_depthwise_group (ID 1..)  : one per remaining group, fed from the group-delay chain
 *   - dla_aux_depthwise_filter_bias_cache : produces filter_data_buf / bias_data_buf for all groups
 *   - dla_aux_depthwise_debug           : serves the debug AXI read ports
 */

`undefineall
`resetall
`default_nettype none

module dla_aux_depthwise_top
  import dla_aux_depthwise_pkg::*;
#(
  parameter aux_data_pack_params_t AUX_DATA_PACK_PARAMS,
  parameter stream_params_t CONFIG_STREAM_PARAMS,
  parameter debug_axi_params_t DEBUG_AXI_PARAMS,
  parameter aux_generic_params_t AUX_GENERIC_PARAMS,
  parameter aux_special_params_t AUX_SPECIAL_PARAMS,
  parameter vector_dot_arch_t AUX_DEPTHWISE_VECTOR_ARCH,
  parameter vector_dot_arch_info_t DEPTHWISE_VECTOR_ARCH_INFO,
  //
  // Derived values below are localparams: they cannot be overridden at instantiation.
  localparam stream_params_t DATA_STREAM_PARAMS = '{ // Data stream parameterization
    DATA_WIDTH : aux_params_to_bus_width(AUX_DATA_PACK_PARAMS)},
  // Short aliases for the AUX_DATA_PACK_PARAMS fields used in the port list and body
  localparam int GROUP_DELAY = AUX_DATA_PACK_PARAMS.GROUP_DELAY,
  localparam int GROUP_NUM = AUX_DATA_PACK_PARAMS.GROUP_NUM,
  localparam int GROUP_SIZE = AUX_DATA_PACK_PARAMS.GROUP_SIZE,
  localparam int VECTOR_SIZE = AUX_DATA_PACK_PARAMS.VECTOR_SIZE,
  localparam int ELEMENT_BITS = AUX_DATA_PACK_PARAMS.ELEMENT_BITS
) (
  input wire clk , // Clock
  input wire i_aresetn , // Active-low async reset
  //
  input var logic [GROUP_NUM -1:0] [GROUP_SIZE -1:0] [VECTOR_SIZE -1:0] [ELEMENT_BITS-1:0] i_data , // Data input stream port
  input var logic i_data_valid , // Data input stream port valid
  output generic_response_t o_data , // Data input stream port response
  //
  input generic_response_t i_result , // Result output stream port response
  output logic [GROUP_NUM -1:0] [GROUP_SIZE -1:0] [VECTOR_SIZE -1:0] [ELEMENT_BITS-1:0] o_result , // Result output stream port
  output logic o_result_valid , // Result output stream port valid
  //
  input var logic [CONFIG_STREAM_PARAMS.DATA_WIDTH-1:0] i_config , // Config stream port
  input var logic i_config_valid , // Config stream port valid
  output generic_response_t o_config , // Config stream port response
  input var logic i_config_filter_bias_valid, // Config (actual data) for cache
  input var logic [CONFIG_STREAM_PARAMS.DATA_WIDTH-1:0] i_config_filter_bias_data, // Filter/bias payload; fanned out to every group and to the cache
  output var logic o_config_filter_bias_ready, // Driven by group 0 only
  //
  input var logic [DEBUG_AXI_PARAMS.ADDR_WIDTH-1:0] i_debug_raddr , // Debug AXI read-address port
  input var logic i_debug_raddr_valid, // Debug AXI read-address port valid
  output generic_response_t o_debug_raddr , // Debug AXI read-address port response
  input generic_response_t i_debug_rdata , // Debug AXI read-data port response
  output logic [DEBUG_AXI_PARAMS.DATA_WIDTH-1:0] o_debug_rdata , // Debug AXI read-data port
  output logic o_debug_rdata_valid // Debug AXI read-data port valid
);

  // Number of taps in the group-delay chain. Group i reads tap (i * GROUP_DELAY), so the
  // last group (GROUP_NUM-1) needs index (GROUP_NUM-1)*GROUP_DELAY; the +1 turns that
  // highest index into a count. With GROUP_DELAY == 0 this is 1 and all groups share tap 0.
  localparam int GROUP_SHIFT = (GROUP_NUM-1)*GROUP_DELAY+1;

  // Parameter checking
  // Each check aborts with $fatal and reports the offending value and the accepted range.
  // The pragma comment after `initial` is a synthesis directive and must stay in place.
  initial /* synthesis enable_verilog_initial_construct */
  begin
    // check AUX_DATA_PACK_PARAMS
    if (AUX_DATA_PACK_PARAMS.ELEMENT_BITS > 32 || AUX_DATA_PACK_PARAMS.ELEMENT_BITS <= 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), ELEMENT_BITS = %0d, ",
        "expected 0 < ELEMENT_BITS <= 32"}, AUX_DATA_PACK_PARAMS.ELEMENT_BITS);
    end
    if (AUX_DATA_PACK_PARAMS.VECTOR_SIZE <= 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), VECTOR_SIZE = %0d, ",
        "expected VECTOR_SIZE > 0"}, AUX_DATA_PACK_PARAMS.VECTOR_SIZE);
    end
    if (AUX_DATA_PACK_PARAMS.NATIVE_VECTOR_SIZE <= 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), NATIVE_VECTOR_SIZE = %0d, ",
        "expected NATIVE_VECTOR_SIZE > 0"}, AUX_DATA_PACK_PARAMS.NATIVE_VECTOR_SIZE);
    end
    if (AUX_DATA_PACK_PARAMS.GROUP_SIZE <= 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), GROUP_SIZE = %0d, ",
        "expected GROUP_SIZE > 0"}, AUX_DATA_PACK_PARAMS.GROUP_SIZE);
    end
    if (AUX_DATA_PACK_PARAMS.GROUP_NUM <= 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), GROUP_NUM = %0d, ",
        "expected GROUP_NUM > 0"}, AUX_DATA_PACK_PARAMS.GROUP_NUM);
    end
    // GROUP_DELAY may legally be 0 (no delay chain is generated in that case)
    if (AUX_DATA_PACK_PARAMS.GROUP_DELAY < 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), GROUP_DELAY = %0d, ",
        "expected GROUP_DELAY >= 0"}, AUX_DATA_PACK_PARAMS.GROUP_DELAY);
    end
    // check CONFIG_STREAM_PARAMS
    if (CONFIG_STREAM_PARAMS.DATA_WIDTH <= 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), CONFIG_STREAM_PARAMS.DATA_WIDTH ",
        "= %0d, expected CONFIG_STREAM_PARAMS.DATA_WIDTH > 0"}, CONFIG_STREAM_PARAMS.DATA_WIDTH);
    end
    // check DEBUG_AXI_PARAMS
    if (DEBUG_AXI_PARAMS.DATA_WIDTH <= 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), DEBUG_AXI_PARAMS.DATA_WIDTH = ",
        "%0d, expected DEBUG_AXI_PARAMS.DATA_WIDTH > 0"}, DEBUG_AXI_PARAMS.DATA_WIDTH);
    end
    if (DEBUG_AXI_PARAMS.ADDR_WIDTH <= 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), DEBUG_AXI_PARAMS.ADDR_WIDTH = ",
        "%0d, expected DEBUG_AXI_PARAMS.ADDR_WIDTH > 0"}, DEBUG_AXI_PARAMS.ADDR_WIDTH);
    end
    // check AUX_GENERIC_PARAMS
    if (AUX_GENERIC_PARAMS.INPUT_BUFFER_REG_STAGES < 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.",
        "INPUT_BUFFER_REG_STAGES = %0d, expected AUX_GENERIC_PARAMS.INPUT_BUFFER_REG_STAGES >= 0"},
        AUX_GENERIC_PARAMS.INPUT_BUFFER_REG_STAGES);
    end
    if (AUX_GENERIC_PARAMS.COMMAND_BUFFER_DEPTH < 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.",
        "COMMAND_BUFFER_DEPTH = %0d, expected AUX_GENERIC_PARAMS.COMMAND_BUFFER_DEPTH >= 0"},
        AUX_GENERIC_PARAMS.COMMAND_BUFFER_DEPTH);
    end
    // PER_GROUP_CONTROL is treated as a boolean flag: only 0 and 1 are accepted
    if (AUX_GENERIC_PARAMS.PER_GROUP_CONTROL != 0 && AUX_GENERIC_PARAMS.PER_GROUP_CONTROL != 1) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.",
        "PER_GROUP_CONTROL = %0d, expected AUX_GENERIC_PARAMS.PER_GROUP_CONTROL = 0 or 1"},
        AUX_GENERIC_PARAMS.PER_GROUP_CONTROL);
    end
    if (AUX_GENERIC_PARAMS.DEBUG_LEVEL < 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.DEBUG_LEVEL ",
        "= %0d, expected AUX_GENERIC_PARAMS.DEBUG_LEVEL >= 0"}, AUX_GENERIC_PARAMS.DEBUG_LEVEL);
    end
    if (AUX_GENERIC_PARAMS.DEBUG_ID < 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.DEBUG_ID = ",
        "%0d, expected AUX_GENERIC_PARAMS.DEBUG_ID >= 0"}, AUX_GENERIC_PARAMS.DEBUG_ID);
    end
    if (AUX_GENERIC_PARAMS.DEBUG_EVENT_DEPTH < 0) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.",
        "DEBUG_EVENT_DEPTH = %0d, expected AUX_GENERIC_PARAMS.DEBUG_EVENT_DEPTH >= 0"},
        AUX_GENERIC_PARAMS.DEBUG_EVENT_DEPTH);
    end
    // Note: AUX_SPECIAL_PARAMS must be checked in relevant files (config_decoder, control and core)
  end

  // Arch assignment
  // Bundles the five parameter structs into the single ARCH struct handed to the sub-modules.
  localparam aux_depthwise_arch_params_t ARCH = '{
    AUX_DATA_PACK_PARAMS : AUX_DATA_PACK_PARAMS,
    CONFIG_STREAM_PARAMS : CONFIG_STREAM_PARAMS,
    DEBUG_AXI_PARAMS : DEBUG_AXI_PARAMS,
    AUX_GENERIC_PARAMS : AUX_GENERIC_PARAMS,
    AUX_SPECIAL_PARAMS : AUX_SPECIAL_PARAMS
  };

  // Reset module constants
  localparam RST_USE_SYNCHRONIZER = 1; // yes clock domain crossings
  localparam RST_PIPE_DEPTH = 3; // stages of synchronization registers
  localparam RST_NUM_COPIES = 3; // number of reset signals

  // synchronized reset
  // Copy usage in this file: [0] -> group-delay chain head (groups and filter/bias cache),
  // [RST_NUM_COPIES-2] -> config decoder, [RST_NUM_COPIES-1] -> debug block.
  logic [RST_NUM_COPIES-1:0] sclrn;

  /*------------------------------------------------------------------------------
  -- Reset handler
  ------------------------------------------------------------------------------*/
  dla_reset_handler_simple #(
    .USE_SYNCHRONIZER(RST_USE_SYNCHRONIZER),
    .PIPE_DEPTH (RST_PIPE_DEPTH ),
    .NUM_COPIES (RST_NUM_COPIES )
  ) dla_reset_handler_simple_inst (
    .clk (clk ),
    .i_resetn(i_aresetn), // active-low asynchronous reset input
    .o_sclrn (sclrn ) // one or more copies of synchronized reset, 'dont_merge' constraints applied
  );

  // Group 0 only signals
  // Feedback from group 0 to the config decoder
  control_to_config_t control_to_config;

  // Systolic Group signals
  // config-control interface and related signals
  // One interface instance per delay-chain tap; tap 0 is driven by the config decoder.
  depthwise_config_to_control_if #(
    .special_params (AUX_SPECIAL_PARAMS ),
    .data_pack_params(AUX_DATA_PACK_PARAMS)
  ) config_to_control[GROUP_SHIFT]();

  // control-lane interface
  // One interface instance per delay-chain tap; tap 0 is connected to group 0's
  // o_control_to_lane_next port.
  depthwise_control_to_lane_if #(
    .special_params (AUX_SPECIAL_PARAMS ),
    .data_pack_params(AUX_DATA_PACK_PARAMS)
  ) control_to_lane[GROUP_SHIFT]();

  // data and result handshaking signals
  logic stream_data_valid[GROUP_SHIFT-1:0];
  generic_response_t stream_result_request [GROUP_SHIFT-1:0];

  // synchronized reset
  // Per-tap copy of the reset so each group sees reset with the same delay as its data.
  logic group_sreset_n [GROUP_SHIFT-1:0];

  // Filter and bias buffers produced by the filter/bias cache and shared by every group.
  // Filter layout: [vector lane][MAX_WINDOW_HEIGHT*MAX_WINDOW_WIDTH entries][element bits].
  logic [ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE-1:0][ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_HEIGHT*ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_WIDTH-1:0][ARCH.AUX_DATA_PACK_PARAMS.ELEMENT_BITS-1:0] filter_data_buf;
  logic [ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE-1:0][AUX_DEPTHWISE_VECTOR_ARCH.BIAS_WIDTH-1:0] bias_data_buf; // fp32 bias

  // debug connections
  debug_config_t debug_from_config;
  debug_group_t debug_from_group ;

  // Assign group delay input signals
  assign group_sreset_n [0] = sclrn[0];
  // A data beat is forwarded only when the incoming valid coincides with the ready
  // response that group 0 drives on o_data.
  assign stream_data_valid [0] = (i_data_valid & o_data.ready);
  // Only the `ready` field of tap 0 is driven here.
  assign stream_result_request[0].ready = i_result.ready;

  //
  // Group-delay chains of all group delayed signals
  //
  // Each tap i is a one-clock registered copy of tap i-1. For the two interface arrays
  // only element data[0][0] is forwarded. Nothing is generated when GROUP_DELAY == 0.
  if (GROUP_DELAY > 0) begin : gen_non_zero_delay
    for(genvar i = 1; i < GROUP_SHIFT; i++) begin : gen_group_delay
      always_ff @(posedge clk) begin : proc_group_delay
        group_sreset_n [i] <= group_sreset_n [i-1];
        stream_result_request[i] <= stream_result_request[i-1];
        stream_data_valid [i] <= stream_data_valid [i-1];
        control_to_lane [i].data[0][0] <= control_to_lane [i-1].data[0][0];
        config_to_control [i].data[0][0] <= config_to_control [i-1].data[0][0];
      end : proc_group_delay
    end : gen_group_delay
  end : gen_non_zero_delay

  //
  // Config decoder
  //
  // Consumes the config stream and drives tap 0 of config_to_control; receives
  // control_to_config back from group 0.
  dla_aux_depthwise_config_decoder #(
    .ARCH(ARCH)
  ) dla_aux_depthwise_config_decoder_inst (
    .clk ( clk ),
    .i_resetn ( sclrn [RST_NUM_COPIES-2] ),
    .i_config ( i_config ),
    .i_config_valid ( i_config_valid ),
    .o_config ( o_config ),
    .i_control_to_config ( control_to_config ),
    .o_config_to_control ( config_to_control[0] ),
    .o_debug ( debug_from_config )
  );

  //
  // Multiple groups (phases) are generated
  //
  // First group is responsible for interacting with Config Decoder and generating debug information
  // It is also the only group whose o_data, o_result_valid, o_control_to_config,
  // o_config_filter_bias_ready and o_debug outputs are connected.
  dla_aux_depthwise_group #(
    .ID (0 ),
    .ARCH(ARCH),
    .AUX_DEPTHWISE_VECTOR_ARCH(AUX_DEPTHWISE_VECTOR_ARCH),
    .DEPTHWISE_VECTOR_ARCH_INFO(DEPTHWISE_VECTOR_ARCH_INFO)
  ) dla_aux_depthwise_group_inst (
    .clk ( clk ),
    .i_resetn ( group_sreset_n [0] ),
    .i_data ( i_data [0] ),
    .i_data_valid ( stream_data_valid [0] ),
    .o_data ( {>>{o_data}} ),
    .i_result ( stream_result_request [0] ),
    .o_result ( o_result [0] ),
    .o_result_valid ( o_result_valid ),
    .i_filter ( filter_data_buf ),
    .i_bias ( bias_data_buf ),
    .i_config_to_control ( config_to_control [0] ),
    .o_control_to_config ( control_to_config ),
    // Tap 0 of control_to_lane is wired to both the input and the "next" output port
    .i_control_to_lane ( control_to_lane [0] ),
    .o_control_to_lane_next ( control_to_lane [0] ),
    .i_config_filter_bias_valid ( i_config_filter_bias_valid ),
    .i_config_filter_bias_data ( i_config_filter_bias_data ),
    .o_config_filter_bias_ready ( o_config_filter_bias_ready ),
    .o_debug ( debug_from_group )
  );

  // Other groups only consume decoded config
  // Group i takes its reset, handshake and control inputs from delay-chain tap i * GROUP_DELAY.
  for (genvar i = 1; i < GROUP_NUM; i++) begin : gen_groups
    // Must pass an interface to o_control_lane_next, so create a placeholder interface
    depthwise_control_to_lane_if #(.special_params(AUX_SPECIAL_PARAMS), .data_pack_params(AUX_DATA_PACK_PARAMS)) output_control_to_lane();

    dla_aux_depthwise_group #(
      .ID (i ),
      .ARCH(ARCH),
      .AUX_DEPTHWISE_VECTOR_ARCH(AUX_DEPTHWISE_VECTOR_ARCH),
      .DEPTHWISE_VECTOR_ARCH_INFO(DEPTHWISE_VECTOR_ARCH_INFO)
    ) dla_aux_depthwise_group_inst (
      .clk ( clk ),
      .i_resetn ( group_sreset_n [i * GROUP_DELAY] ),
      .i_data ( i_data [i ] ),
      .i_data_valid ( stream_data_valid [i * GROUP_DELAY] ),
      .o_data ( ),
      .i_result ( stream_result_request[i * GROUP_DELAY] ),
      .o_result ( o_result [i ] ),
      .o_result_valid ( ),
      .i_filter ( filter_data_buf ),
      .i_bias ( bias_data_buf ),
      .i_config_to_control ( config_to_control [i * GROUP_DELAY] ),
      .o_control_to_config ( ),
      .i_control_to_lane ( control_to_lane [i * GROUP_DELAY] ),
      .o_control_to_lane_next ( output_control_to_lane ),
      .i_config_filter_bias_valid ( i_config_filter_bias_valid ),
      .i_config_filter_bias_data ( i_config_filter_bias_data ),
      .o_config_filter_bias_ready ( ),
      .o_debug ( )
    );
  end : gen_groups

  //
  // Filter bias cache
  //
  logic filter_cache_ready; // cache-side ready; in this file it is only read by the check below
  logic configured_and_ready;

  // Compares the ready driven by group 0 against the cache-derived ready and prints a
  // message on mismatch.
  // NOTE(review): this is an `initial` block with no timing control, so the comparison
  // appears to be evaluated only once at the start of simulation rather than monitored
  // continuously - confirm that this is the intent.
  initial begin
    if (o_config_filter_bias_ready != (filter_cache_ready && configured_and_ready)) begin
      $display("Double plumbed filters not matching ready %d %d %d", o_config_filter_bias_ready, filter_cache_ready, configured_and_ready);
    end
  end

  //assign o_config_filter_bias_ready = filter_cache_ready && configured_and_ready;
  // High while the configured_starting and configured_ending fields of tap 0 differ.
  assign configured_and_ready = control_to_lane[0].data[0][0].configured_starting != control_to_lane[0].data[0][0].configured_ending;

  // NOTE(review): CONFIG_BIT_WIDTH is fixed at 32 here while i_config_filter_bias_data is
  // CONFIG_STREAM_PARAMS.DATA_WIDTH bits wide - confirm the two always agree.
  dla_aux_depthwise_filter_bias_cache #(
    .MAX_WINDOW_HEIGHT (ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_HEIGHT ),
    .MAX_WINDOW_WIDTH (ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_WIDTH ),
    .ELEMENT_BITS (ARCH.AUX_DATA_PACK_PARAMS.ELEMENT_BITS ),
    .CONFIG_BIT_WIDTH (32 ),
    .VECTOR_SIZE (ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE ),
    .WINDOW_BITS_VERTICAL ($clog2(ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_HEIGHT + 1) ),
    .WINDOW_BITS_HORIZONTAL ($clog2(ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_WIDTH + 1) ),
    .BIAS_WIDTH (AUX_DEPTHWISE_VECTOR_ARCH.BIAS_WIDTH )
  ) filter_bias_cache (
    .clk ( clk ),
    .i_resetn ( group_sreset_n [0] ),
    .i_data_valid ( i_config_filter_bias_valid ),
    .i_data ( i_config_filter_bias_data ),
    .o_config_filter_bias_ready ( filter_cache_ready ),
    .i_configured_and_ready ( configured_and_ready ),
    .i_done ( control_to_lane[GROUP_SHIFT-1].data[0][0].done ), // use the last lane's data
    .i_window_width ( control_to_lane[0].data[0][0].window_width ),
    .i_window_height ( control_to_lane[0].data[0][0].window_height ),
    .o_filter_valid ( ),
    .o_filter ( filter_data_buf ),
    .o_bias ( bias_data_buf )
  );

  //
  // Debug
  //
  // Collects the debug records from the config decoder and group 0 and serves the
  // debug AXI read-address / read-data ports.
  dla_aux_depthwise_debug #(
    .ARCH(ARCH)
  ) dla_aux_depthwise_debug_inst (
    .clk (clk ),
    .i_resetn(sclrn[RST_NUM_COPIES-1] ),
    //
    .i_config(debug_from_config ),
    .i_group (debug_from_group ),
    //
    .i_raddr (i_debug_raddr ),
    .i_raddr_valid(i_debug_raddr_valid),
    .o_raddr (o_debug_raddr ),
    .i_rdata (i_debug_rdata ),
    .o_rdata (o_debug_rdata ),
    .o_rdata_valid(o_debug_rdata_valid)
  );

endmodule