// Copyright 2020-2023 Intel Corporation.
//
// This software and the related documents are Intel copyrighted materials,
// and your use of them is governed by the express license under which they
// were provided to you ("License"). Unless the License provides otherwise,
// you may not use, modify, copy, publish, distribute, disclose or transmit
// this software or the related documents without Intel's prior written
// permission.
//
// This software and the related documents are provided as is, with no express
// or implied warranties, other than those that are expressly stated in the
// License.

/*
 * Module `dla_aux_depthwise_lane`
 *
 * Lane sub-module of the auxiliary block.
 *
 * WARNING! DO NOT EDIT THIS FILE FOR FUTURE-COMPATIBILITY
 *
 * This module is a template for all auxiliary blocks and the content should
 * not be modified for future compatibility.
 *
 * Do not modify any of the fields other than the module name and included
 * package name. In case of a limitation or bug please contact the owner of
 * the Example Aux block.
 *
 * See README.md of the Example Aux block for more details.
 *
 * Structure (as wired in this file):
 *
 *   i_data -> dla_aux_depthwise_input_buffer -> dla_aux_depthwise_core
 *          -> dla_hld_fifo (output buffer) -> o_result
 *
 * Flow control: the input buffer is only told "ready" when the control
 * interface is ready, the output FIFO is not almost full, and the lane is out
 * of reset. The core itself has no stall input in this module, so the output
 * FIFO must be able to absorb whatever is still in flight in the core once
 * almost-full is raised; the parameter checks below enforce this.
 */

`undefineall
`resetall
`default_nettype none

`include "dla_acl_parameter_assert.svh"

module dla_aux_depthwise_lane
  import dla_aux_depthwise_pkg::*;
#(
  parameter int                         ID,                        // Unique ID of the lane
  parameter aux_depthwise_arch_params_t ARCH,                      // Architecture parameters
  parameter vector_dot_arch_t           AUX_DEPTHWISE_VECTOR_ARCH, // Forwarded to the core; its BIAS_WIDTH field sizes i_bias
  parameter vector_dot_arch_info_t      DEPTHWISE_VECTOR_ARCH_INFO // Forwarded to the core; also used to compute the core latency
) (
  input  wire                                                  clk                       , // Clock
  input  wire                                                  i_resetn                  , // Active-low sync reset
  // Data input (into the input buffer)
  input  var logic [ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE-1:0]
                   [ARCH.AUX_DATA_PACK_PARAMS.ELEMENT_BITS-1:0] i_data                   , // Data input port
  input  var logic                                             i_data_valid              , // Data input port valid
  output generic_response_t                                    o_data                    , // Data input response
  // Result output (out of the output FIFO)
  input  generic_response_t                                    i_result                  , // Result output response
  output logic [ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE-1:0]
               [ARCH.AUX_DATA_PACK_PARAMS.ELEMENT_BITS-1:0]    o_result                  , // Result output port
  output logic                                                 o_result_valid            , // Result output port valid
  // Filter and bias values, passed straight through to the core.
  // i_filter holds MAX_WINDOW_HEIGHT*MAX_WINDOW_WIDTH elements of ELEMENT_BITS
  // for each of the VECTOR_SIZE vector entries.
  input  wire [ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE-1:0]
              [ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_HEIGHT*ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_WIDTH-1:0]
              [ARCH.AUX_DATA_PACK_PARAMS.ELEMENT_BITS-1:0]     i_filter,
  input  wire [ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE-1:0]
              [AUX_DEPTHWISE_VECTOR_ARCH.BIAS_WIDTH-1:0]       i_bias, // fp32 bias
  // Filter/bias configuration stream, passed straight through to the core
  input  var logic                                             i_config_filter_bias_valid, // Config (actual data) for cache
  input  var logic [31:0]                                      i_config_filter_bias_data,
  output var logic                                             o_config_filter_bias_ready,
  // Control
  depthwise_control_to_lane_if.receiver                        i_control_to_lane, // Control to lane port
  output lane_to_control_t                                     o_lane_to_control, // Control to lane response
  // Debug
  output debug_lane_t                                          o_debug // Debug output
);

  // Simulation-only checks that the parameters carried by the control
  // interface match this lane's ARCH parameters.
  /* synthesis translate_off */
  `DLA_ACL_PARAMETER_ASSERT_MESSAGE(aux_data_pack_params_t'(i_control_to_lane.data_pack_params) == ARCH.AUX_DATA_PACK_PARAMS, "i_control_to_lane if parameters don't match data pack params")
  `DLA_ACL_PARAMETER_ASSERT_MESSAGE(aux_special_params_t'(i_control_to_lane.special_params) == ARCH.AUX_SPECIAL_PARAMS, "i_control_to_lane if parameters don't match special params")
  /* synthesis translate_on */

  // Smallest legal almost-full cutoff: the core latency as reported by the
  // package function aux_depthwise_calc_core_latency().
  localparam MIN_OUTPUT_BUFFER_FIFO_CUTOFF = aux_depthwise_calc_core_latency(ARCH.AUX_SPECIAL_PARAMS, ARCH.AUX_DATA_PACK_PARAMS, DEPTHWISE_VECTOR_ARCH_INFO);

  // If FIFO depth or cutoff parameters are set to -1 the values are calculated internally:
  //   cutoff defaults to the minimum cutoff above,
  //   depth  defaults to the (resolved) cutoff plus 3.
  localparam OUTPUT_BUFFER_FIFO_CUTOFF = ARCH.AUX_GENERIC_PARAMS.OUTPUT_BUFFER_FIFO_CUTOFF == -1 ? MIN_OUTPUT_BUFFER_FIFO_CUTOFF : ARCH.AUX_GENERIC_PARAMS.OUTPUT_BUFFER_FIFO_CUTOFF;
  localparam OUTPUT_BUFFER_FIFO_DEPTH  = ARCH.AUX_GENERIC_PARAMS.OUTPUT_BUFFER_FIFO_DEPTH == -1 ? OUTPUT_BUFFER_FIFO_CUTOFF + 3 : ARCH.AUX_GENERIC_PARAMS.OUTPUT_BUFFER_FIFO_DEPTH;

  // Output FIFO word width in bits: one full data vector per word.
  localparam FIFO_WIDTH = ARCH.AUX_DATA_PACK_PARAMS.ELEMENT_BITS * ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE;

  // Almost-full flag of the output FIFO; gates the input buffer's ready below.
  logic fifo_afull;

  // Parameter checking
  initial /* synthesis enable_verilog_initial_construct */
  begin
    // The output (skid) buffer size must be larger than the data stored in the data pipeline.
    // In the case of back-pressure the core will fill the output buffer as it cannot be stalled
    if (OUTPUT_BUFFER_FIFO_CUTOFF >= OUTPUT_BUFFER_FIFO_DEPTH) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_lane.sv), expecting ",
        "OUTPUT_BUFFER_FIFO_CUTOFF < OUTPUT_BUFFER_FIFO_DEPTH"});
    end
    // The skid depth (cutoff) must be at least the minimum value computed above
    if (OUTPUT_BUFFER_FIFO_CUTOFF < MIN_OUTPUT_BUFFER_FIFO_CUTOFF) begin
      $fatal(1, {"Illegal parameterization (dla_aux_depthwise_lane.sv), expecting ",
        "OUTPUT_BUFFER_FIFO_CUTOFF >= ", $sformatf("%0d",MIN_OUTPUT_BUFFER_FIFO_CUTOFF)});
    end
  end

  // internal connecting wires for user logic block
  // Input buffer -> core
  logic [ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE-1:0]
        [ARCH.AUX_DATA_PACK_PARAMS.ELEMENT_BITS-1:0] core_data         ; //
  logic                                              core_data_valid   ;
  generic_response_t                                 core_data_response;
  // Core -> output FIFO
  logic [ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE-1:0]
        [ARCH.AUX_DATA_PACK_PARAMS.ELEMENT_BITS-1:0] core_result         ; //
  logic                                              core_result_valid   ;
  // NOTE(review): core_result_response is declared but not referenced anywhere
  // else in this module.
  generic_response_t                                 core_result_response;
  // Core -> control status
  logic depthwise_filter_ready;

  // connect ready signals to input buffer
  // Ready requires all of: control interface ready (only element [0][0] of
  // i_control_to_lane.data is consulted), output FIFO not almost full, and
  // reset de-asserted (i_resetn is active-low).
  assign core_data_response.ready = i_control_to_lane.data[0][0].ready & ~fifo_afull & i_resetn;

  // connect core input and output valid signals to the control
  assign o_lane_to_control.core_input_valid       = core_data_valid;
  assign o_lane_to_control.core_output_valid      = core_result_valid;
  assign o_lane_to_control.depthwise_filter_ready = depthwise_filter_ready;

  // Input buffer: sits between the lane's data input port and the core.
  dla_aux_depthwise_input_buffer #(
    .ARCH(ARCH)
  ) dla_aux_depthwise_input_buffer_inst (
    .clk           (clk                 ),
    .i_resetn      (i_resetn            ),
    .i_data        (i_data              ),
    .i_data_valid  (i_data_valid        ),
    .o_data        (o_data              ),
    .i_result      (core_data_response  ),
    .o_result      (core_data           ),
    .o_result_valid(core_data_valid     ),
    .o_debug       (o_debug.input_buffer)
  );

  // Core: consumes buffered data plus filter/bias/config inputs and produces
  // results. No stall/ready input is connected to it here; back-pressure is
  // applied upstream through core_data_response.ready.
  dla_aux_depthwise_core #(
    .ID  (ID  ),
    .ARCH(ARCH),
    .AUX_DEPTHWISE_VECTOR_ARCH(AUX_DEPTHWISE_VECTOR_ARCH),
    .DEPTHWISE_VECTOR_ARCH_INFO(DEPTHWISE_VECTOR_ARCH_INFO)
  ) dla_aux_depthwise_core_inst (
    .clk           (clk                  ),
    .i_resetn      (i_resetn             ),
    .i_data        (core_data            ),
    .i_data_valid  (core_data_valid      ),
    .i_control     (i_control_to_lane    ),
    .o_result      (core_result          ),
    .o_result_valid(core_result_valid    ),
    .i_filter      (i_filter             ),
    .i_bias        (i_bias               ),
    .o_depthwise_filter_ready(depthwise_filter_ready),
    .i_config_filter_bias_valid (i_config_filter_bias_valid ),
    .i_config_filter_bias_data  (i_config_filter_bias_data  ),
    .o_config_filter_bias_ready (o_config_filter_bias_ready ),
    .o_debug       (o_debug.core_function)
  );

  // Output buffer is implemented with a FIFO
  // (depth is in words and width is in bits)
  // NOTE(review): NEVER_OVERFLOWS is set to 1, presumably relying on the
  // almost-full throttling above together with the cutoff/depth checks;
  // confirm against the dla_hld_fifo parameter documentation.
  dla_hld_fifo #(
    .WIDTH             (FIFO_WIDTH               ),
    .DEPTH             (OUTPUT_BUFFER_FIFO_DEPTH ),
    .ALMOST_FULL_CUTOFF(OUTPUT_BUFFER_FIFO_CUTOFF),
    .SYNCHRONIZE_RESET (0                        ),
    .NEVER_OVERFLOWS   (1                        ),
    .STYLE             ("ms"                     )
  ) dla_hld_fifo_inst (
    .clock         (clk                        ),
    .resetn        (i_resetn                   ),
    // Write side: driven by the core; o_stall is only exported as a debug flag
    .i_valid       (core_result_valid          ),
    .i_data        (core_result                ),
    .o_stall       (o_debug.output_buffer.full ),
    .o_almost_full (fifo_afull                 ),
    // Read side: drives the lane's result port; stalled while the downstream
    // response is not ready
    .o_valid       (o_result_valid             ),
    .o_data        (o_result                   ),
    .i_stall       (!i_result.ready            ),
    .o_almost_empty(                           ),
    .o_empty       (o_debug.output_buffer.empty),
    // ECC status left unconnected
    .ecc_err_status(                           )
  );

endmodule