summaryrefslogtreecommitdiff
path: root/python/openvino/demo/ip/intel_ai_ip/verilog/dla_aux_depthwise_top.sv
diff options
context:
space:
mode:
Diffstat (limited to 'python/openvino/demo/ip/intel_ai_ip/verilog/dla_aux_depthwise_top.sv')
-rw-r--r--python/openvino/demo/ip/intel_ai_ip/verilog/dla_aux_depthwise_top.sv381
1 files changed, 381 insertions, 0 deletions
diff --git a/python/openvino/demo/ip/intel_ai_ip/verilog/dla_aux_depthwise_top.sv b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_aux_depthwise_top.sv
new file mode 100644
index 0000000..59ca3c2
--- /dev/null
+++ b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_aux_depthwise_top.sv
@@ -0,0 +1,381 @@
+// Copyright 2020-2023 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/*
+ * Module `dla_aux_depthwise_top`
+ *
+ * Top level of the aux block.
+ *
+ * WARNING! DO NOT EDIT THIS FILE FOR FUTURE-COMPATIBILITY
+ *
+ * This module is a template for all auxiliary blocks and the content should
+ * not be modified for future compatibility.
+ *
+ * Do not modify any fields other than the module name and the included
+ * package name. In case of a limitation or bug, please contact the owner of
+ * the Example Aux block.
+ *
+ * See README.md of the Example Aux block for more details.
+ */
+
+`undefineall
+`resetall
+`default_nettype none
+
+module dla_aux_depthwise_top // Depthwise aux top: reset handler, config decoder, groups, filter/bias cache, debug
+ import dla_aux_depthwise_pkg::*; // presumably supplies the *_t types used below - confirm against the package
+#(
+ parameter aux_data_pack_params_t AUX_DATA_PACK_PARAMS, // Data packing (ELEMENT_BITS, VECTOR_SIZE, GROUP_*); range-checked below
+ parameter stream_params_t CONFIG_STREAM_PARAMS, // Config stream parameterization (DATA_WIDTH); range-checked below
+ parameter debug_axi_params_t DEBUG_AXI_PARAMS, // Debug AXI parameterization (ADDR_WIDTH, DATA_WIDTH); range-checked below
+ parameter aux_generic_params_t AUX_GENERIC_PARAMS, // Generic aux settings (buffer sizes, PER_GROUP_CONTROL, DEBUG_*); range-checked below
+ parameter aux_special_params_t AUX_SPECIAL_PARAMS, // Block-specific settings (MAX_WINDOW_HEIGHT/WIDTH are read here); not checked in this file
+ parameter vector_dot_arch_t AUX_DEPTHWISE_VECTOR_ARCH, // BIAS_WIDTH is read here; the struct is also forwarded to every group
+ parameter vector_dot_arch_info_t DEPTHWISE_VECTOR_ARCH_INFO, // Forwarded unchanged to every group instance
+ // Derived constants below are localparams, so an instantiator cannot override them
+ localparam stream_params_t DATA_STREAM_PARAMS = '{ // Data stream parameterization
+ DATA_WIDTH : aux_params_to_bus_width(AUX_DATA_PACK_PARAMS)}, // NOTE(review): DATA_STREAM_PARAMS is not referenced elsewhere in this file
+ localparam int GROUP_DELAY = AUX_DATA_PACK_PARAMS.GROUP_DELAY, // Delay-chain stages between consecutive groups
+ localparam int GROUP_NUM = AUX_DATA_PACK_PARAMS.GROUP_NUM, // Number of group instances generated
+ localparam int GROUP_SIZE = AUX_DATA_PACK_PARAMS.GROUP_SIZE, // Second dimension of i_data / o_result
+ localparam int VECTOR_SIZE = AUX_DATA_PACK_PARAMS.VECTOR_SIZE, // Third dimension of i_data / o_result
+ localparam int ELEMENT_BITS = AUX_DATA_PACK_PARAMS.ELEMENT_BITS // Bit width of one data element
+) (
+ input wire clk , // Clock
+ input wire i_aresetn , // Active-low async reset
+ // Data input stream
+ input var logic [GROUP_NUM -1:0]
+ [GROUP_SIZE -1:0]
+ [VECTOR_SIZE -1:0]
+ [ELEMENT_BITS-1:0] i_data , // Data input stream port
+ input var logic i_data_valid , // Data input stream port valid
+ output generic_response_t o_data , // Data input stream port response
+ // Result output stream
+ input generic_response_t i_result , // Result output stream port response
+ output logic [GROUP_NUM -1:0]
+ [GROUP_SIZE -1:0]
+ [VECTOR_SIZE -1:0]
+ [ELEMENT_BITS-1:0] o_result , // Result output stream port
+ output logic o_result_valid , // Result output stream port valid
+ // Config stream and filter/bias stream
+ input var logic [CONFIG_STREAM_PARAMS.DATA_WIDTH-1:0] i_config , // Config stream port
+ input var logic i_config_valid , // Config stream port valid
+ output generic_response_t o_config , // Config stream port response
+ input var logic i_config_filter_bias_valid, // Config (actual data) for cache
+ input var logic [CONFIG_STREAM_PARAMS.DATA_WIDTH-1:0] i_config_filter_bias_data, // Filter/bias word, routed to every group and to the filter/bias cache
+ output var logic o_config_filter_bias_ready, // Ready for the filter/bias stream; driven by group 0
+ // Debug AXI read channel
+ input var logic [DEBUG_AXI_PARAMS.ADDR_WIDTH-1:0] i_debug_raddr , // Debug AXI read-address port
+ input var logic i_debug_raddr_valid, // Debug AXI read-address port valid
+ output generic_response_t o_debug_raddr , // Debug AXI read-address port response
+ input generic_response_t i_debug_rdata , // Debug AXI read-data port response
+ output logic [DEBUG_AXI_PARAMS.DATA_WIDTH-1:0] o_debug_rdata , // Debug AXI read-data port
+ output logic o_debug_rdata_valid // Debug AXI read-data port valid
+);
+
+ localparam int GROUP_SHIFT = (GROUP_NUM-1)*GROUP_DELAY+1; // Delay-chain length: the last group taps index (GROUP_NUM-1)*GROUP_DELAY
+
+ // Parameter checking: every violated range below terminates with $fatal
+ initial /* synthesis enable_verilog_initial_construct */
+ begin
+ // check AUX_DATA_PACK_PARAMS
+ if (AUX_DATA_PACK_PARAMS.ELEMENT_BITS > 32 || AUX_DATA_PACK_PARAMS.ELEMENT_BITS <= 0) begin
+ $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), ELEMENT_BITS = %0d, ",
+ "expected 0 < ELEMENT_BITS <= 32"}, AUX_DATA_PACK_PARAMS.ELEMENT_BITS);
+ end
+ if (AUX_DATA_PACK_PARAMS.VECTOR_SIZE <= 0) begin
+ $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), VECTOR_SIZE = %0d, ",
+ "expected VECTOR_SIZE > 0"}, AUX_DATA_PACK_PARAMS.VECTOR_SIZE);
+ end
+ if (AUX_DATA_PACK_PARAMS.NATIVE_VECTOR_SIZE <= 0) begin
+ $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), NATIVE_VECTOR_SIZE = %0d, ",
+ "expected NATIVE_VECTOR_SIZE > 0"}, AUX_DATA_PACK_PARAMS.NATIVE_VECTOR_SIZE);
+ end
+ if (AUX_DATA_PACK_PARAMS.GROUP_SIZE <= 0) begin
+ $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), GROUP_SIZE = %0d, ",
+ "expected GROUP_SIZE > 0"}, AUX_DATA_PACK_PARAMS.GROUP_SIZE);
+ end
+ if (AUX_DATA_PACK_PARAMS.GROUP_NUM <= 0) begin
+ $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), GROUP_NUM = %0d, ",
+ "expected GROUP_NUM > 0"}, AUX_DATA_PACK_PARAMS.GROUP_NUM);
+ end
+ if (AUX_DATA_PACK_PARAMS.GROUP_DELAY < 0) begin
+ $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), GROUP_DELAY = %0d, ",
+ "expected GROUP_DELAY >= 0"}, AUX_DATA_PACK_PARAMS.GROUP_DELAY);
+ end
+
+ // check CONFIG_STREAM_PARAMS
+ if (CONFIG_STREAM_PARAMS.DATA_WIDTH <= 0) begin
+ $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), CONFIG_STREAM_PARAMS.DATA_WIDTH ",
+ "= %0d, expected CONFIG_STREAM_PARAMS.DATA_WIDTH > 0"}, CONFIG_STREAM_PARAMS.DATA_WIDTH);
+ end
+
+ // check DEBUG_AXI_PARAMS
+ if (DEBUG_AXI_PARAMS.DATA_WIDTH <= 0) begin
+ $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), DEBUG_AXI_PARAMS.DATA_WIDTH = ",
+ "%0d, expected DEBUG_AXI_PARAMS.DATA_WIDTH > 0"}, DEBUG_AXI_PARAMS.DATA_WIDTH);
+ end
+ if (DEBUG_AXI_PARAMS.ADDR_WIDTH <= 0) begin
+ $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), DEBUG_AXI_PARAMS.ADDR_WIDTH = ",
+ "%0d, expected DEBUG_AXI_PARAMS.ADDR_WIDTH > 0"}, DEBUG_AXI_PARAMS.ADDR_WIDTH);
+ end
+
+ // check AUX_GENERIC_PARAMS
+ if (AUX_GENERIC_PARAMS.INPUT_BUFFER_REG_STAGES < 0) begin
+ $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.",
+ "INPUT_BUFFER_REG_STAGES = %0d, expected AUX_GENERIC_PARAMS.INPUT_BUFFER_REG_STAGES >= 0"},
+ AUX_GENERIC_PARAMS.INPUT_BUFFER_REG_STAGES);
+ end
+ if (AUX_GENERIC_PARAMS.COMMAND_BUFFER_DEPTH < 0) begin
+ $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.",
+ "COMMAND_BUFFER_DEPTH = %0d, expected AUX_GENERIC_PARAMS.COMMAND_BUFFER_DEPTH >= 0"},
+ AUX_GENERIC_PARAMS.COMMAND_BUFFER_DEPTH);
+ end
+ if (AUX_GENERIC_PARAMS.PER_GROUP_CONTROL != 0 &&
+ AUX_GENERIC_PARAMS.PER_GROUP_CONTROL != 1) begin
+ $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.",
+ "PER_GROUP_CONTROL = %0d, expected AUX_GENERIC_PARAMS.PER_GROUP_CONTROL = 0 or 1"},
+ AUX_GENERIC_PARAMS.PER_GROUP_CONTROL);
+ end
+ if (AUX_GENERIC_PARAMS.DEBUG_LEVEL < 0) begin
+ $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.DEBUG_LEVEL ",
+ "= %0d, expected AUX_GENERIC_PARAMS.DEBUG_LEVEL >= 0"}, AUX_GENERIC_PARAMS.DEBUG_LEVEL);
+ end
+ if (AUX_GENERIC_PARAMS.DEBUG_ID < 0) begin
+ $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.DEBUG_ID = ",
+ "%0d, expected AUX_GENERIC_PARAMS.DEBUG_ID >= 0"}, AUX_GENERIC_PARAMS.DEBUG_ID);
+ end
+ if (AUX_GENERIC_PARAMS.DEBUG_EVENT_DEPTH < 0) begin
+ $fatal(1, {"Illegal parameterization (dla_aux_depthwise_top.sv), AUX_GENERIC_PARAMS.",
+ "DEBUG_EVENT_DEPTH = %0d, expected AUX_GENERIC_PARAMS.DEBUG_EVENT_DEPTH >= 0"},
+ AUX_GENERIC_PARAMS.DEBUG_EVENT_DEPTH);
+ end
+
+ // Note: AUX_SPECIAL_PARAMS must be checked in relevant files (config_decoder, control and core)
+ end
+
+ // Arch assignment: bundle the five parameter structs into the single struct passed to the submodules
+ localparam aux_depthwise_arch_params_t ARCH = '{
+ AUX_DATA_PACK_PARAMS : AUX_DATA_PACK_PARAMS,
+ CONFIG_STREAM_PARAMS : CONFIG_STREAM_PARAMS,
+ DEBUG_AXI_PARAMS : DEBUG_AXI_PARAMS,
+ AUX_GENERIC_PARAMS : AUX_GENERIC_PARAMS,
+ AUX_SPECIAL_PARAMS : AUX_SPECIAL_PARAMS
+ };
+
+ // Reset module constants
+ localparam RST_USE_SYNCHRONIZER = 1; // 1: synchronizer enabled (the reset input crosses clock domains)
+ localparam RST_PIPE_DEPTH = 3; // stages of synchronization registers
+ localparam RST_NUM_COPIES = 3; // number of reset copies: [0] groups and cache, [1] config decoder, [2] debug
+
+ // synchronized reset copies, one per consumer listed at RST_NUM_COPIES
+ logic [RST_NUM_COPIES-1:0] sclrn;
+
+ /*------------------------------------------------------------------------------
+ -- Reset handler
+ ------------------------------------------------------------------------------*/
+ dla_reset_handler_simple #(
+ .USE_SYNCHRONIZER(RST_USE_SYNCHRONIZER),
+ .PIPE_DEPTH (RST_PIPE_DEPTH ),
+ .NUM_COPIES (RST_NUM_COPIES )
+ ) dla_reset_handler_simple_inst (
+ .clk (clk ),
+ .i_resetn(i_aresetn), // active-low asynchronous reset input
+ .o_sclrn (sclrn ) // one or more copies of synchronized reset, 'dont_merge' constraints applied
+ );
+
+ // Group 0 only signals
+ control_to_config_t control_to_config; // feedback from group 0 to the config decoder
+
+ // Systolic Group signals
+ // config-control interface and related signals
+ depthwise_config_to_control_if #(
+ .special_params (AUX_SPECIAL_PARAMS ),
+ .data_pack_params(AUX_DATA_PACK_PARAMS)
+ ) config_to_control[GROUP_SHIFT](); // [0] is connected to the config decoder, [1..] are filled by the delay chain
+
+ // control-lane interface
+ depthwise_control_to_lane_if #(
+ .special_params (AUX_SPECIAL_PARAMS ),
+ .data_pack_params(AUX_DATA_PACK_PARAMS)
+ ) control_to_lane[GROUP_SHIFT](); // [0] is connected to group 0, [1..] are filled by the delay chain
+
+ // data and result handshaking signals
+ logic stream_data_valid[GROUP_SHIFT-1:0];
+ generic_response_t stream_result_request [GROUP_SHIFT-1:0];
+
+ // synchronized reset, one entry per delay-chain stage
+ logic group_sreset_n [GROUP_SHIFT-1:0];
+
+ logic [ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE-1:0][ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_HEIGHT*ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_WIDTH-1:0][ARCH.AUX_DATA_PACK_PARAMS.ELEMENT_BITS-1:0] filter_data_buf; // filter output of the cache, shared by all groups
+ logic [ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE-1:0][AUX_DEPTHWISE_VECTOR_ARCH.BIAS_WIDTH-1:0] bias_data_buf; // bias output of the cache, BIAS_WIDTH bits per vector lane (original note: fp32 bias)
+ // debug connections
+ debug_config_t debug_from_config;
+ debug_group_t debug_from_group ;
+
+ // Stage 0 of every delay chain is driven directly (no register)
+ assign group_sreset_n [0] = sclrn[0];
+ assign stream_data_valid [0] = (i_data_valid & o_data.ready); // asserted only when valid and ready are both high
+ assign stream_result_request[0].ready = i_result.ready; // only the .ready field is driven for stage 0
+
+ // One register stage per chain index; group i taps index i*GROUP_DELAY.
+ // Group-delay chains of all group delayed signals
+ // With GROUP_DELAY == 0, GROUP_SHIFT is 1, only index 0 exists and no registers are generated.
+ if (GROUP_DELAY > 0) begin : gen_non_zero_delay
+ for(genvar i = 1; i < GROUP_SHIFT; i++) begin : gen_group_delay
+ always_ff @(posedge clk) begin : proc_group_delay
+ group_sreset_n [i] <= group_sreset_n [i-1];
+ stream_result_request[i] <= stream_result_request[i-1];
+ stream_data_valid [i] <= stream_data_valid [i-1];
+ control_to_lane [i].data[0][0] <= control_to_lane [i-1].data[0][0]; // only element data[0][0] of the interface is forwarded
+ config_to_control [i].data[0][0] <= config_to_control [i-1].data[0][0]; // only element data[0][0] of the interface is forwarded
+ end : proc_group_delay
+ end : gen_group_delay
+ end : gen_non_zero_delay
+
+ //
+ // Config decoder
+ // Takes i_config, connects to config_to_control[0] and receives control_to_config from group 0.
+ dla_aux_depthwise_config_decoder #(
+ .ARCH(ARCH)
+ ) dla_aux_depthwise_config_decoder_inst (
+ .clk ( clk ),
+ .i_resetn ( sclrn [RST_NUM_COPIES-2] ),
+ .i_config ( i_config ),
+ .i_config_valid ( i_config_valid ),
+ .o_config ( o_config ),
+ .i_control_to_config ( control_to_config ),
+ .o_config_to_control ( config_to_control[0] ),
+ .o_debug ( debug_from_config )
+ );
+
+ //
+ // Multiple groups (phases) are generated
+ //
+ // First group is responsible for interacting with Config Decoder and generating debug information
+ dla_aux_depthwise_group #(
+ .ID (0 ),
+ .ARCH(ARCH),
+ .AUX_DEPTHWISE_VECTOR_ARCH(AUX_DEPTHWISE_VECTOR_ARCH),
+ .DEPTHWISE_VECTOR_ARCH_INFO(DEPTHWISE_VECTOR_ARCH_INFO)
+ ) dla_aux_depthwise_group_inst (
+ .clk ( clk ),
+ .i_resetn ( group_sreset_n [0] ),
+ .i_data ( i_data [0] ),
+ .i_data_valid ( stream_data_valid [0] ),
+ .o_data ( {>>{o_data}} ), // streaming operator on the o_data struct - NOTE(review): confirm the group's port type
+ .i_result ( stream_result_request [0] ),
+ .o_result ( o_result [0] ),
+ .o_result_valid ( o_result_valid ),
+ .i_filter ( filter_data_buf ),
+ .i_bias ( bias_data_buf ),
+ .i_config_to_control ( config_to_control [0] ),
+ .o_control_to_config ( control_to_config ),
+ .i_control_to_lane ( control_to_lane [0] ), // NOTE(review): same interface instance as o_control_to_lane_next below
+ .o_control_to_lane_next ( control_to_lane [0] ), // presumably group 0 drives stage 0 through this port - confirm against the group's modports
+ .i_config_filter_bias_valid ( i_config_filter_bias_valid ),
+ .i_config_filter_bias_data ( i_config_filter_bias_data ),
+ .o_config_filter_bias_ready ( o_config_filter_bias_ready ),
+ .o_debug ( debug_from_group )
+ );
+ // Other groups only consume decoded config
+ for (genvar i = 1; i < GROUP_NUM; i++) begin : gen_groups
+ // Must pass an interface to o_control_to_lane_next, so create a placeholder interface
+ depthwise_control_to_lane_if #(.special_params(AUX_SPECIAL_PARAMS), .data_pack_params(AUX_DATA_PACK_PARAMS)) output_control_to_lane();
+
+ dla_aux_depthwise_group #(
+ .ID (i ),
+ .ARCH(ARCH),
+ .AUX_DEPTHWISE_VECTOR_ARCH(AUX_DEPTHWISE_VECTOR_ARCH),
+ .DEPTHWISE_VECTOR_ARCH_INFO(DEPTHWISE_VECTOR_ARCH_INFO)
+ ) dla_aux_depthwise_group_inst (
+ .clk ( clk ),
+ .i_resetn ( group_sreset_n [i * GROUP_DELAY] ),
+ .i_data ( i_data [i ] ),
+ .i_data_valid ( stream_data_valid [i * GROUP_DELAY] ),
+ .o_data ( ), // unconnected: o_data is taken from group 0 only
+ .i_result ( stream_result_request[i * GROUP_DELAY] ),
+ .o_result ( o_result [i ] ),
+ .o_result_valid ( ), // unconnected: o_result_valid is taken from group 0 only
+ .i_filter ( filter_data_buf ),
+ .i_bias ( bias_data_buf ),
+ .i_config_to_control ( config_to_control [i * GROUP_DELAY] ),
+ .o_control_to_config ( ), // unconnected: control_to_config is taken from group 0 only
+ .i_control_to_lane ( control_to_lane [i * GROUP_DELAY] ),
+ .o_control_to_lane_next ( output_control_to_lane ), // placeholder interface, not read anywhere in this file
+ .i_config_filter_bias_valid ( i_config_filter_bias_valid ),
+ .i_config_filter_bias_data ( i_config_filter_bias_data ),
+ .o_config_filter_bias_ready ( ), // unconnected: the ready output is taken from group 0 only
+ .o_debug ( ) // unconnected: debug information is taken from group 0 only
+ );
+ end : gen_groups
+
+ //
+ // Filter bias cache
+ // Receives the filter/bias stream and drives filter_data_buf / bias_data_buf for all groups.
+ logic filter_cache_ready;
+ logic configured_and_ready;
+ initial begin // NOTE(review): runs once at time 0 only, so this is not a continuous consistency check
+ if (o_config_filter_bias_ready != (filter_cache_ready && configured_and_ready)) begin
+ $display("Double plumbed filters not matching ready %d %d %d", o_config_filter_bias_ready, filter_cache_ready, configured_and_ready);
+ end
+ end
+ //assign o_config_filter_bias_ready = filter_cache_ready && configured_and_ready;
+ assign configured_and_ready = control_to_lane[0].data[0][0].configured_starting != // high while the two flags differ
+ control_to_lane[0].data[0][0].configured_ending;
+
+ dla_aux_depthwise_filter_bias_cache #(
+ .MAX_WINDOW_HEIGHT (ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_HEIGHT ),
+ .MAX_WINDOW_WIDTH (ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_WIDTH ),
+ .ELEMENT_BITS (ARCH.AUX_DATA_PACK_PARAMS.ELEMENT_BITS ),
+ .CONFIG_BIT_WIDTH (32 ), // NOTE(review): hard-coded; i_data below is CONFIG_STREAM_PARAMS.DATA_WIDTH bits wide - confirm they match
+ .VECTOR_SIZE (ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE ),
+ .WINDOW_BITS_VERTICAL ($clog2(ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_HEIGHT + 1) ),
+ .WINDOW_BITS_HORIZONTAL ($clog2(ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_WIDTH + 1) ),
+ .BIAS_WIDTH (AUX_DEPTHWISE_VECTOR_ARCH.BIAS_WIDTH )
+ ) filter_bias_cache (
+ .clk ( clk ),
+ .i_resetn ( group_sreset_n [0] ),
+ .i_data_valid ( i_config_filter_bias_valid ),
+ .i_data ( i_config_filter_bias_data ),
+ .o_config_filter_bias_ready ( filter_cache_ready ),
+ .i_configured_and_ready ( configured_and_ready ),
+ .i_done ( control_to_lane[GROUP_SHIFT-1].data[0][0].done ), // done flag taken from the last delay-chain stage, i.e. the one feeding the last group
+ .i_window_width ( control_to_lane[0].data[0][0].window_width ),
+ .i_window_height ( control_to_lane[0].data[0][0].window_height ),
+ .o_filter_valid ( ), // unconnected
+ .o_filter ( filter_data_buf ),
+ .o_bias ( bias_data_buf )
+ );
+
+ //
+ // Debug
+ // Collects debug_from_config and debug_from_group and serves the debug AXI read channel.
+ dla_aux_depthwise_debug #(
+ .ARCH(ARCH)
+ ) dla_aux_depthwise_debug_inst (
+ .clk (clk ),
+ .i_resetn(sclrn[RST_NUM_COPIES-1] ),
+ // debug sources
+ .i_config(debug_from_config ),
+ .i_group (debug_from_group ),
+ // debug AXI read channel
+ .i_raddr (i_debug_raddr ),
+ .i_raddr_valid(i_debug_raddr_valid),
+ .o_raddr (o_debug_raddr ),
+ .i_rdata (i_debug_rdata ),
+ .o_rdata (o_debug_rdata ),
+ .o_rdata_valid(o_debug_rdata_valid)
+ );
+
+endmodule