summaryrefslogtreecommitdiff
path: root/python/openvino/demo/ip/intel_ai_ip/verilog/dla_aux_depthwise_control.sv
diff options
context:
space:
mode:
Diffstat (limited to 'python/openvino/demo/ip/intel_ai_ip/verilog/dla_aux_depthwise_control.sv')
-rw-r--r--python/openvino/demo/ip/intel_ai_ip/verilog/dla_aux_depthwise_control.sv484
1 files changed, 484 insertions, 0 deletions
diff --git a/python/openvino/demo/ip/intel_ai_ip/verilog/dla_aux_depthwise_control.sv b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_aux_depthwise_control.sv
new file mode 100644
index 0000000..9c5ae69
--- /dev/null
+++ b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_aux_depthwise_control.sv
@@ -0,0 +1,484 @@
+// Copyright 2020-2023 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/*
+ * Module `dla_aux_depthwise_control`
+ *
+ * Control of the core functionality of the auxiliary block.
+ *
+ * WARNING! ONLY EDIT THE PARTS MARKED IN BETWEEN
+ * "START EDITING" AND "END EDITING"
+ *
+ * See README.md of the Example Aux block for more details.
+ */
+
+`undefineall
+`resetall
+`default_nettype none
+
+`include "dla_acl_parameter_assert.svh"
+
+module dla_aux_depthwise_control
+ import dla_aux_depthwise_pkg::*;
+#(
+ parameter aux_depthwise_arch_params_t ARCH // Architecture parameters
+) (
+ input wire clk , // Clock
+ input wire i_resetn , // active low reset
+ //
+ depthwise_config_to_control_if.receiver i_config_to_control, // Config to control connection
+ output control_to_config_t o_control_to_config, // Control to config connection
+ depthwise_control_to_lane_if.sender o_control_to_lane , // Control to lane connection
+ input lane_to_control_t i_lane_to_control , // Lane to control connection
+ //
+ output debug_control_t o_debug // Debug output
+);
+
+/* synthesis translate_off */
+`DLA_ACL_PARAMETER_ASSERT_MESSAGE(aux_data_pack_params_t'(i_config_to_control.data_pack_params) == ARCH.AUX_DATA_PACK_PARAMS,
+ "i_config_to_control if parameters don't match data pack params")
+`DLA_ACL_PARAMETER_ASSERT_MESSAGE(aux_special_params_t'(i_config_to_control.special_params) == ARCH.AUX_SPECIAL_PARAMS,
+ "i_config_to_control if parameters don't match special params")
+`DLA_ACL_PARAMETER_ASSERT_MESSAGE(aux_data_pack_params_t'(o_control_to_lane.data_pack_params) == ARCH.AUX_DATA_PACK_PARAMS,
+ "o_control_to_lane if parameters don't match data pack params")
+`DLA_ACL_PARAMETER_ASSERT_MESSAGE(aux_special_params_t'(o_control_to_lane.special_params) == ARCH.AUX_SPECIAL_PARAMS,
+ "o_control_to_lane if parameters don't match special params")
+/* synthesis translate_on */
+
+//
+// ------------------------------ START EDITING ------------------------------
+//
+ // Architecture limits, unpacked once from ARCH into short local names
+ localparam
+   NATIVE_VECTOR_SIZE      = ARCH.AUX_DATA_PACK_PARAMS.NATIVE_VECTOR_SIZE,
+   VECTOR_SIZE             = ARCH.AUX_DATA_PACK_PARAMS.VECTOR_SIZE,
+   MAX_WINDOW_HEIGHT       = ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_HEIGHT,
+   MAX_WINDOW_WIDTH        = ARCH.AUX_SPECIAL_PARAMS.MAX_WINDOW_WIDTH,
+   MAX_STRIDE_HORIZONTAL   = ARCH.AUX_SPECIAL_PARAMS.MAX_STRIDE_HORIZONTAL,
+   MAX_STRIDE_VERTICAL     = ARCH.AUX_SPECIAL_PARAMS.MAX_STRIDE_VERTICAL,
+   MAX_DILATION_VERTICAL   = ARCH.AUX_SPECIAL_PARAMS.MAX_DILATION_VERTICAL,
+   MAX_DILATION_HORIZONTAL = ARCH.AUX_SPECIAL_PARAMS.MAX_DILATION_HORIZONTAL;
+
+ // Line positions covered by the tallest window at the largest vertical dilation:
+ // the window rows themselves plus (dilation-1) gap lines between neighbouring rows
+ localparam VERTICAL_LINES = MAX_WINDOW_HEIGHT + (MAX_WINDOW_HEIGHT-1) * (MAX_DILATION_VERTICAL-1);
+
+ // Bit widths able to hold the values 0..MAX of the window and dilation settings
+ localparam WINDOW_BITS_VERTICAL     = $clog2(MAX_WINDOW_HEIGHT + 1);
+ localparam WINDOW_BITS_HORIZONTAL   = $clog2(MAX_WINDOW_WIDTH + 1);
+ localparam DILATION_BITS_VERTICAL   = $clog2(MAX_DILATION_VERTICAL + 1);
+ localparam DILATION_BITS_HORIZONTAL = $clog2(MAX_DILATION_HORIZONTAL + 1);
+
+ // Number of VECTOR_SIZE-wide beats that make up one native vector
+ localparam VECTOR_RATIO = NATIVE_VECTOR_SIZE / VECTOR_SIZE;
+
+ // Total tile count: groups times tiles per group (iteration bound of the padding loops)
+ localparam TILE_COUNT = ARCH.AUX_DATA_PACK_PARAMS.GROUP_SIZE * ARCH.AUX_DATA_PACK_PARAMS.GROUP_NUM;
+
+
+ // Bit widths of the cascaded position counters; input and output side use the same sizes
+ localparam VECTOR_COUNT_BITS  = $clog2(VECTOR_RATIO + 1);
+ localparam WIDTH_COUNT_BITS   = $clog2(ARCH.AUX_SPECIAL_PARAMS.MAX_TILE_WIDTH + 1);
+ localparam HEIGHT_COUNT_BITS  = $clog2(ARCH.AUX_SPECIAL_PARAMS.MAX_TILE_HEIGHT + 1);
+ localparam CHANNEL_COUNT_BITS = $clog2(ARCH.AUX_SPECIAL_PARAMS.MAX_TILE_CHANNELS + NATIVE_VECTOR_SIZE);
+
+ // Position counters advanced by valid core inputs
+ logic [VECTOR_COUNT_BITS-1:0]  count_in_vector;
+ logic [WIDTH_COUNT_BITS-1:0]   count_in_width;
+ logic [HEIGHT_COUNT_BITS-1:0]  count_in_height;
+ logic [CHANNEL_COUNT_BITS-1:0] count_in_channels;
+
+ // Position counters advanced by valid core outputs
+ logic [VECTOR_COUNT_BITS-1:0]  count_out_vector;
+ logic [WIDTH_COUNT_BITS-1:0]   count_out_width;
+ logic [HEIGHT_COUNT_BITS-1:0]  count_out_height;
+ logic [CHANNEL_COUNT_BITS-1:0] count_out_channels;
+
+ // Registered intermediate terms of the effective (dilated) window size.
+ // Stage 1: configured window size and dilation, each minus one
+ logic [WINDOW_BITS_VERTICAL-1:0]     kernel_vert_minus_one;
+ logic [WINDOW_BITS_HORIZONTAL-1:0]   kernel_horiz_minus_one;
+ logic [DILATION_BITS_VERTICAL-1:0]   dilation_vert_minus_one;
+ logic [DILATION_BITS_HORIZONTAL-1:0] dilation_horiz_minus_one;
+
+ // Stage 2: (window-1) * (dilation-1), the lines/columns added by dilation
+ logic [WINDOW_BITS_VERTICAL+DILATION_BITS_VERTICAL-1:0]     kernel_x_dilation_vert;
+ logic [WINDOW_BITS_HORIZONTAL+DILATION_BITS_HORIZONTAL-1:0] kernel_x_dilation_horiz;
+
+ // Stage 3: window + (window-1) * (dilation-1), one bit wider than the product
+ logic [WINDOW_BITS_VERTICAL+DILATION_BITS_VERTICAL:0]     eff_kernel_vert;
+ logic [WINDOW_BITS_HORIZONTAL+DILATION_BITS_HORIZONTAL:0] eff_kernel_horiz;
+
+ // Registered, three-stage computation of the effective (dilated) window size:
+ //   stage 1: window-1 and dilation-1
+ //   stage 2: (window-1) * (dilation-1)
+ //   stage 3: window + (window-1) * (dilation-1)
+ // Every stage reads the previous stage's registered value, so the results settle three
+ // clock cycles after the configuration inputs stop changing.
+ always_ff @(posedge clk) begin
+   if (~i_resetn) begin
+     // synchronous, active-low reset clears the whole pipeline
+     kernel_vert_minus_one    <= '0;
+     dilation_vert_minus_one  <= '0;
+     kernel_x_dilation_vert   <= '0;
+     eff_kernel_vert          <= '0;
+     kernel_horiz_minus_one   <= '0;
+     dilation_horiz_minus_one <= '0;
+     kernel_x_dilation_horiz  <= '0;
+     eff_kernel_horiz         <= '0;
+   end else begin
+     // vertical axis
+     kernel_vert_minus_one    <= (i_config_to_control.data[0][0].window_height - 1);
+     dilation_vert_minus_one  <= (i_config_to_control.data[0][0].dilation_vertical - 1);
+     kernel_x_dilation_vert   <= kernel_vert_minus_one * dilation_vert_minus_one;
+     eff_kernel_vert          <= i_config_to_control.data[0][0].window_height + kernel_x_dilation_vert;
+     // horizontal axis
+     kernel_horiz_minus_one   <= (i_config_to_control.data[0][0].window_width - 1);
+     dilation_horiz_minus_one <= (i_config_to_control.data[0][0].dilation_horizontal - 1);
+     kernel_x_dilation_horiz  <= kernel_horiz_minus_one * dilation_horiz_minus_one;
+     eff_kernel_horiz         <= i_config_to_control.data[0][0].window_width + kernel_x_dilation_horiz;
+   end
+ end
+ //
+ // Input valid counter comprises cascaded counters of vector, width, height and channels.
+ //
+ // The input backpressure signal is also generated in this process.
+ //
+ // One-cycle pulse: the height counter wrapped, i.e. one tile_width x tile_height plane was received
+ logic input_group_done;
+ // Input 'ready' as decided by the state machine; forwarded unchanged to the lane interface
+ logic feature_ready;
+ // One-cycle pulse: the received features are about to cover one full window
+ logic feature_almost_ready;
+ // 'configured' delayed by one clock, used to detect its rising edge
+ logic configured_delayed;
+ assign o_control_to_lane.data[0][0].ready = feature_ready;
+ always_ff @(posedge clk) begin : proc_input_counters
+   // Defaults: sample 'configured' for the edge detector and clear the one-cycle pulses
+   configured_delayed   <= i_config_to_control.data[0][0].configured;
+   input_group_done     <= 0;
+   feature_almost_ready <= 0;
+   // 'configured_starting' toggles on every rising edge of 'configured'
+   if (i_config_to_control.data[0][0].configured & ~configured_delayed)
+     o_control_to_lane.data[0][0].configured_starting <= ~o_control_to_lane.data[0][0].configured_starting;
+   // 'configured_ending' toggles on every 'done' pulse sent to the config decoder
+   if (o_control_to_config.done)
+     o_control_to_lane.data[0][0].configured_ending <= ~o_control_to_lane.data[0][0].configured_ending;
+   // Nested counters for vector, column, line and channels; they advance only when the core's input is valid
+   if (i_lane_to_control.core_input_valid) begin
+     // shallow channels counter
+     count_in_vector <= count_in_vector + 1'b1;
+     if (count_in_vector >= VECTOR_RATIO-1) begin
+       count_in_vector <= '0;
+       // column counter
+       count_in_width <= count_in_width + 1'b1;
+       // We want to stop reading features if the filters are not ready and we are close to having
+       // enough features to produce an output. "Enough" means a number of rows equal to
+       // window_height and a number of columns equal to window_width.
+       if ((count_in_height >= kernel_vert_minus_one) && (count_in_width >= i_config_to_control.data[0][0].window_width-2)) begin
+         feature_almost_ready <= 1;
+       end
+       if (count_in_width >= i_config_to_control.data[0][0].tile_width-1) begin
+         count_in_width <= '0;
+         // line counter
+         count_in_height <= count_in_height + 1'b1;
+         if (count_in_height >= i_config_to_control.data[0][0].tile_height-1) begin
+           count_in_height <= '0;
+           // a complete plane has been received for the current channel group
+           input_group_done <= 1;
+           // channels counter, advancing one native vector at a time
+           count_in_channels <= $bits(count_in_channels)'(count_in_channels + NATIVE_VECTOR_SIZE);
+           if (count_in_channels >= i_config_to_control.data[0][0].tile_channels - NATIVE_VECTOR_SIZE) begin
+             // input tensor is finished
+             count_in_channels <= '0;
+           end
+         end
+       end
+     end
+   end
+   // Reset the counters and pulses while in reset or not configured. These assignments come
+   // last so that they override everything assigned above.
+   if (~i_resetn || ~i_config_to_control.data[0][0].configured) begin
+     count_in_vector      <= '0;
+     count_in_width       <= '0;
+     count_in_height      <= '0;
+     count_in_channels    <= '0;
+     input_group_done     <= 1'b0;
+     feature_almost_ready <= 0;
+     configured_delayed   <= '0;
+   end
+   // The toggle flags are cleared by reset only; they keep their value across configurations
+   if (~i_resetn) begin
+     o_control_to_lane.data[0][0].configured_starting <= 0;
+     o_control_to_lane.data[0][0].configured_ending   <= 0;
+   end
+ end : proc_input_counters
+ //
+ // state machine to handle when features and filters should be ready to be received
+ // right now, we receive filters first then features, then we process
+ //
+ // Phases of the filter/feature hand-shake
+ typedef enum logic [2:0] {
+   IDLE           = 3'd0, // not configured, input back-pressured
+   FILTER_FEATURE = 3'd1, // accepting features while waiting for the filters
+   FILTER         = 3'd2, // features paused, waiting for the filters
+   FEATURE        = 3'd3, // filters ready, accepting features
+   PROCESSING     = 3'd4  // a full input group was received, waiting for a done pulse
+ } state_t;
+ state_t state, state_next;
+
+ // State register with synchronous, active-low reset
+ always_ff @(posedge clk) begin
+   if (~i_resetn)
+     state <= IDLE;
+   else
+     state <= state_next;
+ end
+
+ // Next-state and 'feature_ready' decode
+ always_comb begin
+   // Short local names for the hand-shake inputs
+   logic cfg_active;     // block is currently configured
+   logic filters_loaded; // lane reports its depthwise filters as ready
+   logic group_finished; // 'done' pulse sent to the lane
+   cfg_active     = i_config_to_control.data[0][0].configured;
+   filters_loaded = i_lane_to_control.depthwise_filter_ready;
+   group_finished = o_control_to_lane.data[0][0].done;
+
+   // Defaults: hold the state and back-pressure the input
+   state_next    = state;
+   feature_ready = 1'b0;
+
+   case (state)
+     IDLE:
+       if (cfg_active) begin
+         state_next    = FILTER_FEATURE;
+         feature_ready = 1'b1;
+       end
+     FILTER_FEATURE: begin
+       feature_ready = 1'b1;
+       if (feature_almost_ready) begin
+         // a full window is nearly buffered: stop taking features until the filters arrive
+         feature_ready = 1'b0;
+         state_next    = FILTER;
+       end
+       // filters becoming ready takes precedence over the transition chosen above
+       if (filters_loaded)
+         state_next = FEATURE;
+     end
+     FILTER:
+       if (filters_loaded)
+         state_next = FEATURE;
+     FEATURE: begin
+       feature_ready = 1'b1;
+       if (input_group_done) begin
+         state_next    = PROCESSING;
+         feature_ready = 1'b0;
+       end else if (group_finished & cfg_active) begin
+         state_next    = FILTER;
+         feature_ready = 1'b0;
+       end else if (group_finished & ~cfg_active) begin
+         state_next    = IDLE;
+         feature_ready = 1'b0;
+       end
+     end
+     PROCESSING:
+       if (o_control_to_config.done)
+         state_next = IDLE;
+       else if (group_finished & cfg_active)
+         state_next = FILTER_FEATURE;
+       else if (group_finished & ~cfg_active)
+         state_next = IDLE;
+     default: state_next = IDLE; // unused encodings fall back to IDLE
+   endcase
+ end
+ // Pass dilation from config to lane: both factors are forwarded unchanged by continuous assignments (no register stage)
+ assign o_control_to_lane.data[0][0].dilation_vertical = i_config_to_control.data[0][0].dilation_vertical;
+ assign o_control_to_lane.data[0][0].dilation_horizontal = i_config_to_control.data[0][0].dilation_horizontal;
+ //
+ // Line-buffers inside the core are implemented as FIFOs. FIFO synchronization and handover
+ // between consequent tensors are achieved by the following steps:
+ // * Line buffers are filled with tensor-width amount of data at the beginning of each tensor.
+ // * The fill level is kept constant throughout the tensor.
+ // * At the end of each tensor all FIFOs are drained to prepare them for the next tensor.
+ //
+ always_ff @(posedge clk) begin : proc_line_buff_control
+   // Registered line-buffer controls, listed from highest to lowest priority
+   if (~i_resetn) begin
+     // in reset: neither wait for fill nor flush
+     o_control_to_lane.data[0][0].line_buff_wait_fill <= 1'b0;
+     o_control_to_lane.data[0][0].line_buff_flush     <= 1'b0;
+   end else if (i_config_to_control.data[0][0].window_height == 1 && i_config_to_control.data[0][0].configured) begin
+     // configured with a window height of 1: keep the FIFOs flushed
+     o_control_to_lane.data[0][0].line_buff_wait_fill <= 1'b0;
+     o_control_to_lane.data[0][0].line_buff_flush     <= 1'b1;
+   end else begin
+     // otherwise: fill while the first input line is received, flush during the last one
+     o_control_to_lane.data[0][0].line_buff_wait_fill <= (count_in_height == 0);
+     o_control_to_lane.data[0][0].line_buff_flush     <= (count_in_height == i_config_to_control.data[0][0].tile_height-1);
+   end
+ end : proc_line_buff_control
+
+ //
+ // Padding generator control consists of multiple enable flags. Each flag enables a set/reset
+ // mode of a register or act like select bits of a multiplexer.
+ //
+ // If max window size is larger than the configured window size, then the generator is used to
+ // load the identity element of the operation into the out of bound registers.
+ //
+ always_ff @(posedge clk) begin : proc_pad_control
+   // Vertical padding enables: one flag per tile and per line position
+   for (int tile = 0; tile < TILE_COUNT; tile++) begin : proc_pad_control_vert
+     for (int line = 0; line < VERTICAL_LINES; line++) begin
+       if (line < eff_kernel_vert) begin
+         // Line lies inside the effective (dilated) window height: pad when the input line
+         // counter is outside this tile's [start, end] range shifted by the line position.
+         // Zero padding is selected by padding_mode == 2'b00.
+         o_control_to_lane.data[0][0].en_pad_zero_vert[tile][line] <=
+           (count_in_height < line + i_config_to_control.data[0][0].tile_vertical_start[tile] ||
+            count_in_height > line + i_config_to_control.data[0][0].tile_vertical_end  [tile]) &&
+           i_config_to_control.data[0][0].padding_mode == 2'b00;
+         //
+         // TODO: Implement constant and reflection boundary conditions
+         //
+         // NaN padding uses the same out-of-range test, gated by padding_ignore
+         o_control_to_lane.data[0][0].en_pad_nan_vert[tile][line] <=
+           (count_in_height < line + i_config_to_control.data[0][0].tile_vertical_start[tile] ||
+            count_in_height > line + i_config_to_control.data[0][0].tile_vertical_end  [tile]) &&
+           i_config_to_control.data[0][0].padding_ignore;
+       end else begin
+         // Line lies outside the active window: always pad with NaN (noted in the design as the
+         // identity element). The zero-padding flag is not assigned here and keeps its value.
+         o_control_to_lane.data[0][0].en_pad_nan_vert[tile][line] <= 1'b1;
+       end
+     end
+   end : proc_pad_control_vert
+
+   // Horizontal padding enables: one flag per tile and per window column
+   for (int tile = 0; tile < TILE_COUNT; tile++) begin : proc_pad_control_horiz
+     for (int col = 0; col < MAX_WINDOW_WIDTH; col++) begin
+       if (col < i_config_to_control.data[0][0].window_width) begin
+         // Column lies inside the configured window width: same scheme as the vertical case,
+         // using the column counter and the tile's horizontal range
+         o_control_to_lane.data[0][0].en_pad_zero_horiz[tile][col] <=
+           (count_in_width < col + i_config_to_control.data[0][0].tile_horizontal_start[tile] ||
+            count_in_width > col + i_config_to_control.data[0][0].tile_horizontal_end  [tile]) &&
+           i_config_to_control.data[0][0].padding_mode == 2'b00;
+         //
+         // TODO: Implement constant and reflection boundary conditions
+         //
+         o_control_to_lane.data[0][0].en_pad_nan_horiz[tile][col] <=
+           (count_in_width < col + i_config_to_control.data[0][0].tile_horizontal_start[tile] ||
+            count_in_width > col + i_config_to_control.data[0][0].tile_horizontal_end  [tile]) &&
+           i_config_to_control.data[0][0].padding_ignore;
+       end else begin
+         // Column lies outside the active window: always pad with NaN; the zero-padding flag
+         // keeps its value
+         o_control_to_lane.data[0][0].en_pad_nan_horiz[tile][col] <= 1'b1;
+       end
+     end
+   end : proc_pad_control_horiz
+
+   // Registered window size and padding-zone markers
+   if (~i_resetn) begin
+     // in reset: report the maximum window and flag both padding zones
+     o_control_to_lane.data[0][0].window_height         <= MAX_WINDOW_HEIGHT;
+     o_control_to_lane.data[0][0].window_width          <= MAX_WINDOW_WIDTH;
+     o_control_to_lane.data[0][0].is_padding_zone_vert  <= 1'b1;
+     o_control_to_lane.data[0][0].is_padding_zone_horiz <= 1'b1;
+   end else begin
+     o_control_to_lane.data[0][0].window_height <= i_config_to_control.data[0][0].window_height;
+     o_control_to_lane.data[0][0].window_width  <= i_config_to_control.data[0][0].window_width;
+     // Padding zone: fewer than (effective window size - 1) lines/columns counted so far;
+     // never flagged when the configured window size along that axis is 1
+     o_control_to_lane.data[0][0].is_padding_zone_vert  <= i_config_to_control.data[0][0].window_height > 1 &&
+                                                           count_in_height < (eff_kernel_vert-1);
+     o_control_to_lane.data[0][0].is_padding_zone_horiz <= i_config_to_control.data[0][0].window_width > 1 &&
+                                                           count_in_width < (eff_kernel_horiz-1);
+   end
+ end : proc_pad_control
+
+ //
+ // Stride counters and stride valid signal generator.
+ //
+ // Other input counters are used in conjunction
+ //
+ always_ff @(posedge clk) begin : proc_stride
+   // Stride counters, declared one bit wider than $clog2(MAX_STRIDE+1); the all-ones value is
+   // used below as the parked state while in reset or not configured
+   logic [$clog2(MAX_STRIDE_VERTICAL  +1):0] count_stride_vert ;
+   logic [$clog2(MAX_STRIDE_HORIZONTAL+1):0] count_stride_horiz;
+
+   // Counter updates, listed from highest to lowest priority
+   if (~i_resetn || ~i_config_to_control.data[0][0].configured) begin
+     // Park both counters at all-ones, which gives the counters one extra cycle after start
+     count_stride_horiz <= '1;
+     count_stride_vert  <= '1;
+   end else if (
+     i_config_to_control.data[0][0].window_height == 1 && i_config_to_control.data[0][0].configured &&
+     count_stride_horiz == '1 && count_stride_vert == '1
+   ) begin
+     // Window height 1: leave the parked state immediately instead of waiting for input
+     count_stride_vert  <= '0;
+     count_stride_horiz <= '0;
+   end else if (i_lane_to_control.core_input_valid) begin
+     // Count only on the last vector beat of a position, i.e. when the shallow channels
+     // counter is about to wrap and the input moves on to the next coordinates
+     if (count_in_vector >= VECTOR_RATIO-1) begin
+       if (count_in_width >= i_config_to_control.data[0][0].tile_width-1) begin
+         // End of an input line: restart the horizontal stride counter ...
+         count_stride_horiz <= '0;
+         // ... and step the vertical one. It restarts when it reaches the configured stride or
+         // when the input height counter is about to wrap; otherwise it increments once at
+         // least window-height lines have been counted.
+         if (
+           count_stride_vert >= i_config_to_control.data[0][0].stride_vertical - 1 ||
+           count_in_height >= i_config_to_control.data[0][0].tile_height - 1
+         ) begin
+           count_stride_vert <= '0;
+         end else if (count_in_height >= kernel_vert_minus_one) begin
+           count_stride_vert <= count_stride_vert + 1'b1;
+         end
+       end else if (count_stride_horiz >= i_config_to_control.data[0][0].stride_horizontal - 1) begin
+         // Horizontal stride reached: restart
+         count_stride_horiz <= '0;
+       end else if (count_in_width >= kernel_horiz_minus_one) begin
+         // At least window-width columns have been counted: advance
+         count_stride_horiz <= count_stride_horiz + 1'b1;
+       end
+     end
+   end
+
+   // Stride is valid when both counters are zero (registered, so one cycle after the counters)
+   o_control_to_lane.data[0][0].stride_valid <= count_stride_vert == '0 && count_stride_horiz == '0;
+ end : proc_stride
+
+ // Output-side bookkeeping, advanced only by valid core outputs.
+ // Output valid counter comprises cascaded counters of vector, width, height and channels.
+ // The width and height counters step by the configured strides; the channels counter steps by NATIVE_VECTOR_SIZE.
+ // A 'done' pulse is sent to the config decoder when the last tensor element is processed.
+ // A separate 'done' pulse is sent to the lane every time the height counter wraps.
+ always_ff @(posedge clk) begin : proc_output_counters
+ // clear both done signals by default, so each pulse lasts exactly one clock cycle
+ o_control_to_config.done <= 1'b0;
+ o_control_to_lane.data[0][0].done <= 1'b0;
+ // Nested counters for vector, column, line and channels, which operate only when core has a valid result.
+ if (i_lane_to_control.core_output_valid) begin
+ // shallow channels counter: counts the VECTOR_RATIO beats of one native vector
+ count_out_vector <= count_out_vector + 1'b1;
+ if (count_out_vector >= VECTOR_RATIO-1) begin
+ count_out_vector <= '0;
+ // column counter: advances by the horizontal stride and wraps at tile_width minus the dilated window width
+ count_out_width <= $bits(count_out_width)'(count_out_width + i_config_to_control.data[0][0].stride_horizontal);
+ if (count_out_width >= (i_config_to_control.data[0][0].tile_width -
+ i_config_to_control.data[0][0].window_width) -
+ kernel_x_dilation_horiz) begin
+ count_out_width <= '0;
+ // line counter: advances by the vertical stride
+ count_out_height <= $bits(count_out_height)'(count_out_height + i_config_to_control.data[0][0].stride_vertical);
+ // wrap threshold: tile_height minus the dilated window height (window_height + (window_height-1)*(dilation-1))
+ if (count_out_height >= i_config_to_control.data[0][0].tile_height -
+ i_config_to_control.data[0][0].window_height -
+ kernel_x_dilation_vert) begin
+ count_out_height <= '0;
+ // send a 1 clock cycle long 'done' to the lane every time the channels counter increments,
+ // to indicate that a new set of filters is needed
+ o_control_to_lane.data[0][0].done <= 1'b1;
+ // channels counter: advances one native vector at a time
+ count_out_channels <= $bits(count_out_channels)'(count_out_channels + NATIVE_VECTOR_SIZE);
+ if (count_out_channels >= i_config_to_control.data[0][0].tile_channels - NATIVE_VECTOR_SIZE) begin
+ count_out_channels <= '0;
+ // last channel group finished: send a 1 clock cycle long 'done' pulse to the config decoder
+ o_control_to_config.done <= 1'b1;
+ end
+ end
+ end
+ end
+ end
+ // Reset counters if the module is in reset or not configured; placed last so it overrides the updates above (the done pulses are not part of this override)
+ if (~i_resetn || ~i_config_to_control.data[0][0].configured) begin
+ count_out_vector <= '0;
+ count_out_width <= '0;
+ count_out_height <= '0;
+ count_out_channels <= '0;
+ end
+ end : proc_output_counters
+//
+// ------------------------------ END EDITING ------------------------------
+//
+
+endmodule