summaryrefslogtreecommitdiff
path: root/python/openvino/demo/ip/intel_ai_ip/verilog/dla_lt_output_logic.sv
diff options
context:
space:
mode:
Diffstat (limited to 'python/openvino/demo/ip/intel_ai_ip/verilog/dla_lt_output_logic.sv')
-rw-r--r-- python/openvino/demo/ip/intel_ai_ip/verilog/dla_lt_output_logic.sv 205
1 files changed, 205 insertions, 0 deletions
diff --git a/python/openvino/demo/ip/intel_ai_ip/verilog/dla_lt_output_logic.sv b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_lt_output_logic.sv
new file mode 100644
index 0000000..3721522
--- /dev/null
+++ b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_lt_output_logic.sv
@@ -0,0 +1,205 @@
+// Copyright 2020-2024 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/**
+ * dla_lt_output_logic.sv
+ *
+ * Keeps track of finished CVEC lines, and writes the finished lines to the output.
+ * Also keeps track of the dimensions of the output, and outputs padding lines when
+ * required.
+ *
+ */
+
+`resetall
+`undefineall
+`default_nettype none
+
+module dla_lt_output_logic import dla_lt_pkg::*; #(
+ parameter int NUM_BUFFER_POOLS, // number of buffer pools; sizes the per-pool ports below
+ parameter int CVEC_PER_BUFFER, // line indices are $clog2(CVEC_PER_BUFFER) bits wide; NOTE(review): also used as a mask (CVEC_PER_BUFFER-1), so presumably a power of two - confirm
+ parameter int ELEMENTS_PER_CYCLE, // width of i_completed_vol_tally
+ parameter int N_POOL_BITS, // number of low output_cursor bits used as the pool select
+ parameter int MAX_OUTPUT_W, // only used here to derive MAX_OUTPUT_VOLUME
+ parameter int MAX_OUTPUT_H, // only used here to derive MAX_OUTPUT_VOLUME
+ parameter int MAX_OUTPUT_D, // only used here to derive MAX_OUTPUT_VOLUME
+ parameter int MAX_OUTPUT_C, // number of 16-bit elements per input line; also feeds MAX_OUTPUT_VOLUME
+ parameter int CVEC, // number of 16-bit elements per o_data word
+ parameter int CNT_BITS, // not referenced anywhere inside this module
+ parameter int MAX_DIM_BITS, // width of the padding thresholds and padding counters
+ localparam int MAX_OUTPUT_VOLUME = MAX_OUTPUT_W * MAX_OUTPUT_H * MAX_OUTPUT_D * (MAX_OUTPUT_C/CVEC) // sizes lines_written / o_lines_written
+) (
+ input wire clk,
+ input wire i_rstn, // active-low reset, sampled synchronously inside the clocked block
+ input wire [MAX_OUTPUT_C-1:0][16-1:0] i_output_line_data [NUM_BUFFER_POOLS-1:0], // one line of 16-bit elements per pool
+ input wire [($clog2(CVEC_PER_BUFFER))-1:0] i_curr_out_line [NUM_BUFFER_POOLS-1:0], // per-pool line index; a read only counts when it equals the requested line
+ input wire [ELEMENTS_PER_CYCLE-1:0] i_completed_vol_tally, // one flag per element; the set bits are summed every cycle
+ layout_transform_config_if if_lt_config, // this module reads .valid and fields of .data
+ input wire i_ready, // gates both padding output and line writes
+
+ output logic [($clog2(CVEC_PER_BUFFER))-1:0] o_line_num [NUM_BUFFER_POOLS-1:0], // requested line index, per pool
+ output logic [NUM_BUFFER_POOLS-1:0] o_read_req, // read request, one bit per pool
+ output logic [CVEC-1:0][16-1:0] o_data, // output word (all zeros for padding)
+ output logic o_valid, // o_data qualifier
+ output logic o_last, // set when either output counter equals output_volume - 1
+ input wire i_stall, // while high, o_valid and o_data hold their values
+ output logic [$clog2((MAX_OUTPUT_VOLUME))-1:0] o_lines_written, // mirrors lines_written
+ output int o_finished_lines // driven from the 16-bit finished_lines_reg
+);
+ // Local copies of the padding thresholds from the configuration interface.
+ logic [MAX_DIM_BITS-1:0] top_full_padding, left_full_padding;
+ assign top_full_padding = if_lt_config.data.top_full_padding;
+ assign left_full_padding = if_lt_config.data.left_full_padding;
+ // lines_written: lines read out so far. finished_lines_*: per-cycle tally and its running total.
+ logic [$clog2((MAX_OUTPUT_VOLUME))-1:0] lines_written;
+ logic [16-1:0] finished_lines_comb, finished_lines_reg; // NOTE(review): fixed 16-bit, while lines_written is sized from MAX_OUTPUT_VOLUME - confirm no wrap for large volumes
+ assign o_finished_lines = finished_lines_reg;
+ assign o_lines_written = lines_written;
+
+ // Add up all the lines in the current cycle that are complete CVEC lines, represented in the tally vector.
+ // Each set bit of i_completed_vol_tally contributes one to finished_lines_comb.
+ always_comb
+ begin
+ finished_lines_comb = 0;
+ for (int i =0 ; i < ELEMENTS_PER_CYCLE; i++)
+ begin
+ finished_lines_comb = finished_lines_comb + i_completed_vol_tally[i];
+ end
+ end
+ // Read addressing: output_cursor is split into a pool select (low bits) and a line index (bits above).
+ logic [N_POOL_BITS-1:0] pool_out_comb;
+ logic [N_POOL_BITS-1:0] pool_out; // registered copy of pool_out_comb; NOTE(review): written below but never read in this module
+ logic [$clog2(CVEC_PER_BUFFER):0] line_out_comb;
+ logic [$clog2((NUM_BUFFER_POOLS) * (CVEC_PER_BUFFER)):0] output_cursor;
+
+ logic inside_padding;
+ logic write_line;
+ logic extra_cvec;
+ // Position counters; shortint unsigned makes each of these 16 bits wide.
+ shortint unsigned output_w_cnt;
+ shortint unsigned output_h_cnt;
+ shortint unsigned total_output; // NOTE(review): compared against output_volume but only 16 bits wide - confirm the volume always fits
+ shortint unsigned cvec_rollover;
+ shortint unsigned cvec_count;
+ logic [MAX_DIM_BITS-1:0] left_pad, top_pad;
+
+ assign pool_out_comb = output_cursor[N_POOL_BITS-1:0]; // low bits select the pool
+ assign line_out_comb = ((output_cursor >> N_POOL_BITS) & (CVEC_PER_BUFFER-1)); // remaining bits, masked to the line index range
+
+ // overflow: a width/height counter has reached its configured overflow threshold while output remains.
+ logic overflow;
+ logic front_pad;
+ assign overflow = (output_w_cnt >= if_lt_config.data.w_overflow
+ || output_h_cnt >= if_lt_config.data.h_overflow) && total_output < (if_lt_config.data.output_volume);
+ assign front_pad = (~top_full_padding[MAX_DIM_BITS-1] && top_pad <= top_full_padding) || (~left_full_padding[MAX_DIM_BITS-1] && left_pad <= left_full_padding); // a threshold only applies when its MSB is clear; presumably the MSB is a sign bit - confirm
+
+ // Emit a padding (all-zero) word this cycle: not stalled, ready, and in an overflow or front-padding position.
+ assign inside_padding = i_stall == 0 && i_ready && (overflow || front_pad);
+ // extra_cvec: at least one face area has been output and the volume is not yet complete.
+ assign extra_cvec = (total_output >= if_lt_config.data.output_face_area) && total_output < if_lt_config.data.output_volume;
+ // write_line: not stalled, ready, and either finished lines are still unwritten or extra_cvec is set.
+ assign write_line = (i_stall == 0 && i_ready && (( lines_written < finished_lines_reg) || (extra_cvec)));
+
+ // Output sequencer. Later nonblocking assignments to the same register override earlier ones in the same clock, so statement order matters here.
+ always_ff @( posedge clk)
+ begin
+ o_valid <= 1'b0; // per-cycle default
+ finished_lines_reg <= finished_lines_reg + finished_lines_comb; // accumulates every cycle, independent of if_lt_config.valid
+ o_read_req <= '{default: '0}; // per-cycle default
+ if (!i_rstn)
+ begin
+ finished_lines_reg <= '0;
+ lines_written <= '0;
+ output_cursor <= '0;
+ o_data <= '0;
+ o_line_num <= '{default: '0};
+ pool_out <= '0;
+
+ output_h_cnt <= 0;
+ output_w_cnt <= 0;
+
+ total_output <= 0;
+ cvec_rollover <= 0;
+ o_last <= 0;
+ cvec_count <= '0;
+ left_pad <= '0;
+ top_pad <= '0;
+ end
+ else if (if_lt_config.valid)
+ begin
+ o_line_num[pool_out_comb] <= line_out_comb; // only the selected pool's entry is updated
+ pool_out <= pool_out_comb;
+ o_data <= i_output_line_data[pool_out_comb][CVEC*(cvec_count)+:CVEC]; // CVEC-element slice chosen by cvec_count
+ o_last <= lines_written == (if_lt_config.data.output_volume - 1) || total_output == (if_lt_config.data.output_volume - 1); // updates even while i_stall is high
+ output_cursor <= output_cursor; // explicit holds; overridden further down when the output advances
+ lines_written <= lines_written;
+ cvec_rollover <= cvec_rollover;
+ total_output <= total_output;
+
+ // Could do the following with a state machine..?
+ o_valid <= '0;
+ o_read_req[pool_out_comb] <= 1'b1; // requested whenever the configuration is valid
+ if (inside_padding) begin
+ o_data <= '0; // padding word
+ o_valid <= '1;
+ end
+ else if (write_line) begin
+ o_read_req[pool_out_comb] <= 1'b1; // same value as the unconditional assignment above
+ end
+ if (i_stall) begin
+ o_valid <= o_valid; // hold the output while stalled
+ o_data <= o_data;
+ end else begin
+ if ((write_line == 1 || extra_cvec) && inside_padding == 0 ) begin // NOTE(review): extra_cvec here is not gated by i_ready, unlike write_line - confirm intent
+ o_read_req[pool_out_comb] <= 1'b1; // same value as the unconditional assignment above
+
+ o_valid <= i_curr_out_line[pool_out_comb] == line_out_comb; // This makes it so that output stalls when all RAM ports are busy...
+ if (i_curr_out_line[pool_out_comb] == line_out_comb) begin
+ output_cursor <= ((output_cursor + 1) & ((NUM_BUFFER_POOLS) * (CVEC_PER_BUFFER) - 1)); // wrap by masking; NOTE(review): presumably the product is a power of two - confirm
+
+ lines_written <= lines_written + 1;
+ end
+ end
+ // && binds tighter than ||: advance on padding, or on write_line when the requested line matches.
+ if (inside_padding || write_line == 1 && i_curr_out_line[pool_out_comb] == line_out_comb) begin
+ total_output <= total_output + 1;
+ cvec_rollover <= cvec_rollover + 1;
+
+ // Keep track of dimensionality of output:
+ output_w_cnt <= output_w_cnt + 1;
+ left_pad <= left_pad+1;
+
+ if (output_w_cnt >= if_lt_config.data.output_w-1) // end of row
+ begin
+ output_w_cnt <= 0;
+ left_pad <= 0;
+ top_pad <= top_pad + 1;
+ output_h_cnt <= output_h_cnt + 1;
+ if (output_h_cnt >= if_lt_config.data.output_h-1) // last row
+ begin
+ output_h_cnt <= 0;
+ end
+ end
+ if ((cvec_rollover) == if_lt_config.data.output_face_area - 1) // start going to overflow...
+ begin
+ output_cursor <= 0; // these override the updates made earlier in this same clock
+ cvec_rollover <= 0;
+ cvec_count <= cvec_count + 1; // selects the next slice; NOTE(review): only cleared by reset - confirm
+ left_pad<= 0;
+ top_pad <= 0;
+ output_w_cnt <= 0;
+ output_h_cnt <= 0;
+ end
+ end
+ end
+ end
+ end
+endmodule