diff options
| author | Eric Dao <eric@erickhangdao.com> | 2025-03-10 17:54:31 -0400 |
|---|---|---|
| committer | Eric Dao <eric@erickhangdao.com> | 2025-03-10 17:54:31 -0400 |
| commit | ab224e2e6ba65f5a369ec392f99cd8845ad06c98 (patch) | |
| tree | a1e757e9341863ed52b8ad4c5a1c45933aab9da4 /python/openvino/demo/ip/intel_ai_ip/verilog/dla_lt_output_logic.sv | |
| parent | 40da1752f2c8639186b72f6838aa415e854d0b1d (diff) | |
| download | thesis-master.tar.gz thesis-master.tar.bz2 thesis-master.zip | |
Diffstat (limited to 'python/openvino/demo/ip/intel_ai_ip/verilog/dla_lt_output_logic.sv')
| -rw-r--r-- | python/openvino/demo/ip/intel_ai_ip/verilog/dla_lt_output_logic.sv | 205 |
1 files changed, 205 insertions, 0 deletions
diff --git a/python/openvino/demo/ip/intel_ai_ip/verilog/dla_lt_output_logic.sv b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_lt_output_logic.sv
new file mode 100644
index 0000000..3721522
--- /dev/null
+++ b/python/openvino/demo/ip/intel_ai_ip/verilog/dla_lt_output_logic.sv
@@ -0,0 +1,229 @@
+// Copyright 2020-2024 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you ("License"). Unless the License provides otherwise,
+// you may not use, modify, copy, publish, distribute, disclose or transmit
+// this software or the related documents without Intel's prior written
+// permission.
+//
+// This software and the related documents are provided as is, with no express
+// or implied warranties, other than those that are expressly stated in the
+// License.
+
+/**
+ * dla_lt_output_logic.sv
+ *
+ * Keeps track of finished CVEC lines, and writes the finished lines to the output.
+ * Also keeps track of the dimensions of the output, and outputs padding lines when
+ * required.
+ *
+ * Output handshake: the registered beat (o_valid, o_data, o_last) is held
+ * unchanged while i_stall is high. The inside_padding and write_line strobes
+ * are both qualified by i_stall == 0 and i_ready.
+ *
+ * NOTE(review): the cursor arithmetic masks with (CVEC_PER_BUFFER - 1) and
+ * (NUM_BUFFER_POOLS * CVEC_PER_BUFFER - 1), so it presumably expects both to be
+ * powers of two -- confirm against the instantiating module.
+ *
+ */
+
+`resetall
+`undefineall
+`default_nettype none
+
+module dla_lt_output_logic import dla_lt_pkg::*; #(
+  parameter int NUM_BUFFER_POOLS,
+  parameter int CVEC_PER_BUFFER,
+  parameter int ELEMENTS_PER_CYCLE,
+  parameter int N_POOL_BITS,
+  parameter int MAX_OUTPUT_W,
+  parameter int MAX_OUTPUT_H,
+  parameter int MAX_OUTPUT_D,
+  parameter int MAX_OUTPUT_C,
+  parameter int CVEC,
+  parameter int CNT_BITS,
+  parameter int MAX_DIM_BITS,
+  localparam int MAX_OUTPUT_VOLUME = MAX_OUTPUT_W * MAX_OUTPUT_H * MAX_OUTPUT_D * (MAX_OUTPUT_C/CVEC)
+) (
+  input wire clk,                                                                   // all registers update on the rising edge
+  input wire i_rstn,                                                                // active-low reset, sampled synchronously in the always_ff below
+  input wire [MAX_OUTPUT_C-1:0][16-1:0] i_output_line_data [NUM_BUFFER_POOLS-1:0],  // per-pool line data; a CVEC-element slice at CVEC*cvec_count feeds o_data
+  input wire [($clog2(CVEC_PER_BUFFER))-1:0] i_curr_out_line [NUM_BUFFER_POOLS-1:0], // per-pool line number compared with the requested line
+  input wire [ELEMENTS_PER_CYCLE-1:0] i_completed_vol_tally,                        // set bits are summed and accumulated every clock
+  layout_transform_config_if if_lt_config,                                          // .valid gates the output logic; .data supplies dimensions and padding
+  input wire i_ready,                                                               // qualifies inside_padding and write_line
+
+  output logic [($clog2(CVEC_PER_BUFFER))-1:0] o_line_num [NUM_BUFFER_POOLS-1:0],   // requested line number, written for the pool selected by the cursor
+  output logic [NUM_BUFFER_POOLS-1:0] o_read_req,                                   // one bit per pool; set for the selected pool while if_lt_config.valid
+  output logic [CVEC-1:0][16-1:0] o_data,                                           // registered output beat (all zeros for a padding beat)
+  output logic o_valid,                                                             // registered beat valid; held while i_stall is high
+  output logic o_last,                                                              // registered last-beat flag; held while i_stall is high
+  input wire i_stall,                                                               // when high the output beat is held and no counter advances
+  output logic [$clog2((MAX_OUTPUT_VOLUME))-1:0] o_lines_written,                   // number of data (non-padding) lines issued since reset
+  output int o_finished_lines                                                       // 16-bit running sum of tally bits since reset, zero-extended
+);
+
+  // Padding extents taken from the configuration interface.
+  logic [MAX_DIM_BITS-1:0] top_full_padding, left_full_padding;
+  assign top_full_padding = if_lt_config.data.top_full_padding;
+  assign left_full_padding = if_lt_config.data.left_full_padding;
+
+  logic [$clog2((MAX_OUTPUT_VOLUME))-1:0] lines_written;
+  logic [16-1:0] finished_lines_comb, finished_lines_reg;
+  assign o_finished_lines = finished_lines_reg;
+  assign o_lines_written = lines_written;
+
+  // Add up all the lines in the current cycle that are complete CVEC lines, represented in the tally vector.
+  always_comb
+  begin
+    finished_lines_comb = 0;
+    for (int i =0 ; i < ELEMENTS_PER_CYCLE; i++)
+    begin
+      finished_lines_comb = finished_lines_comb + i_completed_vol_tally[i];
+    end
+  end
+
+  logic [N_POOL_BITS-1:0] pool_out_comb;
+  logic [N_POOL_BITS-1:0] pool_out;
+  logic [$clog2(CVEC_PER_BUFFER):0] line_out_comb;
+  logic [$clog2((NUM_BUFFER_POOLS) * (CVEC_PER_BUFFER)):0] output_cursor;
+
+  logic inside_padding;
+  logic write_line;
+  logic extra_cvec;
+
+  shortint unsigned output_w_cnt;
+  shortint unsigned output_h_cnt;
+  shortint unsigned total_output;
+  shortint unsigned cvec_rollover;
+  shortint unsigned cvec_count;
+  logic [MAX_DIM_BITS-1:0] left_pad, top_pad;
+
+  // The low N_POOL_BITS of the cursor select the buffer pool; the bits above,
+  // masked with (CVEC_PER_BUFFER-1), select the line within that pool.
+  assign pool_out_comb = output_cursor[N_POOL_BITS-1:0];
+  assign line_out_comb = ((output_cursor >> N_POOL_BITS) & (CVEC_PER_BUFFER-1));
+
+  logic overflow;
+  logic front_pad;
+  // overflow: the width or height counter has reached its configured overflow
+  // threshold while total_output is still below output_volume.
+  assign overflow = (output_w_cnt >= if_lt_config.data.w_overflow
+    || output_h_cnt >= if_lt_config.data.h_overflow) && total_output < (if_lt_config.data.output_volume);
+  // front_pad: the top or left pad counter is still <= its configured extent.
+  // An extent whose MSB is set never matches (presumably a negative / "no padding" encoding -- TODO confirm).
+  assign front_pad = (~top_full_padding[MAX_DIM_BITS-1] && top_pad <= top_full_padding) || (~left_full_padding[MAX_DIM_BITS-1] && left_pad <= left_full_padding);
+
+  // Zero-padding beat: only when not stalled and i_ready is high.
+  assign inside_padding = i_stall == 0 && i_ready && (overflow || front_pad);
+
+  // extra_cvec: total_output has reached output_face_area but not yet output_volume.
+  assign extra_cvec = (total_output >= if_lt_config.data.output_face_area) && total_output < if_lt_config.data.output_volume;
+
+  // write_line: not stalled, i_ready high, and either unwritten finished lines remain or extra_cvec is set.
+  assign write_line = (i_stall == 0 && i_ready && (( lines_written < finished_lines_reg) || (extra_cvec)));
+
+  always_ff @( posedge clk)
+  begin
+    // Defaults; later nonblocking assignments in this block override them.
+    o_valid <= 1'b0;
+    finished_lines_reg <= finished_lines_reg + finished_lines_comb;
+    o_read_req <= '{default: '0};
+    if (!i_rstn)
+    begin
+      finished_lines_reg <= '0;
+      lines_written <= '0;
+      output_cursor <= '0;
+      o_data <= '0;
+      o_line_num <= '{default: '0};
+      pool_out <= '0;
+
+      output_h_cnt <= 0;
+      output_w_cnt <= 0;
+
+      total_output <= 0;
+      cvec_rollover <= 0;
+      o_last <= 0;
+      cvec_count <= '0;
+      left_pad <= '0;
+      top_pad <= '0;
+    end
+    else if (if_lt_config.valid)
+    begin
+      o_line_num[pool_out_comb] <= line_out_comb;
+      pool_out <= pool_out_comb;
+      o_data <= i_output_line_data[pool_out_comb][CVEC*(cvec_count)+:CVEC];
+      o_last <= lines_written == (if_lt_config.data.output_volume - 1) || total_output == (if_lt_config.data.output_volume - 1);
+      output_cursor <= output_cursor;
+      lines_written <= lines_written;
+      cvec_rollover <= cvec_rollover;
+      total_output <= total_output;
+
+      // Could do the following with a state machine..?
+      o_valid <= '0;
+      o_read_req[pool_out_comb] <= 1'b1;
+      if (inside_padding) begin
+        o_data <= '0;
+        o_valid <= '1;
+      end
+      else if (write_line) begin
+        o_read_req[pool_out_comb] <= 1'b1;
+      end
+      if (i_stall) begin
+        // Hold the whole output beat while stalled. o_last must be held together
+        // with o_valid/o_data; otherwise the value registered above would be
+        // recomputed from counters that have already moved past the held beat.
+        o_valid <= o_valid;
+        o_data <= o_data;
+        o_last <= o_last;
+      end else begin
+        // NOTE(review): extra_cvec is not qualified by i_ready here, so a read can
+        // be issued (advancing lines_written/output_cursor) while i_ready is low
+        // without total_output advancing below -- confirm this is intended.
+        if ((write_line == 1 || extra_cvec) && inside_padding == 0 ) begin
+          o_read_req[pool_out_comb] <= 1'b1;
+
+          o_valid <= i_curr_out_line[pool_out_comb] == line_out_comb; // This makes it so that output stalls when all RAM ports are busy...
+          if (i_curr_out_line[pool_out_comb] == line_out_comb) begin
+            output_cursor <= ((output_cursor + 1) & ((NUM_BUFFER_POOLS) * (CVEC_PER_BUFFER) - 1));
+
+            lines_written <= lines_written + 1;
+          end
+        end
+
+        // A padding beat, or a write_line beat whose line matched, advances the position counters.
+        if (inside_padding || write_line == 1 && i_curr_out_line[pool_out_comb] == line_out_comb) begin
+          total_output <= total_output + 1;
+          cvec_rollover <= cvec_rollover + 1;
+
+          // Keep track of dimensionality of output:
+          output_w_cnt <= output_w_cnt + 1;
+          left_pad <= left_pad+1;
+
+          if (output_w_cnt >= if_lt_config.data.output_w-1)
+          begin
+            output_w_cnt <= 0;
+            left_pad <= 0;
+            top_pad <= top_pad + 1;
+            output_h_cnt <= output_h_cnt + 1;
+            if (output_h_cnt >= if_lt_config.data.output_h-1)
+            begin
+              output_h_cnt <= 0;
+            end
+          end
+          if ((cvec_rollover) == if_lt_config.data.output_face_area - 1) // start going to overflow...
+          begin
+            output_cursor <= 0;
+            cvec_rollover <= 0;
+            cvec_count <= cvec_count + 1;
+            left_pad<= 0;
+            top_pad <= 0;
+            output_w_cnt <= 0;
+            output_h_cnt <= 0;
+          end
+        end
+      end
+    end
+  end
+endmodule
