summaryrefslogtreecommitdiff
path: root/python/openvino/demo/ip/intel_ai_ip/verilog/dla_aux_depthwise_pkg.sv
blob: 0f07a5a822ec5ebc22c5560cb736780704907ff2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
// Copyright 2020-2023 Intel Corporation.
//
// This software and the related documents are Intel copyrighted materials,
// and your use of them is governed by the express license under which they
// were provided to you ("License"). Unless the License provides otherwise,
// you may not use, modify, copy, publish, distribute, disclose or transmit
// this software or the related documents without Intel's prior written
// permission.
//
// This software and the related documents are provided as is, with no express
// or implied warranties, other than those that are expressly stated in the
// License.


/*
 * Package `dla_aux_depthwise_pkg`
 *
 * Package template for specific auxiliary blocks.
 *
 * WARNING!  ONLY EDIT THE PARTS MARKED IN BETWEEN
 *           "START EDITING" AND "END EDITING"
 *
 * The names of the structures and parameterized classes are fixed types and
 * should not be modified for future-compatibility. The contents of structures
 * marked for editing are examples that can be discarded and populated with
 * auxiliary-specific content.
 *
 * See README.md of the Example Aux block for more details.
 */

`undefineall
`resetall
`default_nettype none

package dla_aux_depthwise_pkg;
  import dla_common_pkg::*;

  `include "dla_aux_generic_pkg.svh" // Header for definitions common among all auxiliaries

// ------------------------------ START EDITING ------------------------------
  // Other imports and includes can be added
// ------------------------------  END EDITING  ------------------------------

  //
  // User Aux port and parameter structures
  //

  // Auxiliary parameters specific to the depthwise aux block.
  //
  // How the fields are consumed inside this package:
  //   - aux_depthwise_calc_total_config_bits() adds CONFIG_ID_WIDTH bits directly
  //     and $clog2(MAX_x + 1) bits for every MAX_* field declared below
  //     (the MAX_TILE_HEIGHT / MAX_TILE_WIDTH widths are additionally counted
  //     twice per lane).
  //   - aux_depthwise_calc_core_latency() reads PIPELINE_REG_NUM and
  //     MAX_WINDOW_WIDTH.
  //   - DEPTHWISE_TYPE is not read by any function in this package.
  //     NOTE(review): presumably it selects the depthwise variant in the RTL
  //     that uses this package - confirm against the consuming modules.
  typedef struct {
// ------------------------------ START EDITING ------------------------------
    int DEPTHWISE_TYPE;
    // Upper bounds on the window, stride and tile dimensions.
    int MAX_WINDOW_HEIGHT;
    int MAX_WINDOW_WIDTH;
    int MAX_STRIDE_VERTICAL;
    int MAX_STRIDE_HORIZONTAL;
    int MAX_TILE_HEIGHT;
    int MAX_TILE_WIDTH;
    int MAX_TILE_CHANNELS;
    // Number of config bits contributed as-is (no $clog2) to the total config width.
    int CONFIG_ID_WIDTH;
    // Added one-for-one to the value returned by aux_depthwise_calc_core_latency().
    int PIPELINE_REG_NUM;
    // Upper bounds on the dilation in each direction.
    int MAX_DILATION_VERTICAL;
    int MAX_DILATION_HORIZONTAL;
// ------------------------------  END EDITING  ------------------------------
  } aux_special_params_t;

  // Port structure carrying status from the control block to the config block
  typedef struct {
    logic done; // One-clock-cycle-wide pulse telling config that the previous config is done
  } control_to_config_t;

  // Grouped architecture parameter type for the depthwise aux.
  // Bundles the data-pack, config-stream, debug-AXI and generic aux parameter
  // structures together with the depthwise-specific aux_special_params_t so the
  // whole set can be passed around as a single value (see
  // aux_depthwise_calc_total_config_bits, which takes this type).
  // Only aux_special_params_t is declared in this file; the other member types
  // presumably come from dla_common_pkg or dla_aux_generic_pkg.svh - confirm.
  typedef struct  {
    aux_data_pack_params_t AUX_DATA_PACK_PARAMS;
    stream_params_t        CONFIG_STREAM_PARAMS;
    debug_axi_params_t     DEBUG_AXI_PARAMS;
    aux_generic_params_t   AUX_GENERIC_PARAMS;
    aux_special_params_t   AUX_SPECIAL_PARAMS;
  } aux_depthwise_arch_params_t;

  // Derived width/latency information for the vector dot engine.
  // Instances are produced by get_arch_info(), which copies each field from an
  // identifier of the same name (presumably declared in
  // dla_vector_dot_arch_info.svh - confirm). Within this package only
  // DSP_DOT_LATENCY is read back, by aux_depthwise_calc_core_latency().
  //
  // NOTE(review): "LAENCY" in DSP_ADDER_LAENCY and DSP_CHAIN_ADDER_LAENCY looks
  // like a misspelling of "LATENCY". The names are left as they are because
  // get_arch_info() refers to them with this exact spelling.
  typedef struct {
    int DOT_OUTPUT_WIDTH;
    int DEVICE_DSP_MULT_LATENCY;
    int DSP_ADDER_LAENCY;
    int DSP_CHAIN_ADDER_LAENCY;
    int DSP_OUTPUT_LATENCY;
    int DSP_CHAIN_LEN;
    int STANDALONE_DSP_DOT_LATENCY;
    int CHAINED_DSP_DOT_LATENCY;
    int DSP_DOT_LATENCY;
    int EXTRA_LATENCY;
    int DOT_LATENCY;
    int ACCUM_LATENCY;
    int TOTAL_LATENCY;
  } vector_dot_arch_info_t;

  // Function used for calculating the skid buffer latency. This latency value is not necessarily
  // equal to the input-to-output delay of the core. For simple blocks those values might be equal.
  // For complex blocks with arbitrary clock-enable signals within the core the amount of skid is
  // calculated by considering the worst corner cases of the backpressure switching.
  function int aux_depthwise_calc_core_latency(
    aux_special_params_t special_params,
    aux_data_pack_params_t data_pack_params,
    vector_dot_arch_info_t DEPTHWISE_VECTOR_ARCH_INFO
  );
// ------------------------------ START EDITING ------------------------------
    // Returns the sum of the latency contributions listed below.
    //   special_params             - PIPELINE_REG_NUM and MAX_WINDOW_WIDTH are read
    //   data_pack_params           - NATIVE_VECTOR_SIZE and VECTOR_SIZE are read
    //   DEPTHWISE_VECTOR_ARCH_INFO - DSP_DOT_LATENCY is read

    // Constant contributions.
    localparam int FIXED_RESPONSE_DELAY       = 6;
    localparam int OUTPUT_FIFO_RESPONSE_DELAY = 3;

    // Configurable pipeline registers plus one.
    automatic int pipeline_term = special_params.PIPELINE_REG_NUM + 1;

    // Term that grows with the log2 of the maximum window width, plus one.
    // A matching term for the window height is currently disabled:
    //$clog2(special_params.MAX_WINDOW_HEIGHT) + 1 +
    automatic int window_term = $clog2(special_params.MAX_WINDOW_WIDTH) + 1;

    // vector engine chain delay
    automatic int dot_term = 1 + DEPTHWISE_VECTOR_ARCH_INFO.DSP_DOT_LATENCY;

    // Integer ratio between the native vector size and the vector size.
    automatic int vector_ratio_term =
      data_pack_params.NATIVE_VECTOR_SIZE / data_pack_params.VECTOR_SIZE;

    return FIXED_RESPONSE_DELAY + OUTPUT_FIFO_RESPONSE_DELAY +
           pipeline_term + window_term + dot_term + vector_ratio_term;
// ------------------------------  END EDITING  ------------------------------
  endfunction : aux_depthwise_calc_core_latency

  // Total number of config bits are calculated here. Its implementation is aux specific.
  function int aux_depthwise_calc_total_config_bits(aux_depthwise_arch_params_t params);
// ------------------------------ START EDITING ------------------------------
    // Returns the total number of config bits for the depthwise aux as the sum
    // of the individual field widths computed below.
    automatic int lanes = aux_calc_num_lanes(params.AUX_DATA_PACK_PARAMS);

    // $clog2(MAX + 1) is the number of bits needed to represent 0..MAX.
    automatic int window_h_bits   = $clog2(params.AUX_SPECIAL_PARAMS.MAX_WINDOW_HEIGHT       + 1);
    automatic int window_w_bits   = $clog2(params.AUX_SPECIAL_PARAMS.MAX_WINDOW_WIDTH        + 1);
    automatic int stride_v_bits   = $clog2(params.AUX_SPECIAL_PARAMS.MAX_STRIDE_VERTICAL     + 1);
    automatic int stride_h_bits   = $clog2(params.AUX_SPECIAL_PARAMS.MAX_STRIDE_HORIZONTAL   + 1);
    automatic int tile_h_bits     = $clog2(params.AUX_SPECIAL_PARAMS.MAX_TILE_HEIGHT         + 1);
    automatic int tile_w_bits     = $clog2(params.AUX_SPECIAL_PARAMS.MAX_TILE_WIDTH          + 1);
    automatic int tile_c_bits     = $clog2(params.AUX_SPECIAL_PARAMS.MAX_TILE_CHANNELS       + 1);
    automatic int dilation_v_bits = $clog2(params.AUX_SPECIAL_PARAMS.MAX_DILATION_VERTICAL   + 1);
    automatic int dilation_h_bits = $clog2(params.AUX_SPECIAL_PARAMS.MAX_DILATION_HORIZONTAL + 1);

    // Fields that appear once per configuration.
    automatic int total_bits = params.AUX_SPECIAL_PARAMS.CONFIG_ID_WIDTH;
    total_bits += window_h_bits + window_w_bits;
    total_bits += stride_v_bits + stride_h_bits;
    total_bits += tile_h_bits + tile_w_bits + tile_c_bits;

    // Per-lane fields: two tile-height-sized and two tile-width-sized values
    // for each lane.
    total_bits += 2 * tile_h_bits * lanes;
    total_bits += 2 * tile_w_bits * lanes;

    total_bits += dilation_v_bits + dilation_h_bits;

    // One element-wide field plus a 2-bit and a 1-bit field.
    total_bits += params.AUX_DATA_PACK_PARAMS.ELEMENT_BITS;
    total_bits += 2;
    total_bits += 1;

    return total_bits;
// ------------------------------  END EDITING  ------------------------------
  endfunction : aux_depthwise_calc_total_config_bits

  // Other user defined structures, classes, functions, etc.
// ------------------------------ START EDITING ------------------------------
  // ...
  // ...
  // Definitions for the vector dot engine used to compute depthwise convolution,
  // defined similarly to fpga/pe_array/rtl/src/dla_pe_array_pkg.sv.
  //
  // vector_dot_arch_t is the argument type of get_arch_info(). No function in
  // this package reads its fields directly.
  // NOTE(review): the fields are presumably consumed by
  // dla_vector_dot_arch_info.svh, which get_arch_info() includes - confirm.
  // device_family_t is not declared in this file (presumably from
  // dla_common_pkg - confirm).
  typedef struct {
    int FEATURE_WIDTH;
    int FILTER_WIDTH;
    int BIAS_WIDTH;
    int DOT_SIZE;
    device_family_t DEVICE;
  } vector_dot_arch_t;



  function automatic vector_dot_arch_info_t get_arch_info(
    dla_aux_depthwise_pkg::vector_dot_arch_t arch
  );
    // Packs the vector dot engine widths/latencies into a vector_dot_arch_info_t.
    // The included header is expected to declare one identifier per struct
    // field, using the field's exact name (presumably derived from `arch` -
    // confirm against dla_vector_dot_arch_info.svh).
    `include "dla_vector_dot_arch_info.svh"
    vector_dot_arch_info_t arch_info;

    // Copy every value into the member of the same name.
    arch_info.DOT_OUTPUT_WIDTH           = DOT_OUTPUT_WIDTH;
    arch_info.DEVICE_DSP_MULT_LATENCY    = DEVICE_DSP_MULT_LATENCY;
    arch_info.DSP_ADDER_LAENCY           = DSP_ADDER_LAENCY;
    arch_info.DSP_CHAIN_ADDER_LAENCY     = DSP_CHAIN_ADDER_LAENCY;
    arch_info.DSP_OUTPUT_LATENCY         = DSP_OUTPUT_LATENCY;
    arch_info.DSP_CHAIN_LEN              = DSP_CHAIN_LEN;
    arch_info.STANDALONE_DSP_DOT_LATENCY = STANDALONE_DSP_DOT_LATENCY;
    arch_info.CHAINED_DSP_DOT_LATENCY    = CHAINED_DSP_DOT_LATENCY;
    arch_info.DSP_DOT_LATENCY            = DSP_DOT_LATENCY;
    arch_info.EXTRA_LATENCY              = EXTRA_LATENCY;
    arch_info.DOT_LATENCY                = DOT_LATENCY;
    arch_info.ACCUM_LATENCY              = ACCUM_LATENCY;
    arch_info.TOTAL_LATENCY              = TOTAL_LATENCY;

    return arch_info;
  endfunction : get_arch_info
// ------------------------------  END EDITING  ------------------------------

  // User defined logic debug port.
  // Currently contains only a single `logic` member named `dummy`; further
  // debug signals can be added between the edit markers.
  typedef struct {
// ------------------------------ START EDITING ------------------------------
      logic dummy; // other signals
// ------------------------------  END EDITING  ------------------------------
  } debug_core_t;

  // User defined control debug port.
  // Currently contains only a single `logic` member named `dummy`; further
  // debug signals can be added between the edit markers.
  typedef struct {
// ------------------------------ START EDITING ------------------------------
      logic dummy; // other signals
// ------------------------------  END EDITING  ------------------------------
  } debug_control_t;

  // User defined config debug port.
  // Currently contains only a single `logic` member named `dummy`; further
  // debug signals can be added between the edit markers.
  typedef struct {
// ------------------------------ START EDITING ------------------------------
      logic dummy; // other signals
// ------------------------------  END EDITING  ------------------------------
  } debug_config_t;

// Header for debug-related definitions common among all auxiliaries
`include "dla_aux_generic_debug_pkg.svh"

endpackage