--- /nfs/site/disks/swip_dla_1/resources/inference_engine/2023.3.0_with_dev_tools/1/linux64/suse12/python/openvino/tools/benchmark/benchmark.py 2024-03-01 14:01:50.443877000 -0500
+++ benchmark.py 2024-04-01 10:06:18.751566000 -0400
@@ -1,14 +1,15 @@
-# Copyright (C) 2018-2023 Intel Corporation
+# Copyright (C) 2018-2022 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 import os
 from datetime import datetime
 from math import ceil
+import warnings
 
 from openvino.runtime import Core, get_version, AsyncInferQueue
-from .utils.constants import GPU_DEVICE_NAME, XML_EXTENSION, BIN_EXTENSION
-from .utils.logging import logger
-from .utils.utils import get_duration_seconds
+from openvino.tools.benchmark.utils.constants import GPU_DEVICE_NAME, XML_EXTENSION, BIN_EXTENSION
+from openvino.tools.benchmark.utils.logging import logger
+from openvino.tools.benchmark.utils.utils import get_duration_seconds
 
 def percentile(values, percent):
     return values[ceil(len(values) * percent / 100) - 1]
@@ -17,7 +18,17 @@
     def __init__(self, device: str, number_infer_requests: int = 0, number_iterations: int = None,
                  duration_seconds: int = None, api_type: str = 'async', inference_only = None):
         self.device = device
-        self.core = Core()
+        dla_plugins = os.environ.get('DLA_PLUGINS', default='')
+        if dla_plugins == '' and os.environ.get('DLA_PLUGINS_XML_FILE'):
+            # Backwards compatibility for the deprecated DLA_PLUGINS_XML_FILE
+            warnings.warn("DLA_PLUGINS_XML_FILE is deprecated as of 2024.1; please use DLA_PLUGINS instead", DeprecationWarning)
+            dla_plugins = os.environ.get('DLA_PLUGINS_XML_FILE')
+        self.core = Core(dla_plugins)
+        if "FPGA" in self.device:
+            dla_arch_file = os.environ.get('DLA_ARCH_FILE')
+            if dla_arch_file is None:
+                raise Exception("To use FPGA, you must specify an arch file via the DLA_ARCH_FILE environment variable!")
+            self.core.set_property(device_name="FPGA", properties={"ARCH_PATH": dla_arch_file})
         self.nireq = number_infer_requests if api_type == 'async' else 1
         self.niter = number_iterations
         self.duration_seconds = get_duration_seconds(duration_seconds, self.niter, self.device)
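
For context, a minimal sketch of how the patched constructor would be driven. The plugin and arch file paths below are hypothetical placeholders, and the HETERO device string is only illustrative; any device string containing "FPGA" triggers the DLA_ARCH_FILE lookup above:

    import os

    # Hypothetical paths; point these at your DLA installation.
    os.environ['DLA_PLUGINS'] = '/opt/dla/plugins.xml'
    os.environ['DLA_ARCH_FILE'] = '/opt/dla/arch/example.arch'

    from openvino.tools.benchmark.benchmark import Benchmark

    benchmark = Benchmark(device='HETERO:FPGA,CPU', number_infer_requests=4,
                          number_iterations=100, api_type='async')
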
@@ -59,6 +70,9 @@
     def set_cache_dir(self, cache_dir: str):
         self.core.set_property({'CACHE_DIR': cache_dir})
 
+    def set_allow_auto_batching(self, flag: bool):
+        self.core.set_property({'ALLOW_AUTO_BATCHING': flag})
+
     def read_model(self, path_to_model: str):
         model_filename = os.path.abspath(path_to_model)
         head, ext = os.path.splitext(model_filename)
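
The set_allow_auto_batching helper added above mirrors set_cache_dir: each forwards one global property to the Core. A minimal sketch of the equivalent direct call, assuming a stock Core with no DLA plugins:

    from openvino.runtime import Core

    core = Core()
    # Disable automatic batching globally so the runtime never wraps the
    # compiled model in a BATCH device behind the benchmark's back.
    core.set_property({'ALLOW_AUTO_BATCHING': False})
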
@@ -110,7 +124,7 @@
               (self.duration_seconds and exec_time < self.duration_seconds) or \
               (iteration % self.nireq):
             idle_id = infer_queue.get_idle_request_id()
-            if idle_id in in_fly:
+            if idle_id in in_fly:  # this request has run before, so a latency is available to record
                 times.append(infer_queue[idle_id].latency)
             else:
                 in_fly.add(idle_id)
@@ -162,7 +176,6 @@
     def main_loop(self, requests, data_queue, batch_size, latency_percentile, pcseq):
         if self.api_type == 'sync':
             times, total_duration_sec, iteration = self.sync_inference(requests[0], data_queue)
-            fps = len(batch_size) * iteration / total_duration_sec
         elif self.inference_only:
             times, total_duration_sec, iteration = self.async_inference_only(requests)
             fps = len(batch_size) * iteration / total_duration_sec
@@ -175,6 +188,9 @@
         min_latency_ms = times[0]
         max_latency_ms = times[-1]
 
+        if self.api_type == 'sync':
+            fps = len(batch_size) * 1000 / median_latency_ms
+
         if pcseq:
             for group in self.latency_groups:
                 if group.times:
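
The last two hunks relocate the sync-mode FPS calculation: instead of iterations divided by total wall-clock time, throughput is now derived from the median latency (median_latency_ms) computed just above. A quick sanity check of the new formula, assuming a single input (len(batch_size) == 1) and a median latency of 8 ms:

    # fps = len(batch_size) * 1000 / median_latency_ms
    fps = 1 * 1000 / 8  # 125.0 inferences per second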