--- /nfs/site/disks/swip_dla_1/resources/inference_engine/2023.3.0_with_dev_tools/1/linux64/suse12/python/openvino/tools/benchmark/benchmark.py 2024-03-01 14:01:50.443877000 -0500 +++ benchmark.py 2024-04-01 10:06:18.751566000 -0400 @@ -1,14 +1,15 @@ -# Copyright (C) 2018-2023 Intel Corporation +# Copyright (C) 2018-2022 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import os from datetime import datetime from math import ceil +import warnings from openvino.runtime import Core, get_version, AsyncInferQueue -from .utils.constants import GPU_DEVICE_NAME, XML_EXTENSION, BIN_EXTENSION -from .utils.logging import logger -from .utils.utils import get_duration_seconds +from openvino.tools.benchmark.utils.constants import GPU_DEVICE_NAME, XML_EXTENSION, BIN_EXTENSION +from openvino.tools.benchmark.utils.logging import logger +from openvino.tools.benchmark.utils.utils import get_duration_seconds def percentile(values, percent): return values[ceil(len(values) * percent / 100) - 1] @@ -17,7 +18,17 @@ def __init__(self, device: str, number_infer_requests: int = 0, number_iterations: int = None, duration_seconds: int = None, api_type: str = 'async', inference_only = None): self.device = device - self.core = Core() + dla_plugins = os.environ.get('DLA_PLUGINS', default='') + if dla_plugins == '': + # Backwards compatability for old DLA_PLUGINS_XML_FILE + warnings.warn("DLA_PLUGINS_XML_FILE option is deprecated as of 2024.1, Please use DLA_PLUGINS") + dla_plugins = os.environ.get('DLA_PLUGINS_XML_FILE', default='') + self.core = Core(dla_plugins) + if "FPGA" in self.device: + dla_arch_file = os.environ.get('DLA_ARCH_FILE') + if dla_arch_file is None: + raise Exception(f"To use FPGA, you need to specify the path to an arch_file!") + self.core.set_property(device_name="FPGA", properties={"ARCH_PATH": dla_arch_file}) self.nireq = number_infer_requests if api_type == 'async' else 1 self.niter = number_iterations self.duration_seconds = get_duration_seconds(duration_seconds, self.niter, self.device) @@ -59,6 +70,9 @@ def set_cache_dir(self, cache_dir: str): self.core.set_property({'CACHE_DIR': cache_dir}) + def set_allow_auto_batching(self, flag: bool): + self.core.set_property({'ALLOW_AUTO_BATCHING': flag}) + def read_model(self, path_to_model: str): model_filename = os.path.abspath(path_to_model) head, ext = os.path.splitext(model_filename) @@ -110,7 +124,7 @@ (self.duration_seconds and exec_time < self.duration_seconds) or \ (iteration % self.nireq): idle_id = infer_queue.get_idle_request_id() - if idle_id in in_fly: + if idle_id in in_fly: # Is this check neccessary? times.append(infer_queue[idle_id].latency) else: in_fly.add(idle_id) @@ -162,7 +176,6 @@ def main_loop(self, requests, data_queue, batch_size, latency_percentile, pcseq): if self.api_type == 'sync': times, total_duration_sec, iteration = self.sync_inference(requests[0], data_queue) - fps = len(batch_size) * iteration / total_duration_sec elif self.inference_only: times, total_duration_sec, iteration = self.async_inference_only(requests) fps = len(batch_size) * iteration / total_duration_sec @@ -175,6 +188,9 @@ min_latency_ms = times[0] max_latency_ms = times[-1] + if self.api_type == 'sync': + fps = len(batch_size) * 1000 / median_latency_ms + if pcseq: for group in self.latency_groups: if group.times: