// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
// This file provides mAP and COCO AP calculation in `metrics_eval` and `metrics_update`.
// The calculation comprises two parts: 1) data preprocessing, and 2) metrics calculation.
// Data preprocessing consists of prediction box parsing; resizing and filtering; non-max
// suppression; and clipping. The preprocessed data is stored in the `PredictionEntry` and
// `AnnotationEntry` structs, which are used in `metrics_update` and `metrics_eval`.
// `metrics_update` accumulates intermediate statistics to form the batched statistics, and
// `metrics_eval` calculates the area under the P-R curve. All of the metadata should be set
// in the header file, and the runtime invariants are set using `set_runtime`.
// `validate_yolo_wrapper` is the main entry point of the subroutine.
//
// The mAP algorithm is built according to section 2.2 in https://arxiv.org/pdf/1607.03476.pdf
// and OpenVINO's accuracy_checker. The COCO AP algorithm is specified in
// https://cocodataset.org/#detection-eval. The result is compared value-by-value with the
// result from OpenVINO's accuracy_checker using the dlsdk launcher. To obtain the golden
// result, apply the steps in https://docs.openvino.ai/latest/omz_models_model_yolo_v3_tf.html.

#include "average_precision.hpp"

#include <math.h>
#include <stdlib.h>

#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#else
#include <dirent.h>
#endif

#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L)
#include <filesystem>
namespace fs = std::filesystem;
#endif

#include <algorithm>
#include <cmath>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <numeric>
#include <set>
#include <string>
#include <vector>

#include "utils.hpp"

#define VERBOSE 0

// Parses the predicted box data in `predicted_val` into a 2d tensor `raw_predictions`,
// one `Box` of `pbox_size` values per anchor per grid cell.
// Order: conv2d_12[1x255x26x26] -> conv2d_9[1x255x13x13], NCHW order
void parse_prediction_boxes(std::vector<float> &predicted_val, Tensor2d &raw_predictions) {
    const std::vector<unsigned> &grid_sizes = yolo_meta.grid_sizes.at(runtime_vars.name);
    int total_boxes{0};
    std::for_each(std::begin(grid_sizes), std::end(grid_sizes),
                  [&](unsigned n) { total_boxes += std::pow(n, 2) * yolo_meta.box_per_channel; });
    for (int count = 0; count < total_boxes; count++) {
        raw_predictions.emplace_back(Box{});
        raw_predictions[count].reserve(yolo_meta.pbox_size);
    }
    auto index_of = [=](int n, int c, int h, int w, int C, int H, int W) {
        return n * C * H * W + c * H * W + h * W + w;
    };
    // boxes from the 26x26 grid come first.
    // treat each output tensor as `box_per_channel` batches of boxes.
    for (int grid : grid_sizes) {
        // offset to where the data is retrieved
        int data_offset{0};
        // offset to where the data is inserted
        int position_offset{0};
        for (int n : grid_sizes) {
            if (n == grid) break;
            data_offset += pow(n, 2) * yolo_meta.channel;
            position_offset += pow(n, 2) * yolo_meta.box_per_channel;
        }
        int N = yolo_meta.box_per_channel, C = yolo_meta.pbox_size, H = grid, W = grid;
        for (int n = 0; n < N; n++) {
            for (int c = 0; c < C; c++) {
                for (int h = 0; h < H; h++) {
                    for (int w = 0; w < W; w++) {
                        // corresponds to datum #c for grid cell (h, w) of anchor #n
                        Box &pbox = raw_predictions[position_offset + n * H * W + h * W + w];
                        // fills prediction boxes
                        pbox.emplace_back(predicted_val[data_offset + index_of(n, c, h, w, C, H, W)]);
                    }
                }
            }
        }
    }
}
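// Worked example of the layout above (illustrative; assumes pbox_size == 85, i.e. 4 box
// coordinates + 1 objectness score + 80 class scores, consistent with the 255-channel
// outputs noted in the header comment since 255 = 3 anchors x 85):
//
//   For yolo-v3-tiny-tf with grid_sizes {26, 13} and box_per_channel == 3,
//   total_boxes = (26*26 + 13*13) * 3 = 2535.
//   Datum #c of anchor #n at cell (h, w) of the 26x26 head is read from
//   predicted_val[n*85*26*26 + c*26*26 + h*26 + w]; the 13x13 head starts at
//   data_offset = 26*26*255 = 172380, and its boxes are inserted from
//   position_offset = 26*26*3 = 2028 onwards.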
// Parses annotation boxes stored in a text file into a 3d tensor `raw_annotation`.
// Precondition: the file is formatted such that each line contains 5 doubles separated
// by spaces, i.e. [class, x, y, width, height]. Returns -3 if the file cannot be read.
int parse_annotation_boxes(Tensor3d &raw_annotation, const std::string &path) {
    int err = 0;
    std::ifstream annotation_file(path);
    if (!annotation_file.is_open()) {
        slog::err << "Couldn't access path: " << path << slog::endl;
        err = -3;
    } else {
        Tensor2d annotation_box;
        int class_id;
        double x, y, w, h;
        while (annotation_file >> class_id >> x >> y >> w >> h) {
            annotation_box.emplace_back(Box{x, y, w, h, (double)class_id});
        }
        raw_annotation.emplace_back(annotation_box);
    }
    return err;
}

// Extracts the stems of filenames in `path` that carry the extension `ext`.
// Returns the number of files with extension `ext`, or -1 on error.
int walk_dirent(std::vector<std::string> &names, const std::string &path, std::string ext) {
    int count = 0;
#if defined(_WIN32) || defined(_WIN64)
#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L)
    for (const auto &entry : fs::directory_iterator(path)) {
        if (fs::is_regular_file(entry)) {
            std::string filename = entry.path().filename().string();
            std::string file_extension = filename.substr(filename.find_last_of(".") + 1);
            if (file_extension == ext) {
                // stores the stem so lookups against image stems succeed, matching the
                // POSIX branch below
                names.emplace_back(entry.path().stem().string());
                count++;
            }
        }
    }
#endif
#else
    DIR *dir = opendir(path.c_str());
    if (!dir) {
        slog::err << "Couldn't access path: " << path << slog::endl;
        count = -1;
    } else {
        for (struct dirent *dent; (dent = readdir(dir)) != nullptr;) {
            std::string dirname(dent->d_name);
            std::string stem = GetStem(dirname);
            std::string extension = GetExtension(dirname);
            if (stem == "" || stem == "." || extension != ext) continue;
            names.emplace_back(stem);
            count += 1;
        }
        closedir(dir);
    }
#endif
    return count;
}

// Dispatches each step of collecting validation images, annotation boxes, and image shapes.
// Returns 0 on success, -1 for a mismatch in the number of annotation files and validation
// images, -2 for a missing annotation file, -3 for failing to access an annotation file,
// and -4 for failing to access a validation image.
int collect_validation_dataset(std::vector<std::string> &image_paths, Tensor3d &raw_annotations,
                               Tensor2d &shapes) {
    int err = 0;
    // set of annotation file names
    std::vector<std::string> tmp;
    int num_file = walk_dirent(tmp, runtime_vars.groundtruth_loc, runtime_vars.gt_extension);
    if (num_file < (int)(runtime_vars.batch_size * runtime_vars.niter)) {
        if (num_file >= 0) {
            slog::err << "Not enough validation data found. "
                      << runtime_vars.batch_size * runtime_vars.niter << " required, " << num_file
                      << " provided." << slog::endl;
        }
        err = -1;
    } else {
        std::set<std::string> annotation_file_index(tmp.begin(), tmp.end());
        // gets all images, corresponding annotations, and shapes
        std::string gt_path;
        for (unsigned batch = 0; batch < runtime_vars.batch_size * runtime_vars.niter; batch++) {
            std::string image_path(image_paths[batch]);
            std::string img_name = GetStem(image_path);
            if (annotation_file_index.find(img_name) == annotation_file_index.end()) {
                slog::err << "Missing annotation file for validation image: " << image_paths[batch]
                          << slog::endl;
                err = -2;
                break;
            } else {
                gt_path = runtime_vars.groundtruth_loc + "/" + img_name + "."
                          + runtime_vars.gt_extension;
                // gets image dimensions
                cv::Mat image = cv::imread(image_paths[batch]);
                if (image.data == nullptr || image.empty()) {
                    slog::err << "Couldn't open input image: " << image_paths[batch] << slog::endl;
                    err = -4;
                    break;
                }
                err = parse_annotation_boxes(raw_annotations, gt_path);
                if (err != 0) break;
                shapes.emplace_back(Box{(double)image.cols, (double)image.rows});
            }
        }
    }
    return err;
}
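// Example of the expected annotation format (illustrative): a groundtruth file line
//   0 12.5 20.0 30.0 40.0
// is parsed by parse_annotation_boxes into Box{12.5, 20.0, 30.0, 40.0, 0.0}, i.e.
// [x, y, width, height, class]; resize_annotation_boxes later derives the corners as
// x_min = 12.5, x_max = 12.5 + 30.0 = 42.5, y_min = 20.0, y_max = 20.0 + 40.0 = 60.0.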
// Removes the items at `indices` in the vector `vec`.
template <typename T>
void reduce_by_index(std::vector<T> &vec, std::vector<unsigned> indices) {
    std::sort(indices.begin(), indices.end());
    for (auto it = indices.rbegin(); it != indices.rend(); it++) {
        vec.erase(vec.begin() + *it);
    }
}

// Calculates and returns the Intersection over Union score for two boxes by
// calculating their area of overlap and area of union.
double intersection_over_union(const Box &box1, const Box &box2) {
    using std::max;
    using std::min;
    double intersect_length_x = max(0.0, min(box1[X_MAX], box2[X_MAX]) -
                                             max(box1[X_MIN], box2[X_MIN]) + yolo_meta.boundary);
    double intersect_length_y = max(0.0, min(box1[Y_MAX], box2[Y_MAX]) -
                                             max(box1[Y_MIN], box2[Y_MIN]) + yolo_meta.boundary);
    double intersection_of_area = intersect_length_x * intersect_length_y;
    double box1_area = (box1[X_MAX] - box1[X_MIN] + yolo_meta.boundary) *
                       (box1[Y_MAX] - box1[Y_MIN] + yolo_meta.boundary);
    double box2_area = (box2[X_MAX] - box2[X_MIN] + yolo_meta.boundary) *
                       (box2[Y_MAX] - box2[Y_MIN] + yolo_meta.boundary);
    double union_of_area = box1_area + box2_area - intersection_of_area;
    return (union_of_area > 0.0) ? intersection_of_area / union_of_area : 0.0;
}

// Returns the index of the largest element in the vector `vec`.
template <typename T>
int argmax(std::vector<T> vec) {
    return std::distance(vec.begin(), std::max_element(vec.begin(), vec.end()));
}

// Returns the index of the largest element in the range [`begin`, `end`).
template <typename Iter>
int argmax(Iter begin, Iter end) {
    return std::distance(begin, std::max_element(begin, end));
}
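// Worked IoU example (illustrative, assuming yolo_meta.boundary == 0): for box A with
// (x_min, y_min, x_max, y_max) = (0, 0, 10, 10) and box B = (5, 5, 15, 15),
// intersection_over_union computes intersect_length_x = min(10, 15) - max(0, 5) = 5 and
// likewise 5 in y, so the overlap is 25; each box has area 100, the union is
// 100 + 100 - 25 = 175, and the IoU is 25 / 175, roughly 0.143.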
// Resizes bounding box coordinates from ratios relative to a grid cell to actual pixel
// coordinates. This function resizes the prediction boxes in the 2d tensor `raw_predictions`
// based on the definition on page 4 of https://arxiv.org/pdf/1612.08242.pdf. The prediction
// boxes are also filtered based on their confidence score and class-specific score. The result
// is stored in an instance of `PredictionEntry`, which is used for the statistics calculation.
void resize_and_filter_prediction_boxes(Tensor2d &raw_predictions, PredictionEntry &prediction,
                                        const unsigned batch) {
    unsigned size = 0;
#if VERBOSE == 1
    unsigned c12 = 0, c9 = 0, c58 = 0, c66 = 0, c74 = 0;
#endif
    for (unsigned grid : yolo_meta.grid_sizes.at(runtime_vars.name)) {
        unsigned offset{0};
        for (unsigned n : yolo_meta.grid_sizes.at(runtime_vars.name)) {
            if (n == grid) break;
            offset += pow(n, 2) * yolo_meta.box_per_channel;
        }
        for (unsigned x = 0; x < grid; x++) {
            for (unsigned y = 0; y < grid; y++) {
                for (unsigned n = 0; n < yolo_meta.box_per_channel; n++) {
                    unsigned bbox_idx = offset + n * pow(grid, 2) + y * grid + x;
                    Box &bbox = raw_predictions[bbox_idx];
                    // finds the predicted label as the class with the highest score
                    int label = argmax(bbox.begin() + (yolo_meta.pbox_size - yolo_meta.num_classes),
                                       bbox.end());
                    double cls_score = bbox[BBOX_CONFIDENCE] *
                                       bbox[(yolo_meta.pbox_size - yolo_meta.num_classes) + label];
                    // filters outliers with a low confidence score or class score
                    if (bbox[BBOX_CONFIDENCE] < yolo_meta.confidence_threshold ||
                        cls_score < yolo_meta.confidence_threshold)
                        continue;
                    prediction.cls.push_back(label);
                    prediction.cls_score.push_back(cls_score);
#if VERBOSE == 1
                    c74 += (unsigned)(grid == 52);
                    c66 += (unsigned)(grid == 26);
                    c58 += (unsigned)(grid == 13);
                    c12 += (unsigned)(grid == 26);
                    c9 += (unsigned)(grid == 13);
#endif
                    // deduces anchor box width and height
                    unsigned dim = yolo_meta.anchor_sizes.at(runtime_vars.name).at(grid).size() /
                                   yolo_meta.box_per_channel;
                    double anchor_w = yolo_meta.anchor_sizes.at(runtime_vars.name).at(grid)[n * dim];
                    double anchor_h = yolo_meta.anchor_sizes.at(runtime_vars.name).at(grid)[n * dim + 1];
                    // calculates the center, width, and height of bbox
                    double bbox_center_x = (bbox[BBOX_X] + x) / grid;
                    double bbox_center_y = (bbox[BBOX_Y] + y) / grid;
                    double bbox_w = exp(bbox[BBOX_W]) * anchor_w / yolo_meta.dst_image_size[IMG_W];
                    double bbox_h = exp(bbox[BBOX_H]) * anchor_h / yolo_meta.dst_image_size[IMG_H];
                    // calculates the actual coordinates of bbox
                    double w = runtime_vars.source_image_sizes[batch][IMG_W];
                    double h = runtime_vars.source_image_sizes[batch][IMG_H];
                    double x_max = w * (bbox_center_x + bbox_w / 2.0);
                    double x_min = w * (bbox_center_x - bbox_w / 2.0);
                    double y_max = h * (bbox_center_y + bbox_h / 2.0);
                    double y_min = h * (bbox_center_y - bbox_h / 2.0);
                    prediction.x_max.emplace_back(x_max);
                    prediction.x_min.emplace_back(x_min);
                    prediction.y_max.emplace_back(y_max);
                    prediction.y_min.emplace_back(y_min);
                    size += 1;
                }
            }
        }
    }
    prediction.size = size;
#if VERBOSE == 1
    if (runtime_vars.name == "yolo-v3-tf") {
        slog::info << "prediction boxes from conv2d58: " << c58 << slog::endl;
        slog::info << "prediction boxes from conv2d66: " << c66 << slog::endl;
        slog::info << "prediction boxes from conv2d74: " << c74 << slog::endl;
    } else if (runtime_vars.name == "yolo-v3-tiny-tf") {
        slog::info << "prediction boxes from conv2d12: " << c12 << slog::endl;
        slog::info << "prediction boxes from conv2d9: " << c9 << slog::endl;
    }
#endif
}
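// Worked decode example (illustrative values; 116 is one of the stock YOLOv3 anchors for
// the 13x13 head, and the 640-pixel source width is arbitrary): for grid == 13, cell x == 6,
// bbox[BBOX_X] == 0.5, bbox[BBOX_W] == 0.0, anchor_w == 116, dst_image_size[IMG_W] == 416:
//   bbox_center_x = (0.5 + 6) / 13 = 0.5
//   bbox_w        = exp(0.0) * 116 / 416, roughly 0.2788
//   x_min         = 640 * (0.5 - 0.2788 / 2), roughly 230.8
//   x_max         = 640 * (0.5 + 0.2788 / 2), roughly 409.2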
// Returns the indices of `vec` sorted in descending order.
std::vector<unsigned> argsort_gt(const std::vector<double> &vec) {
    std::vector<unsigned> order(vec.size());
    std::generate(order.begin(), order.end(), [n = 0]() mutable { return n++; });
    std::sort(order.begin(), order.end(), [&](int i1, int i2) { return vec[i1] > vec[i2]; });
    return order;
}

// Performs the non-maximum suppression algorithm to eliminate repetitive bounding boxes.
// A bounding box is preserved iff it has the highest confidence score over all
// overlapping bounding boxes.
void nms(PredictionEntry &prediction) {
    if (prediction.size == 0) return;
    std::vector<unsigned> &&order = argsort_gt(prediction.cls_score);
    std::vector<unsigned> keep;
    std::set<unsigned> discard;
    unsigned top_score_idx;
    while (discard.size() < order.size()) {
        bool has_top = false;
        for (unsigned idx : order) {
            if (discard.find(idx) != discard.end()) continue;
            if (!has_top) {
                has_top = true;
                top_score_idx = idx;
                keep.emplace_back(top_score_idx);
                discard.insert(top_score_idx);
                continue;
            }
            double iou = intersection_over_union(prediction.box_at(idx), prediction.box_at(top_score_idx));
            if (iou > yolo_meta.iou_threshold) {
                discard.insert(idx);
            }
        }
    }
    std::vector<unsigned> discard_idx(discard.size());
    std::vector<unsigned> indexes(discard.begin(), discard.end());
    std::sort(indexes.begin(), indexes.end());
    std::sort(keep.begin(), keep.end());
    std::vector<unsigned>::iterator it = std::set_difference(indexes.begin(), indexes.end(),
                                                             keep.begin(), keep.end(),
                                                             discard_idx.begin());
    discard_idx.resize(it - discard_idx.begin());
    // removes filtered predicted bounding boxes
    reduce_by_index(prediction.x_max, discard_idx);
    reduce_by_index(prediction.x_min, discard_idx);
    reduce_by_index(prediction.y_max, discard_idx);
    reduce_by_index(prediction.y_min, discard_idx);
    reduce_by_index(prediction.cls_score, discard_idx);
    reduce_by_index(prediction.cls, discard_idx);
    prediction.size -= discard_idx.size();
}

// Calculates the actual size of the groundtruth bounding boxes.
void resize_annotation_boxes(Tensor3d &raw_annotations, AnnotationEntry &annotation,
                             const unsigned batch) {
    for (auto &gt_box : raw_annotations[batch]) {
        annotation.x_max.emplace_back(gt_box[BBOX_X] + gt_box[BBOX_W]);
        annotation.x_min.emplace_back(gt_box[BBOX_X]);
        annotation.y_max.emplace_back(gt_box[BBOX_Y] + gt_box[BBOX_H]);
        annotation.y_min.emplace_back(gt_box[BBOX_Y]);
        annotation.cls.emplace_back(gt_box[BBOX_CONFIDENCE]);
    }
    annotation.size = raw_annotations[batch].size();
}

// Limits the coordinates of predicted bounding boxes to within the dimensions of the source image.
void clip_box(PredictionEntry &prediction, const unsigned batch) {
    if (prediction.size == 0) return;
    double x_upper_bound = runtime_vars.source_image_sizes[batch][IMG_W];
    double y_upper_bound = runtime_vars.source_image_sizes[batch][IMG_H];
    auto _clip = [](double v, double lower, double upper) {
        return (v < lower) ? lower : ((v > upper) ? upper : v);
    };
    for (unsigned idx = 0; idx < prediction.size; idx++) {
        prediction.x_max[idx] = _clip(prediction.x_max[idx], 0, x_upper_bound);
        prediction.x_min[idx] = _clip(prediction.x_min[idx], 0, x_upper_bound);
        prediction.y_max[idx] = _clip(prediction.y_max[idx], 0, y_upper_bound);
        prediction.y_min[idx] = _clip(prediction.y_min[idx], 0, y_upper_bound);
    }
}

// Calculates the area under the P-R curve using an `interval`-point interpolated sum
// (e.g. 11 points for the PASCAL VOC metric).
double average_precision(const std::vector<double> &precision, const std::vector<double> &recall,
                         unsigned interval) {
    double result = 0.0;
    double step = 1 / (double)(interval - 1);
    for (unsigned intvl = 0; intvl < interval; intvl++) {
        double point = step * intvl;
        double max_precision = 0.0;
        for (unsigned idx = 0; idx < recall.size(); idx++) {
            if (recall[idx] >= point) {
                if (precision[idx] > max_precision) {
                    max_precision = precision[idx];
                }
            }
        }
        result += max_precision / (double)interval;
    }
    return result;
}
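// Worked 11-point example for average_precision (illustrative): with
// precision = {1.0, 0.5, 0.667} and recall = {0.5, 0.5, 1.0}, the recall points are
// 0.0, 0.1, ..., 1.0. For the six points up to 0.5, the best precision among entries with
// recall >= point is 1.0; for the five points above 0.5 only the last entry qualifies,
// giving 0.667. The result is (6 * 1.0 + 5 * 0.667) / 11, roughly 0.849.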
// Stores intermediate statistics for the AP calculation. APs are calculated from the
// true positives, false positives, and the number of targets, sorted by the class score
// of the predicted bounding box.
typedef struct _map_stats {
    int num_gt_object;
    std::vector<double> scores;
    std::vector<int> true_positive;
    std::vector<int> false_positive;
    _map_stats() { this->num_gt_object = 0; }
} mAPStats;

// Collects the per-class intermediate statistics (scores, TP/FP flags, and the number of
// targets) for one image at IoU threshold `thresh`, from which the interpolated mAP is
// later computed.
std::vector<mAPStats> mean_average_precision(PredictionEntry &prediction, AnnotationEntry &annotation,
                                             double thresh) {
    std::vector<int> class_list(yolo_meta.num_classes);
    std::generate(class_list.begin(), class_list.end(), [n = 0]() mutable { return n++; });
    std::vector<mAPStats> image_result(yolo_meta.num_classes, mAPStats{});
    // average precision for each class
    for (int category : class_list) {
        // total number of bounding boxes in the annotation
        int num_gt_object = std::count_if(annotation.cls.begin(), annotation.cls.end(),
                                          [&](int &cls) { return (cls == (int)category); });
        // total number of predicted bounding boxes
        int num_pred_boxes = std::count_if(prediction.cls.begin(), prediction.cls.end(),
                                           [&](int &cls) { return (cls == (int)category); });
        image_result[category].num_gt_object = num_gt_object;
        // stores the scores for sorting out the correct order of TP and FP
        image_result[category].true_positive.resize(num_pred_boxes, 0);
        image_result[category].false_positive.resize(num_pred_boxes, 0);
        image_result[category].scores.resize(num_pred_boxes, 0.0);
        std::set<unsigned> matched_gtbox;
        unsigned pred_num = 0;
        std::vector<unsigned> &&sorted_pbox_idx = argsort_gt(prediction.cls_score);
        for (unsigned &pbox_idx : sorted_pbox_idx) {
            if (prediction.cls[pbox_idx] != category) continue;
            image_result[category].scores[pred_num] = prediction.cls_score[pbox_idx];
            unsigned most_overlapped_idx = 0;
            double most_overlapped_iou = 0.0;
            // finds the most overlapped groundtruth bounding box
            for (unsigned gtbox_idx = 0; gtbox_idx < annotation.size; gtbox_idx++) {
                if (annotation.cls[gtbox_idx] != category) continue;
                double iou = intersection_over_union(prediction.box_at(pbox_idx),
                                                     annotation.box_at(gtbox_idx));
                if (iou > most_overlapped_iou) {
                    most_overlapped_iou = iou;
                    most_overlapped_idx = gtbox_idx;
                }
            }
            // when there is no ground truth, all predicted boxes are false positives,
            // and they are preserved for the batched AP calculation.
            if (!num_gt_object) {
                image_result[category].false_positive[pred_num++] = 1;
            } else {
                // the predicted bounding box is a true positive iff it is the most overlapped,
                // the matched groundtruth bounding box has not been matched previously, and
                // the IoU is above `thresh`.
                if (most_overlapped_iou >= thresh) {
                    if (matched_gtbox.find(most_overlapped_idx) == matched_gtbox.end()) {
                        matched_gtbox.insert(most_overlapped_idx);
                        image_result[category].true_positive[pred_num++] = 1;
                    } else {
                        image_result[category].false_positive[pred_num++] = 1;
                    }
                } else {
                    image_result[category].false_positive[pred_num++] = 1;
                }
            }
        }
    }
    return image_result;
}

// Initializes the runtime variables in the `runtime_vars` struct.
void set_runtime(std::string name, unsigned niter, unsigned batch_size, const std::string &input_loc,
                 const std::string &annotation_loc) {
    runtime_vars.name = name;
    runtime_vars.niter = niter;
    runtime_vars.batch_size = batch_size;
    runtime_vars.groundtruth_loc = annotation_loc;
    runtime_vars.input_loc = input_loc;
}

// Return type of the function `validate_yolo`.
struct metrics {
    std::vector<mAPStats> map;
    std::vector<std::vector<mAPStats>> coco;
};
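// Worked matching example for mean_average_precision above (illustrative): with one
// groundtruth box of class 0 and two class-0 predictions scored 0.9 (IoU 0.8 with the
// groundtruth) and 0.7 (IoU 0.6), at thresh == 0.5 the higher-scored prediction is visited
// first and becomes a true positive; the second one overlaps well enough, but the groundtruth
// is already in `matched_gtbox`, so it becomes a false positive. The resulting per-class
// stats are scores = {0.9, 0.7}, true_positive = {1, 0}, false_positive = {0, 1}, and
// num_gt_object = 1.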
// Takes one image's results data and annotation boxes, and calculates the per-class mAP and
// COCO AP statistics for the network.
struct metrics validate_yolo(std::vector<float> &results_data, Tensor3d &raw_annotations,
                             const unsigned batch) {
    Tensor2d raw_predictions;
    PredictionEntry prediction;
    AnnotationEntry annotation;
    // executes accuracy check recipes.
    try {
        parse_prediction_boxes(results_data, raw_predictions);
        resize_and_filter_prediction_boxes(raw_predictions, prediction, batch);
        resize_annotation_boxes(raw_annotations, annotation, batch);
        nms(prediction);
        clip_box(prediction, batch);
    } catch (const std::exception &e) {
        slog::err << "Abort postprocessing." << slog::endl;
        std::cerr << e.what() << std::endl;
        exit(EXIT_FAILURE);
    }
    // mAP
    std::vector<mAPStats> map_stats =
        mean_average_precision(prediction, annotation, yolo_meta.pascal_voc_metric);
    // COCO metric
    std::vector<std::vector<mAPStats>> coco_ap_stats;
    std::for_each(std::begin(yolo_meta.coco_metric), std::end(yolo_meta.coco_metric),
                  [&](const double thresh) {
                      coco_ap_stats.emplace_back(mean_average_precision(prediction, annotation, thresh));
                  });
    return {map_stats, coco_ap_stats};
}

// Appends all of the elements in `v2` at the end of `v1` in order.
template <typename T>
void extend(std::vector<T> &v1, const std::vector<T> &v2) {
    v1.reserve(v1.size() + v2.size());
    v1.insert(v1.end(), v2.begin(), v2.end());
}

// Updates the batched statistics from an individual image's result. The final batched AP and
// COCO AP are calculated based on the updated `batched_stats`.
void metrics_update(std::vector<mAPStats> &batched_stats, const std::vector<mAPStats> &img_stats) {
    for (unsigned cat = 0; cat < yolo_meta.num_classes; cat++) {
        batched_stats[cat].num_gt_object += img_stats[cat].num_gt_object;
        // updates batched statistics; omits classes where no prediction is present.
        if (!img_stats[cat].scores.size()) continue;
        extend(batched_stats[cat].scores, img_stats[cat].scores);
        extend(batched_stats[cat].true_positive, img_stats[cat].true_positive);
        extend(batched_stats[cat].false_positive, img_stats[cat].false_positive);
    }
}

// Calculates the AP using the `interval`-point interpolated integral.
double metrics_eval(const std::vector<mAPStats> &stats, unsigned interval) {
    std::vector<double> class_aps;
    for (unsigned category = 0; category < yolo_meta.num_classes; category++) {
        // omits the class when no prediction is present.
        if (!stats[category].scores.size()) continue;
        // the predictions are all false positives when there is no groundtruth for this
        // class, and therefore the class AP is 0.0
        if (stats[category].num_gt_object == 0 && stats[category].scores.size()) {
            class_aps.push_back(0.0);
            continue;
        }
        int TP = 0, FP = 0;
        std::vector<double> precision, recall;
        // sorts the TP and FP based on the order of confidence score.
        std::vector<unsigned> &&sorted_stats_index = argsort_gt(stats[category].scores);
        // calculates the intermediate statistics.
        for (unsigned idx : sorted_stats_index) {
            TP += stats[category].true_positive[idx];
            FP += stats[category].false_positive[idx];
            precision.emplace_back(TP / (double)(TP + FP));
            recall.emplace_back(TP / (double)stats[category].num_gt_object);
        }
        // computes the area under the P-R curve.
        class_aps.emplace_back(average_precision(precision, recall, interval));
    }
    // guards against division by zero when no class produced any prediction.
    if (class_aps.empty()) return 0.0;
    return std::accumulate(class_aps.begin(), class_aps.end(), 0.0) / (double)class_aps.size();
}
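// Worked accumulation example for metrics_eval above (illustrative): with
// true_positive = {1, 0, 1} and false_positive = {0, 1, 0} already in score order and
// num_gt_object == 2, the running sums give precision = {1.0, 0.5, 0.667} and
// recall = {0.5, 0.5, 1.0}, exactly the inputs used in the average_precision example
// above, which yields an AP of roughly 0.849 at interval == 11.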
// Wrapper of the function `validate_yolo`. This function prepares data and dispatches metrics
// calculations for each validation image, accumulates the metrics results, and returns the
// batched mAP and COCO AP.
std::pair<double, double> validate_yolo_wrapper(std::map<std::string, std::vector<ov::Tensor>> &raw_results,
                                                const std::vector<ov::Output<const ov::Node>> &result_layout,
                                                std::vector<std::string> input_files) {
    slog::info << "Start validating yolo." << slog::endl;
    std::ofstream fout;
    fout.open("ap_report.txt");
    // preserves all correct paths to validation images.
    int num_image = runtime_vars.niter * runtime_vars.batch_size;
    std::vector<std::string> input_image_paths;
    std::sort(std::begin(input_files), std::end(input_files));
    // input_files is guaranteed not to be empty since that case is filtered out.
    for (auto &path : input_files) {
        if (path == "") break;
        if (num_image == 0) break;
        input_image_paths.push_back(path);
        num_image--;
    }
    // checks that enough image files exist; this should always pass unless an image file is
    // deleted right after the inferencing step.
    if (num_image != 0) {
        slog::err << "Not enough image input found. " << runtime_vars.batch_size * runtime_vars.niter
                  << " required, " << (runtime_vars.batch_size * runtime_vars.niter - num_image)
                  << " provided." << slog::endl;
        exit(EXIT_FAILURE);
    }
    // stores all annotation boxes for each image from the groundtruth files.
    // if an input image does not have a corresponding groundtruth file, an error occurs.
    Tensor3d raw_annotations;
    int err = collect_validation_dataset(input_image_paths, raw_annotations,
                                         runtime_vars.source_image_sizes);
    if (err) exit(EXIT_FAILURE);
    // updates the metrics one image at a time to reduce memory overhead. the result for each
    // image is accumulated in `batched_stats`, which is used for the batched mAP and COCO AP
    // calculation.
    metrics batched_stats;
    batched_stats.map.resize(yolo_meta.num_classes, mAPStats{});
    batched_stats.coco.resize(yolo_meta.coco_metric.size(), std::vector<mAPStats>{});
    std::for_each(batched_stats.coco.begin(), batched_stats.coco.end(),
                  [&](std::vector<mAPStats> &stats) { stats.resize(yolo_meta.num_classes, mAPStats{}); });
    for (unsigned batch = 0; batch < runtime_vars.niter; batch++) {
        for (unsigned img = 0; img < runtime_vars.batch_size; img++) {
            // stores the flattened output tensors from the resulting convolution layers.
            std::vector<float> curr_img_data;
            for (auto &item : result_layout) {
                const std::string &name = item.get_any_name();
                auto curr_outputBlob = raw_results.at(name).at(batch);
                auto output_tensor_start = curr_outputBlob.data<float>();
                unsigned output_size = curr_outputBlob.get_size() / runtime_vars.batch_size;
                unsigned offset = img * output_size;
                for (unsigned idx = 0; idx < output_size; idx++) {
                    curr_img_data.push_back(output_tensor_start[idx + offset]);
                }
            }
            struct metrics &&curr_img_stats =
                validate_yolo(curr_img_data, raw_annotations, img + batch * runtime_vars.batch_size);
            metrics_update(batched_stats.map, curr_img_stats.map);
            for (unsigned thresh = 0; thresh < yolo_meta.coco_metric.size(); thresh++) {
                metrics_update(batched_stats.coco[thresh], curr_img_stats.coco[thresh]);
            }
            double img_AP = metrics_eval(curr_img_stats.map, yolo_meta.ap_interval);
            // fout << "image " << input_files[img] << " AP @ 0.5" << std::endl;
            fout << std::fixed << std::setprecision(10) << img_AP << std::endl;
        }
    }
    double map = metrics_eval(batched_stats.map, yolo_meta.ap_interval);
    double coco_ap = 0.0;
    for (auto &coco_stats : batched_stats.coco) {
        coco_ap += metrics_eval(coco_stats, yolo_meta.coco_interval);
    }
    coco_ap /= (double)yolo_meta.coco_metric.size();
    fout << "\nAP at IoU=.50: " << std::fixed << std::setprecision(6) << map * 100 << "%" << std::endl;
    fout << "AP at IoU=.50:.05:.95: " << std::fixed << std::setprecision(10) << coco_ap * 100 << "%"
         << std::endl;
    fout.close();
    std::cout << "ap_report.txt has been generated in the current directory." << std::endl;
    return std::make_pair(map, coco_ap);
}
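// Typical call sequence (a sketch; `niter`, `batch_size`, and the path/handle variables are
// placeholders supplied by the caller, not names defined in this file):
//
//   set_runtime("yolo-v3-tf", niter, batch_size, input_dir, annotation_dir);
//   std::pair<double, double> aps =
//       validate_yolo_wrapper(raw_results, result_layout, input_files);
//   // aps.first is the PASCAL VOC mAP at IoU 0.5; aps.second is the COCO AP averaged
//   // over the IoU thresholds 0.50:0.05:0.95.
//
// `raw_results` maps each output layer name to its per-iteration tensors, and `result_layout`
// carries the model's output ports so the layers are visited in a deterministic order.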