如何解析trtexec输出的模型推理结果数据

84 阅读 0 评论 56 点赞

我是靠谱客的博主美丽过客，最近开发中收集的这篇文章主要介绍如何解析trtexec输出的模型推理结果数据，觉得挺不错的，现在分享给大家，希望可以做个参考。

概述

还是以onnx为例，执行下面的命令解析onnx生成engine并推理输出结果到一个json文件:

./trtexec --onnx=efficientdet-d0-s.onnx --loadInputs='data':o4_clip-1_raw_data.bin --saveEngine=efficientdet-d0-s.engine  --exportOutput=trtexec-result.json

这个--exportOutput指定的json文件里保存的是network的输出节点的数据，对于我修改过的efficientdet网络的输出节点有三个：regression、anchors和classification，那么这个json文件的内容类似如下：

[
  { "name" : "regression"
  , "dimensions" : "1x49104x4"
  , "values" : [ 5.78506, 6.88422, -1.56519, -17.5148, -0.113892, 0.545003, -1.52597, -0.767865, 1.00629, 0.163376, 0.0242282, -1.89316, 0.187985, 0.499627, -0.414611, -2.14051, 0.371698, 0.478594, -0.0126426, 2.01423, 0.997112, 0.517545, 1.88847, -0.707338, 0.157562, 0.0627687, 0.10975, -0.430063, 0.537361, 0.670655, 0.428142, ...]},
  { "name" : "anchors"
  , "dimensions" : "1x49104x4"
  , "values" : [ -12, -12, 20, 20, -7.2, -18.4, 15.2, 26.4, -18.4, -7.2, 26.4, 15.2, -16.1587, -16.1587, 24.1587, 24.1587, -10.1111, -24.2222, 18.1111, 32.2222, -24.2222, -10.1111, 32.2222, 18.1111, -21.3984, -21.3984, 29.3984, 29.3984, -13.7789, -31.5578, 21.7789, 39.5578, -31.5578, -13.7789, 39.5578, 21.7789, -12, ...]},
  { "name" : "classification"
  , "dimensions" : "1x49104x1"
  , "values" : [ 0.000323705, 3.062e-07, 0.00457684, 0.000632869, 0.0004986, 0.000207652, 0.000125256, 6.19738e-07, 0.0203817, 8.39792e-06, 8.40121e-09, 9.50497e-05, 9.16859e-06, 2.48826e-05, 1.39859e-06, 3.46441e-06, 2.93581e-08, 0.000207522, 6.74278e-06, 1.8361e-08, 3.44744e-05, 3.35026e-06, 2.89377e-05, 3.85066e-07, 1.02452e-05,...]}
]

怎么把它这个结果解析出来并过滤处理成最终的结果并且在图上画出识别结果的bbox来呢？这个和一般的模型的post process是类似的，需要结合网络本身设计对输出数据做后处理得出合理的bbox和对应的class以及score，针对我这个修改过的efficientdet导出的onnx调用trtexec推理得到的json结果数据进行解析的相关代码如下:

#encoding: utf-8
import numpy as np
import os
import cv2
import json

classes = ['baggage']

def nms(bboxes, classifictaion, thresh):
    
    x1 = bboxes[:, 0]
    y1 = bboxes[:, 1]
    x2 = bboxes[:, 2]
    y2 = bboxes[:, 3]
    scores = classifictaion[:, 1]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep

def parse_result(num_classes,json_file,input_img,output_img):
   #with open(json_file,'r',encoding='utf-8') as f:
   with open(json_file,'r') as f:
      data = json.load(f)
   regression = np.array(data[0]['values'])      #49104 x 4
   anchors    = np.array(data[1]['values'])      #49104 x 4
   classification = np.array(data[2]['values'])  #49104 x num_classes
   
   size_r = np.size(regression)
   size_c = np.size(classification)
   
   print("size_r:",size_r,"size_c:",size_c)
   
   max_nnan_ir = -1
   max_nnan_ic = -1
   i=0
   while i< size_r :
      #if np.isnan(regression[i]):  #n/a
      #if 'nan' == regression[i]:   #string 'n/a'
      if 0 == regression[i]:
         i += 4
      else:
         max_nnan_ir = i
         break;
   i = 0
   while i <size_c :
      #if np.isnan(classification[i]):  #n/a
      #if 'nan' == classification[i]:   #string 'n/a'
      if 0 == classification[i]:
         i += num_classes
      else:
         max_nnan_ic = i
         break;
   
   max_nnan_ir //= 4
   max_nnan_ic //= num_classes
   n = max(max_nnan_ir, max_nnan_ic)
   #print("max_nnan_ir/4:",max_nnan_ir,"max_nnan_ic /1:",max_nnan_ic,"num_classes:",num_classes)
   #print("max nan idx:",n-1)
   
   #filter out invalid regression data
   i=n*4
   reg_list = []
   anc_list = []
   cls_list = []
   while i< size_r :
      reg_box = [regression[i],regression[i+1],regression[i+2],regression[i+3]]
      anc_box = [anchors[i],anchors[i+1],anchors[i+2],anchors[i+3]]
      reg_list.append(reg_box)
      anc_list.append(anc_box)
      i += 4
   #filter out invalid classification data
   i=n
   while i< size_c :
      max_score = 0.0
      cls_id = -1
      for k in range(num_classes):
         if classification[i+k] > max_score:
            max_score = classification[i+k]
            cls_id = k
      cls_list.append([cls_id,max_score])
      i += num_classes
      
   regression = np.array(reg_list)
   anchors = np.array(anc_list)
   classification = np.array(cls_list)
   #print("regression num:",len(regression),"anchors num:",len(anchors),"classification num:",len(classification))
   
   y_centers_a = (anchors[..., 0] + anchors[..., 2]) / 2
   x_centers_a = (anchors[..., 1] + anchors[..., 3]) / 2
   ha = anchors[..., 2] - anchors[..., 0]
   wa = anchors[..., 3] - anchors[..., 1]

   w = np.exp(regression[..., 3]) * wa
   h = np.exp(regression[..., 2]) * ha

   y_centers = regression[..., 0] * ha + y_centers_a
   x_centers = regression[..., 1] * wa + x_centers_a

   ymin = y_centers - h / 2.
   xmin = x_centers - w / 2.
   ymax = y_centers + h / 2.
   xmax = x_centers + w / 2.
   
   xmin = np.maximum(xmin,0)
   ymin = np.maximum(ymin,0)
   xmax = np.minimum(xmax,1279)
   ymax = np.minimum(ymax,959)
   
   bboxes = np.vstack([xmin, ymin, xmax, ymax])
   print(bboxes.shape)
   bboxes = bboxes.swapaxes(0,1)
   print(bboxes.shape) 
    
   #nms
   iou_threshold=0.1
   keep = nms(bboxes,classification,iou_threshold)
   bboxes = bboxes[keep,:]
   classification = classification[keep,:]
      
   threshold = 0.1
   num_boxes = len(bboxes)
   #print("num_boxes:",num_boxes,"num_cls:",len(classification))
   
   #bboxes = bboxes*1280/512
   
   box_c = 0 
   img = cv2.imread(input_img)
   for i in range(num_boxes):
       obj = classes[int(classification[i][0])]
       score = classification[i][1]
       if score > threshold:
          box_c += 1
          x1 = int(bboxes[i][0])
          y1 = int(bboxes[i][1])
          x2 = int(bboxes[i][2])
          y2 = int(bboxes[i][3])
          cv2.rectangle(img, (x1, y1), (x2, y2), (255, 255, 0), 2)
          cv2.putText(img, '{} {:.3f}'.format(obj, score),
                        (x1, y1 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 255, 0), 2)
   
   cv2.imwrite(output_img,img)
   #cv2.imshow("result",img)
   #cv2.waitKey(100)
   #print("draw {} boxes".format(box_c))
     
   
if __name__ == "__main__":
   json_file = "/usr/src/tensorrt/bin/trtexec-result.json"
   input_img = "/usr/src/tensorrt/bin/o4_clip-1.jpg"
   output_img = "/usr/src/tensorrt/bin/o4_clip-1-TensorRT_result.jpg"
   num_class = 1
   parse_result(num_class,json_file,input_img,output_img)

如果要将我们自己用pytorch调用pt模型文件或者使用onnxruntime调用onnx识别图像的结果数据转换成trtexec的这种json输出格式，以便于对比结果数据的差异的话(应NVIDIA的要求写了下面的代码来仿照trtexec的输出格式来将模型的输出节点的数据保存到json文件里)，可以像下面这样写:

"""
Simple Inference Script of EfficientDet-Pytorch
"""
import json
import time
import onnxruntime
import torch
from torch.backends import cudnn

from backbone import EfficientDetBackbone
import cv2
import numpy as np

from efficientdet.utils import BBoxTransform, ClipBoxes
from utils.utils import preprocess, invert_affine, postprocess

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

class MyEncoder(json.JSONEncoder):
  def default(self, obj):
       if isinstance(obj, np.integer):
          return int(obj)
       elif isinstance(obj, np.floating):
          return float(obj)
       elif isinstance(obj, np.ndarray):
          return obj.tolist()
       else:
          return super(MyEncoder, self).default(obj)

def to_list(arr,r,c):
    data_list = []
    for i in range(r):
       for j in range(c):
          data_list.append(arr[i][j])
    return data_list

compound_coef = 0
force_input_size = None  # set None to use default size
#img_path = 'test/img.png'
img_path = 'o4_clip-1.png'

threshold = 0.1
iou_threshold = 0.1

use_cuda = True
use_float16 = False
cudnn.fastest = True
cudnn.benchmark = True

obj_list = ['baggage']

input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size
ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

if use_cuda:
    x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
else:
    x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

'''
model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list))
#model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
model.load_state_dict(torch.load(f'logs/airport/efficientdet-d0_499_79500.pth'))
model.requires_grad_(False)
model.eval()

if use_cuda:
    model = model.cuda()
if use_float16:
    model = model.half()
'''

ort_session = onnxruntime.InferenceSession("convert/efficientdet-d0.onnx")

with torch.no_grad():
    '''    
    #features, regression, classification, anchors = model(x)
    regression, classification, anchors = model(x)
    '''
    nx = to_numpy(x)
    #np.save("raw_data_numpy.npy",nx)
    nx.tofile("raw_data.bin")
    ort_inputs = {ort_session.get_inputs()[0].name: nx}
    ort_outs = ort_session.run(None, ort_inputs)
    #shapes: (1, 49104, 4) (1, 49104, 1) (1, 49104, 4)
    #print("shapes:",ort_outs[0].shape,ort_outs[1].shape,ort_outs[2].shape)
    
    r = 49104
    results = []
    dict_reg = {}
    dict_reg["name"] = "regression"
    dict_reg["dimensions"] = "1x49104x4"
    dict_reg["values"] = to_list(ort_outs[0][0],r,4)
    results.append(dict_reg)
    
    dict_cls = {}
    dict_cls["name"] = "classification"
    dict_cls["dimensions"] = "1x49104x1"
    dict_cls["values"] = to_list(ort_outs[1][0],r,1)
    results.append(dict_cls)
    
    dict_ach = {}
    dict_ach["name"] = "anchors"
    dict_ach["dimensions"] = "1x49104x4"
    dict_ach["values"] = to_list(ort_outs[2][0],r,4)
    results.append(dict_ach)
    f = open("onnxruntime_result.json","w")
    json.dump(results,f,cls=MyEncoder,indent=2)
    f.close()

    regression = torch.from_numpy(ort_outs[0])
    classification = torch.from_numpy(ort_outs[1])
    anchors = torch.from_numpy(ort_outs[2])
    
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    out = postprocess(x,
                      anchors, regression, classification,
                      regressBoxes, clipBoxes,
                      threshold, iou_threshold)


def display(preds, imgs, imshow=True, imwrite=False):
    for i in range(len(imgs)):
        if len(preds[i]['rois']) == 0:
            continue

        for j in range(len(preds[i]['rois'])):
            (x1, y1, x2, y2) = preds[i]['rois'][j].astype(np.int)
            cv2.rectangle(imgs[i], (x1, y1), (x2, y2), (255, 255, 0), 2)
            obj = obj_list[preds[i]['class_ids'][j]]
            score = float(preds[i]['scores'][j])
            if score >= threshold:
               cv2.putText(imgs[i], '{}, {:.3f}'.format(obj, score),
                        (x1, y1 + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 255, 0), 1)

        if imshow:
            cv2.imshow('img', imgs[i])
            cv2.waitKey(0)

        if imwrite:
            cv2.imwrite(f'o4_clip-1-OnnxRuntime_result.jpg', imgs[i])


out = invert_affine(framed_metas, out)
display(out, ori_imgs, imshow=False, imwrite=True)

上面注释掉的代码实际上是使用pytorch调用pt文件的常规写法，这是一般人最熟悉的，保留的是使用onnxruntime调用onnx的方式的写法。上面的代码中将模型的输出数据仿照trtexec的输出格式保存到onnxruntime_result.json中。