|
1 | | -import cv2 |
2 | 1 | import numpy as np |
| 2 | +import cv2 |
3 | 3 | import argparse |
4 | | -import time |
5 | | -from NanodetPlus import NanoDet |
| 4 | + |
| 5 | +from nanodet import NanoDet |
| 6 | + |
def str2bool(v):
    """Convert a command-line flag value to a boolean.

    Intended to be used as an ``argparse`` ``type=`` callable.

    Args:
        v: Text such as ``'true'`` or ``'off'``. Matching is case-insensitive
            and ignores surrounding whitespace. An actual ``bool`` is
            returned unchanged.

    Returns:
        ``True`` for 'on', 'yes', 'true', 'y', 't';
        ``False`` for 'off', 'no', 'false', 'n', 'f'.

    Raises:
        NotImplementedError: If ``v`` is not one of the recognised spellings.
    """
    if isinstance(v, bool):
        return v

    text = v.strip().lower()
    if text in ('on', 'yes', 'true', 'y', 't'):
        return True
    if text in ('off', 'no', 'false', 'n', 'f'):
        return False

    # Same exception type as before so existing callers keep working; the
    # message now names the offending value instead of being empty.
    raise NotImplementedError('Cannot interpret {!r} as a boolean value'.format(v))
6 | 14 |
|
7 | 15 | backends = [cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_BACKEND_CUDA] |
8 | 16 | targets = [cv2.dnn.DNN_TARGET_CPU, cv2.dnn.DNN_TARGET_CUDA, cv2.dnn.DNN_TARGET_CUDA_FP16] |
|
15 | 23 | help_msg_backends += "; {:d}: TIMVX" |
16 | 24 | help_msg_targets += "; {:d}: NPU" |
17 | 25 | except: |
18 | | - print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/Sidd1609/5bb321c8733110ed613ec120c7c02e41 for more information.') |
19 | | - |
20 | | -classes = ( 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', |
21 | | - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', |
22 | | - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', |
23 | | - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', |
24 | | - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', |
25 | | - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', |
26 | | - 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', |
27 | | - 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', |
28 | | - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', |
29 | | - 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', |
30 | | - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', |
31 | | - 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', |
32 | | - 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', |
33 | | - 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush' |
34 | | - ) |
35 | | - |
36 | | -def vis(preds, res_img): |
37 | | - if preds is not None: |
38 | | - image_shape = (416, 416) |
39 | | - top, left, newh, neww = 0, 0, image_shape[0], image_shape[1] |
40 | | - hw_scale = res_img.shape[0] / res_img.shape[1] |
| 26 | + print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.') |
| 27 | + |
# Class labels, indexed by the class id found in the last column of each
# prediction row. Order matters: position in this tuple is the class id.
classes = (
    # people and vehicles
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
    'truck', 'boat',
    # street furniture
    'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
    # animals
    'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
    'zebra', 'giraffe',
    # accessories
    'backpack', 'umbrella', 'handbag', 'tie', 'suitcase',
    # sports
    'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    # kitchenware
    'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    # food
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
    'hot dog', 'pizza', 'donut', 'cake',
    # furniture
    'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
    # electronics
    'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    # appliances
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
    # indoor objects
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
    'toothbrush',
)
| 42 | + |
def letterbox(srcimg, target_size=(416, 416)):
    """Resize an image to ``target_size`` keeping aspect ratio, padding with black.

    Args:
        srcimg: Input image as a NumPy array whose first two axes are
            (height, width). It is not modified.
        target_size: Output size as ``(height, width)``. Non-square targets
            are supported.

    Returns:
        A tuple ``(img, letterbox_scale)``. ``img`` is the resized and padded
        image. ``letterbox_scale`` is ``[top, left, newh, neww]``: the padding
        added above and to the left of the content, and the height/width of
        the resized content inside the output.
    """
    target_h, target_w = target_size
    src_h, src_w = srcimg.shape[:2]

    top, left, newh, neww = 0, 0, target_h, target_w
    hw_scale = src_h / src_w
    # Compare against the target's aspect ratio rather than 1 so that
    # non-square targets are handled. For a square target this is exactly the
    # previous "hw_scale > 1" test and the same arithmetic.
    target_hw_scale = target_h / target_w

    # No defensive copy of srcimg is needed: cv2.resize always returns a new
    # array, and every branch below resizes.
    if hw_scale > target_hw_scale:
        # Source is relatively taller: fill the height, pad left and right.
        # Clamp so extreme aspect ratios never request a 0-pixel resize.
        neww = min(max(int(target_h / hw_scale), 1), target_w)
        img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
        left = int((target_w - neww) * 0.5)
        img = cv2.copyMakeBorder(img, 0, 0, left, target_w - neww - left,
                                 cv2.BORDER_CONSTANT, value=0)
    elif hw_scale < target_hw_scale:
        # Source is relatively wider: fill the width, pad top and bottom.
        newh = min(max(int(target_w * hw_scale), 1), target_h)
        img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
        top = int((target_h - newh) * 0.5)
        img = cv2.copyMakeBorder(img, top, target_h - newh - top, 0, 0,
                                 cv2.BORDER_CONSTANT, value=0)
    else:
        # Same aspect ratio: plain resize. cv2.resize takes (width, height).
        img = cv2.resize(srcimg, (target_w, target_h), interpolation=cv2.INTER_AREA)

    letterbox_scale = [top, left, newh, neww]
    return img, letterbox_scale
| 64 | + |
def unletterbox(bbox, original_image_shape, letterbox_scale):
    """Map a box from letterboxed coordinates back to the original image.

    Args:
        bbox: 1-D NumPy array whose first four entries are
            ``xmin, ymin, xmax, ymax`` in letterboxed-image pixels.
            It is not modified.
        original_image_shape: ``(height, width)`` of the original image.
        letterbox_scale: ``[top, left, newh, neww]`` describing the padding
            offsets and the size of the resized content.

    Returns:
        A copy of ``bbox`` as ``np.int32`` with the four coordinates expressed
        in original-image pixels and clamped to the image bounds.
    """
    ret = bbox.copy()

    h, w = original_image_shape
    top, left, newh, neww = letterbox_scale

    # One code path for every aspect ratio. With no padding (top == left == 0)
    # this is the same uniform scaling the old square-image shortcut did, but
    # the result is now clamped and cast to int32 like every other case.
    ratioh, ratiow = h / newh, w / neww
    ret[0] = min(max((ret[0] - left) * ratiow, 0), w)
    ret[1] = min(max((ret[1] - top) * ratioh, 0), h)
    ret[2] = min(max((ret[2] - left) * ratiow, 0), w)
    ret[3] = min(max((ret[3] - top) * ratioh, 0), h)

    return ret.astype(np.int32)
| 83 | + |
def vis(preds, res_img, letterbox_scale, fps=None):
    """Draw detections (and optionally an FPS counter) on a copy of an image.

    Args:
        preds: Iterable of prediction rows. The first four entries of a row
            are the box in letterboxed coordinates, the second-to-last entry
            is the confidence and the last entry is the class id. ``None`` is
            treated as "no detections".
        res_img: Original image to draw on. It is not modified.
        letterbox_scale: ``[top, left, newh, neww]`` passed through to
            ``unletterbox``.
        fps: Optional frames-per-second value drawn in the top-left corner.

    Returns:
        A new image with boxes, labels and (if given) the FPS text drawn.
    """
    ret = res_img.copy()

    # draw FPS
    if fps is not None:
        fps_label = "FPS: %.2f" % fps
        cv2.putText(ret, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    # Nothing to draw; the previous revision of this function also tolerated None.
    if preds is None:
        return ret

    # draw bboxes and labels
    for pred in preds:
        bbox = pred[:4]
        conf = pred[-2]
        classid = int(pred[-1])

        # bbox: convert to plain ints because the OpenCV drawing functions
        # require integer pixel coordinates.
        xmin, ymin, xmax, ymax = [
            int(v) for v in unletterbox(bbox, ret.shape[:2], letterbox_scale)
        ]
        cv2.rectangle(ret, (xmin, ymin), (xmax, ymax), (0, 255, 0), thickness=2)

        # label
        label = "{:s}: {:.2f}".format(classes[classid], conf)
        cv2.putText(ret, label, (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)

    return ret
70 | 107 |
|
if __name__ == '__main__':
    # Command-line interface.
    parser = argparse.ArgumentParser(description='Nanodet inference using OpenCV an contribution by Sri Siddarth Chakaravarthy part of GSOC_2022')
    parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
    parser.add_argument('--model', '-m', type=str, default='object_detection_nanodet_2022nov.onnx', help="Path to the model")
    parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
    parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
    parser.add_argument('--confidence', default=0.35, type=float, help='Class confidence')
    parser.add_argument('--nms', default=0.6, type=float, help='Enter nms IOU threshold')
    parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
    parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
    args = parser.parse_args()

    model = NanoDet(modelPath=args.model,
                    prob_threshold=args.confidence,
                    iou_threshold=args.nms,
                    backend_id=args.backend,
                    target_id=args.target)

    tm = cv2.TickMeter()
    tm.reset()
    if args.input is not None:
        # Single-image mode.
        image = cv2.imread(args.input)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            parser.error('Could not read image: {}'.format(args.input))
        input_blob = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Letterbox transformation
        input_blob, letterbox_scale = letterbox(input_blob)

        # Inference
        tm.start()
        preds = model.infer(input_blob)
        tm.stop()
        print("Inference time: {:.2f} ms".format(tm.getTimeMilli()))

        img = vis(preds, image, letterbox_scale)

        if args.save:
            # Report success only after the file has actually been written.
            if cv2.imwrite('result.jpg', img):
                print('Results saved to result.jpg\n')
            else:
                print('Failed to save results to result.jpg\n')

        if args.vis:
            cv2.namedWindow(args.input, cv2.WINDOW_AUTOSIZE)
            cv2.imshow(args.input, img)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

    else:
        # Camera mode: run until a key is pressed or no frame is delivered.
        print("Press any key to stop video capture")
        deviceId = 0
        cap = cv2.VideoCapture(deviceId)

        try:
            while cv2.waitKey(1) < 0:
                hasFrame, frame = cap.read()
                if not hasFrame:
                    print('No frames grabbed!')
                    break

                input_blob = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                input_blob, letterbox_scale = letterbox(input_blob)
                # Inference
                tm.start()
                preds = model.infer(input_blob)
                tm.stop()

                img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())

                cv2.imshow("NanoDet Demo", img)

                # Reset so the next FPS value reflects only the next frame.
                tm.reset()
        finally:
            # Always give the camera back and close the preview window, even
            # if inference or drawing raised.
            cap.release()
            cv2.destroyAllWindows()
0 commit comments