#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 14 18:17:03 2020
@author: asabater
"""
# =============================================================================
# Script to make YOLO predictions either from a video or from an annotations file
# =============================================================================
import os
import json
import numpy as np
from tqdm import tqdm
from PIL import Image
import pickle
import argparse
import sys
sys.path.append('keras_yolo3/')
sys.path.append('keras_yolo3/yolo3/')
import keras_yolo3.train as ktrain
sys.path.append('../..')
from roi_nn import load_branch_body
from eyolo import load_yolo_model_raw


def video_iterator(video_file):
    import cv2
    vid = cv2.VideoCapture(video_file)
    num_frame = 0
    while True:
        ret, frame = vid.read()
        if not ret: break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(frame)
        num_frame += 1
        yield image, '{:06d}'.format(num_frame), False
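
# Each iteration yields (PIL.Image, zero-padded frame id, False); the constant
# False video id causes the whole clip to be stored as a single pickle entry.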


def annotations_iterator(annotations_file, path_dataset):
    with open(annotations_file, 'r') as f:
        annotations = sorted(f.read().splitlines())
    for ann in tqdm(annotations):
        image_id = ann.split()[0]
        image = Image.open(path_dataset + image_id)
        image_id = '.'.join(image_id.split('.')[:-1])
        vid = '/'.join(image_id.split('/')[:-1])
        yield image, image_id, vid
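
# A minimal sketch of the expected annotations layout (assumed from the parsing
# above, following the keras-yolo3 convention of one space-separated line per
# image, image path first):
#   val/video_0001/000010.JPEG 10,20,110,220,5 ...
# yields image_id 'val/video_0001/000010' and vid 'val/video_0001'.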


# Computes YOLOv3 predictions given a data iterator.
# Each video is dumped to a common pickle file right after being processed,
# so the output file is a stream of pickled (video, predictions) tuples.
# If an appearance embedding model is provided and add_appearance is set,
# appearance vectors are stored along with the predictions.
def get_scores_predictions_and_embeddings(store_filename, base_model, branch_model, add_appearance, repp_format):
    # Note: iterator, image_size and downsample_rate are module-level globals,
    # defined in the __main__ block below
    # Open the pickle stream file
    file_writer = open(store_filename, 'wb')
    preds_video = {}
    last_video = ''
    for img, image_id, vid in iterator:
        frame_id = image_id.split('/')[-1]
        # Video finished: dump its predictions to the pickle stream
        if last_video != vid and last_video != '':
            pickle.dump((last_video, preds_video), file_writer)
            preds_video = {}
        last_video = vid
        img_size = img.size
        ih, iw = img_size[::-1]
        # Padding needed to square the image (used to normalize bbox centers)
        width_diff = max(0, (ih-iw)//2)
        height_diff = max(0, (iw-ih)//2)
        # Compute the letterbox parameters used to project bounding boxes onto
        # the feature maps for RoI extraction
        if add_appearance:
            h = w = image_size[0] // downsample_rate
            scale = min(w/iw, h/ih)
            nw, nh = int(iw*scale), int(ih*scale)
            dx, dy = (w-nw)//2, (h-nh)//2
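            # Worked example (illustrative values, assuming a 416x416 network
            # input and downsample_rate = 32): a 1280x720 frame gives
            # w = h = 416//32 = 13, scale = min(13/1280, 13/720) ~= 0.0102,
            # (nw, nh) = (13, 7) and (dx, dy) = (0, 3), so projected boxes are
            # shifted down by 3 feature-map rows.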
        # Get YOLO predictions
        preds = base_model.get_prediction(img)
        preds_frame = []
        for i in range(len(preds[0])):
            # Clip the bbox to the image and compute its center
            y_min, x_min, y_max, x_max = preds[0][i]
            y_min, x_min = max(0, y_min), max(0, x_min)
            y_max, x_max = min(img_size[1], y_max), min(img_size[0], x_max)
            width, height = x_max - x_min, y_max - y_min
            if width <= 0 or height <= 0: continue
            # Center coordinates are normalized w.r.t. the padded square, so
            # they are comparable across frames
            bbox_center = [ (x_min + width_diff + width/2)/max(iw,ih),
                            (y_min + height_diff + height/2)/max(iw,ih) ]
            # Initialize the prediction
            pred = { 'image_id': image_id, 'bbox': [ x_min, y_min, width, height ], 'bbox_center': bbox_center }
            # Compute the appearance embedding vector. add_appearance implies
            # repp_format, so preds[2] holds the feature maps here
            if add_appearance:
                roi_x_min, roi_y_min = dx + x_min*scale, dy + y_min*scale
                roi_width, roi_height = max(1., width*scale), max(1., height*scale)
                emb = branch_model.predict([preds[2][0], np.array([[[roi_x_min, roi_y_min, roi_width, roi_height]]])])[0]
                pred['emb'] = emb
            # REPP format: scores are given as a vector of per-class confidences
            if repp_format:
                pred['scores'] = preds[1][i]
            # Otherwise, a single score and category are provided
            else:
                pred['score'] = float(preds[1][i])
                pred['category_id'] = int(preds[2][i])
            preds_frame.append(pred)
        preds_video[frame_id] = preds_frame
    # Last video finished: dump it and close the stream
    pickle.dump((last_video, preds_video), file_writer)
    file_writer.close()
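
# Reading the output back: since the file is a pickle stream (one dump per
# video), it must be read with repeated pickle.load calls rather than a single
# load. A minimal reader sketch (hypothetical helper, not part of the original
# pipeline):
#
#   def read_predictions_stream(store_filename):
#       with open(store_filename, 'rb') as f:
#           while True:
#               try: yield pickle.load(f)
#               except EOFError: break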


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Perform YOLOv3 predictions either from an annotations file or from a video')
    parser.add_argument('--yolo_path', required=True, type=str, help='path to the trained YOLO folder')
    parser.add_argument('--score', type=float, default=0.005, help='threshold to filter out low-scoring predictions')
    parser.add_argument('--iou_thr', type=float, default=0.5, help='IoU threshold')
    parser.add_argument('--max_boxes', type=int, default=20, help='maximum number of boxes per image')
    parser.add_argument('--repp_format', action='store_true', help='store the predictions in REPP format (with an array of per-class confidence scores instead of one prediction per class)')
    parser.add_argument('--add_appearance', action='store_true', help='compute one appearance embedding per detection')
    parser.add_argument('--from_video', type=str, required=False, help='path to the video to perform predictions on')
    parser.add_argument('--from_annotations', type=str, required=False, help='path to the annotations file to perform predictions on')
    parser.add_argument('--dataset_path', type=str, required=False, help='path to the dataset images. Required when making predictions from annotations')
    args = parser.parse_args()

    assert not (args.from_video is not None and args.from_annotations is not None), 'Only one data source (video or annotations) must be specified'
    assert args.from_video is not None or args.from_annotations is not None, 'One data source (video or annotations) must be specified'
    assert not (args.from_annotations is not None and args.dataset_path is None), 'Dataset path of the annotations data must be specified'
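    # The first two asserts above emulate a required mutually exclusive group;
    # an argparse-native alternative (a sketch, not the original code) would be:
    #   group = parser.add_mutually_exclusive_group(required=True)
    #   group.add_argument('--from_video', type=str, help='...')
    #   group.add_argument('--from_annotations', type=str, help='...')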
    # Load YOLO settings
    train_params = json.load(open(args.yolo_path + 'train_params.json', 'r'))
    path_weights = args.yolo_path + 'weights/weights.h5'
    image_size = train_params['input_shape']
    if not args.repp_format and args.add_appearance:
        print(' * REPP format not specified. Suppressing appearance computation')
        args.add_appearance = False
    # Load the appearance embedding model
    if args.add_appearance:
        path_roi_model = args.yolo_path + 'embedding_model/'
        path_roi_model_params = json.load(open(path_roi_model + 'train_params.json', 'r'))
        downsample_rate = path_roi_model_params['downsample_rate']
        branch_model = load_branch_body(path_roi_model)
    else:
        downsample_rate, branch_model = None, None
    # Load the YOLO model
    base_model, _ = load_yolo_model_raw(args.yolo_path, path_weights, image_size, args.repp_format,
                                        downsample_rate, args.score, args.iou_thr, args.max_boxes)

    if args.from_video is not None:
        iterator = video_iterator(args.from_video)
        store_filename = './predictions/preds{}{}_{}.pckl'.format(
                            '_repp' if args.repp_format else '',
                            '_app' if args.add_appearance else '',
                            args.from_video.split('/')[-1].split('.')[0])
    else:
        iterator = annotations_iterator(args.from_annotations, args.dataset_path)
        store_filename = './predictions/preds{}{}_{}.pckl'.format(
                            '_repp' if args.repp_format else '',
                            '_app' if args.add_appearance else '',
                            args.from_annotations.split('/')[-1].split('.')[0])

    # Make sure the output folder exists before streaming predictions into it
    os.makedirs('./predictions', exist_ok=True)
    get_scores_predictions_and_embeddings(store_filename, base_model, branch_model, args.add_appearance, args.repp_format)
    print('Predictions stored:', store_filename)
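
# Example invocations (paths are illustrative, not taken from the repository):
#   python get_repp_predictions.py --yolo_path ./my_yolo_model/ \
#       --from_video ./videos/demo.mp4 --repp_format --add_appearance
#   python get_repp_predictions.py --yolo_path ./my_yolo_model/ \
#       --from_annotations ./annotations_val.txt --dataset_path ./dataset/ --repp_format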