Skip to content

Commit 1c0813a

Browse files
mtarngandrebriggs
authored and committed
Downloading tags and existing classifications. Also new function to download tagged image data for training. (#42)
* Getting tag queries back. Need to construct into data map, also eventually remove module getting hack and creds
* Have working list of imageTags returning now
* Download of existing tags working now. Unsure about filenames all being integers only though? Works currently but upload function needs to be checked
* Retrieving existing tags in db
* scrubbing creds
* comment cleanup
* creating function to download tagged images and their data
* print -> logging
* feedback
1 parent 80a2c29 commit 1c0813a

File tree

7 files changed

+275
-29
lines changed

7 files changed

+275
-29
lines changed

functions/pipeline/download/__init__.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,19 @@ def main(req: func.HttpRequest) -> func.HttpResponse:
3535
# DB configuration
3636
data_access = ImageTagDataAccess(get_postgres_provider())
3737
user_id = data_access.create_user(user_name)
38-
image_urls = list(data_access.get_new_images(image_count, user_id))
38+
image_id_to_urls = data_access.get_images_for_tagging(image_count, user_id)
39+
image_urls = list(image_id_to_urls.values())
3940

40-
# TODO: Populate starting json with tags, if any exist... (precomputed or retagging?)
41-
vott_json = create_starting_vott_json(image_urls)
41+
image_id_to_image_tags = {}
42+
for image_id in image_id_to_urls.keys():
43+
image_id_to_image_tags[image_id] = data_access.get_image_tags(image_id)
4244

43-
return_body_json = {"imageUrls": image_urls, "vottJson": vott_json}
45+
existing_classifications_list = data_access.get_existing_classifications()
46+
47+
vott_json = create_starting_vott_json(image_id_to_urls, image_id_to_image_tags, existing_classifications_list)
48+
49+
return_body_json = {"imageUrls": image_urls,
50+
"vottJson": vott_json}
4451

4552
content = json.dumps(return_body_json)
4653
return func.HttpResponse(

functions/pipeline/shared/db_access/db_access_v2.py

Lines changed: 135 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
1-
# import sys
21
import string
3-
# import os
4-
# import time
52
import logging
63
import random
74
from enum import IntEnum, unique
85
import getpass
96
import itertools
10-
117
from ..db_provider import DatabaseInfo, PostGresProvider
128

9+
1310
@unique
1411
class ImageTagState(IntEnum):
1512
NOT_READY = 0
@@ -19,6 +16,7 @@ class ImageTagState(IntEnum):
1916
INCOMPLETE_TAG = 4
2017
ABANDONED = 5
2118

19+
2220
# An entity class for a VOTT image
2321
class ImageInfo(object):
2422
def __init__(self, image_name, image_location, height, width):
@@ -27,14 +25,25 @@ def __init__(self, image_name, image_location, height, width):
2725
self.height = height
2826
self.width = width
2927

28+
29+
# Entity class for Tags stored in DB
3030
class ImageTag(object):
3131
def __init__(self, image_id, x_min, x_max, y_min, y_max, classification_names):
32-
self.image_id = image_id
33-
self.x_min = x_min
34-
self.x_max = x_max
35-
self.y_min = y_min
36-
self.y_max = y_max
37-
self.classification_names = classification_names
32+
self.image_id = image_id
33+
self.x_min = x_min
34+
self.x_max = x_max
35+
self.y_min = y_min
36+
self.y_max = y_max
37+
self.classification_names = classification_names
38+
39+
40+
# Vott tags have image height & width data as well.
41+
class VottImageTag(ImageTag):
42+
def __init__(self, image_id, x_min, x_max, y_min, y_max, classification_names, image_height, image_width):
43+
super().__init__(image_id, x_min, x_max, y_min, y_max, classification_names)
44+
self.image_height = image_height
45+
self.image_width = image_width
46+
3847

3948
class ImageTagDataAccess(object):
4049
def __init__(self, db_provider):
@@ -69,7 +78,7 @@ def create_user(self,user_name):
6978
finally: conn.close()
7079
return user_id
7180

72-
def get_new_images(self, number_of_images, user_id):
81+
def get_images_for_tagging(self, number_of_images, user_id):
7382
if number_of_images <= 0:
7483
raise ArgumentException("Parameter must be greater than zero")
7584

@@ -84,14 +93,16 @@ def get_new_images(self, number_of_images, user_id):
8493
cursor.execute(query.format(number_of_images, ImageTagState.READY_TO_TAG, ImageTagState.INCOMPLETE_TAG))
8594
for row in cursor:
8695
logging.debug('Image Id: {0} \t\tImage Name: {1} \t\tTag State: {2}'.format(row[0], row[1], row[2]))
87-
selected_images_to_tag[str(row[0])] = str(row[1])
96+
selected_images_to_tag[row[0]] = str(row[1])
8897
self._update_images(selected_images_to_tag,ImageTagState.TAG_IN_PROGRESS, user_id, conn)
89-
finally: cursor.close()
98+
finally:
99+
cursor.close()
90100
except Exception as e:
91101
logging.error("An errors occured getting images: {0}".format(e))
92102
raise
93-
finally: conn.close()
94-
return selected_images_to_tag.values()
103+
finally:
104+
conn.close()
105+
return selected_images_to_tag
95106

96107
def add_new_images(self,list_of_image_infos, user_id):
97108

@@ -119,6 +130,106 @@ def add_new_images(self,list_of_image_infos, user_id):
119130
finally: conn.close()
120131
return url_to_image_id_map
121132

133+
def get_tag_complete_images(self, number_of_images, user_id):
134+
if number_of_images <= 0:
135+
raise ArgumentException("Parameter must be greater than zero")
136+
137+
tag_complete_images = {}
138+
try:
139+
conn = self._db_provider.get_connection()
140+
try:
141+
cursor = conn.cursor()
142+
query = ("SELECT b.ImageId, b.ImageLocation, a.TagStateId FROM Image_Tagging_State a "
143+
"JOIN Image_Info b ON a.ImageId = b.ImageId WHERE a.TagStateId = {1} order by "
144+
"a.createddtim DESC limit {0}")
145+
cursor.execute(query.format(number_of_images, ImageTagState.COMPLETED_TAG))
146+
for row in cursor:
147+
logging.debug('Image Id: {0} \t\tImage Name: {1} \t\tTag State: {2}'.format(row[0], row[1], row[2]))
148+
tag_complete_images[row[0]] = str(row[1])
149+
finally:
150+
cursor.close()
151+
except Exception as e:
152+
logging.error("An errors occured getting images: {0}".format(e))
153+
raise
154+
finally:
155+
conn.close()
156+
return tag_complete_images
157+
158+
def get_image_tags(self, image_id):
159+
if type(image_id) is not int:
160+
raise TypeError('image_id must be an integer')
161+
162+
try:
163+
conn = self._db_provider.get_connection()
164+
try:
165+
cursor = conn.cursor()
166+
query = ("SELECT image_tags.imagetagid, image_info.imageid, x_min, x_max, y_min, y_max, "
167+
"classification_info.classificationname, image_info.height, image_info.width "
168+
"FROM image_tags "
169+
"inner join tags_classification on image_tags.imagetagid = tags_classification.imagetagid "
170+
"inner join classification_info on tags_classification.classificationid = classification_info.classificationid "
171+
"inner join image_info on image_info.imageid = image_tags.imageid "
172+
"WHERE image_tags.imageid = {0};")
173+
cursor.execute(query.format(image_id,))
174+
175+
logging.debug("Got image tags back for image_id={}".format(image_id))
176+
tag_id_to_VottImageTag = self.__build_id_to_VottImageTag(cursor)
177+
178+
finally:
179+
cursor.close()
180+
except Exception as e:
181+
logging.error("An error occurred getting image tags for image id = {0}: {1}".format(image_id, e))
182+
raise
183+
finally:
184+
conn.close()
185+
return list(tag_id_to_VottImageTag.values())
186+
187+
def __build_id_to_VottImageTag(self, tag_db_cursor):
188+
tag_id_to_VottImageTag = {}
189+
try :
190+
for row in tag_db_cursor:
191+
logging.debug(row)
192+
tag_id = row[0]
193+
if tag_id in tag_id_to_VottImageTag:
194+
logging.debug("Existing ImageTag found, appending classification {}", row[6])
195+
tag_id_to_VottImageTag[tag_id].classification_names.append(row[6].strip())
196+
else:
197+
logging.debug("No existing ImageTag found, creating new ImageTag: "
198+
"id={0} x_min={1} x_max={2} x_min={3} x_max={4} classification={5} "
199+
"image_height={6} image_width={7}"
200+
.format(row[1], float(row[2]), float(row[3]), float(row[4]), float(row[5]),
201+
[row[6].strip()], row[7], row[8]))
202+
tag_id_to_VottImageTag[tag_id] = VottImageTag(row[1], float(row[2]), float(row[3]),
203+
float(row[4]), float(row[5]), [row[6].strip()],
204+
row[7], row[8])
205+
except Exception as e:
206+
logging.error("An error occurred building VottImageTag dict: {0}".format(e))
207+
raise
208+
return tag_id_to_VottImageTag
209+
210+
211+
def get_existing_classifications(self):
212+
try:
213+
conn = self._db_provider.get_connection()
214+
try:
215+
cursor = conn.cursor()
216+
query = "SELECT classificationname from classification_info order by classificationname asc"
217+
cursor.execute(query)
218+
219+
classification_set = set()
220+
for row in cursor:
221+
logging.debug(row)
222+
classification_set.add(row[0])
223+
logging.debug("Got back {0} classifications existing in db.".format(len(classification_set)))
224+
finally:
225+
cursor.close()
226+
except Exception as e:
227+
logging.error("An error occurred getting classifications from DB: {0}".format(e))
228+
raise
229+
finally:
230+
conn.close()
231+
return list(classification_set)
232+
122233
def update_incomplete_images(self, list_of_image_ids, user_id):
123234
#TODO: Make sure the image ids are in a TAG_IN_PROGRESS state
124235
self._update_images(list_of_image_ids,ImageTagState.INCOMPLETE_TAG,user_id, self._db_provider.get_connection())
@@ -229,6 +340,12 @@ def main():
229340
# Checking in images been tagged
230341
#################################################################
231342

343+
# import sys
344+
# import os
345+
# sys.path.append("..")
346+
# sys.path.append(os.path.abspath('db_provider'))
347+
# from db_provider import DatabaseInfo, PostGresProvider
348+
232349
#Replace me for testing
233350
db_config = DatabaseInfo("","","","")
234351
data_access = ImageTagDataAccess(PostGresProvider(db_config))
@@ -241,7 +358,9 @@ def main():
241358
image_tags = generate_test_image_tags(list(url_to_image_id_map.values()),4,4)
242359
data_access.update_tagged_images(image_tags,user_id)
243360

244-
TestClassifications = ("maine coon","german shephard","goldfinch","mackerel"," african elephant","rattlesnake")
361+
362+
TestClassifications = ("maine coon","german shephard","goldfinch","mackerel","african elephant","rattlesnake")
363+
245364

246365
def generate_test_image_infos(count):
247366
list_of_image_infos = []

functions/pipeline/shared/db_access/test_db_access_v2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def test_get_new_images_bad_request(self):
7474
with self.assertRaises(ArgumentException):
7575
data_access = ImageTagDataAccess(MockDBProvider())
7676
num_of_images = -5
77-
data_access.get_new_images(num_of_images,5)
77+
data_access.get_images_for_tagging(num_of_images, 5)
7878

7979
def test_add_new_images_user_id_type_error(self):
8080
with self.assertRaises(TypeError):

functions/pipeline/shared/vott_parser/vott_parser.py

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,68 @@
11
import json
22

3-
def __build_frames_data(images):
3+
4+
def __build_tag_from_VottImageTag(image_tag):
5+
return {
6+
"x1": image_tag.x_min,
7+
"x2": image_tag.x_max,
8+
"y1": image_tag.y_min,
9+
"y2": image_tag.y_max,
10+
"width": image_tag.image_width,
11+
"height": image_tag.image_height,
12+
"tags": image_tag.classification_names
13+
}
14+
15+
16+
def __build_tag_list_from_VottImageTags(image_tag_list):
17+
tag_list = []
18+
for image_tag in image_tag_list:
19+
tag_list.append(__build_tag_from_VottImageTag(image_tag))
20+
return tag_list
21+
22+
23+
def __build_frames_data(image_id_to_urls, image_id_to_image_tags):
424
frames = {}
5-
for filename in images:
6-
# TODO: Build tag data per frame if they exist already
7-
frames[__get_filename_from_fullpath(filename)] = [] #list of tags
25+
for image_id in image_id_to_image_tags.keys():
26+
image_file_name = __get_filename_from_fullpath(image_id_to_urls[image_id])
27+
image_tags = __build_tag_list_from_VottImageTags(image_id_to_image_tags[image_id])
28+
frames[image_file_name] = image_tags
829
return frames
930

31+
1032
# For download function
11-
def create_starting_vott_json(images):
33+
def create_starting_vott_json(image_id_to_urls, image_id_to_image_tags, existing_classifications_list):
34+
# "frames"
35+
frame_to_tag_list_map = __build_frames_data(image_id_to_urls, image_id_to_image_tags)
36+
37+
# "inputTags"
38+
classification_str = ""
39+
for classification in existing_classifications_list:
40+
classification_str += classification + ","
41+
1242
return {
13-
"frames": __build_frames_data(images),
14-
"inputTags": "", # TODO: populate classifications that exist in db already
43+
"frames": frame_to_tag_list_map,
44+
"inputTags": classification_str,
1545
"scd": False # Required for VoTT and image processing? unknown if it's also used for video.
1646
}
1747

48+
1849
def __get_filename_from_fullpath(filename):
1950
path_components = filename.split('/')
2051
return path_components[-1]
2152

53+
2254
def __get_id_from_fullpath(fullpath):
2355
return int(__get_filename_from_fullpath(fullpath).split('.')[0])
2456

57+
2558
# Returns a list of processed tags for a single frame
2659
def __create_tag_data_list(json_tag_list):
2760
processed_tags = []
2861
for json_tag in json_tag_list:
2962
processed_tags.append(__process_json_tag(json_tag))
3063
return processed_tags
3164

65+
3266
def __process_json_tag(json_tag):
3367
return {
3468
"x1": json_tag['x1'],
@@ -42,6 +76,7 @@ def __process_json_tag(json_tag):
4276
"name": json_tag["name"]
4377
}
4478

79+
4580
# For upload function
4681
def process_vott_json(json):
4782
all_frame_data = json['frames']
@@ -79,6 +114,7 @@ def process_vott_json(json):
79114
"imageIdToTags": id_to_tags_dict
80115
}
81116

117+
82118
def main():
83119
images = {
84120
"1.png" : {},
@@ -121,7 +157,6 @@ def main():
121157
# add_tag_to_db('something', 2, (tag_data))
122158

123159

124-
125160
# Currently only used for testing...
126161
# returns a json representative of a tag given relevant components
127162
def __build_json_tag(x1, x2, y1, y2, img_width, img_height, UID, id, type, tags, name):
@@ -145,5 +180,6 @@ def __build_json_tag(x1, x2, y1, y2, img_width, img_height, UID, id, type, tags,
145180
"name": name
146181
}
147182

183+
148184
if __name__ == '__main__':
149185
main()

0 commit comments

Comments
 (0)