Skip to content

Commit 761fdb3

Browse files
authored
Upload json tagging data, update upload/download functions to use singular data_access library and configuration. (#35)
* Upload flow reworked to use new db_access_v2. Fixing db update bug from string substitution. Upload handles visit&tag, visit&notag, novisit Saving untagged to INCOMPLETE_TAG state works through local func testing cleaned up saving untagged images a bit Saving tags to DB also works Working upload flow with visit&tag, visit&notag, novisit removing creds restructuring vott_json_parser file. Deleting jsonpickle version Moving things around for import cleanup purposes. Upload function still working, need to refactor download and check prior existing tests pytest working, had to rename db_access_v1 to fix conflicts Some more cleanup and restructuring. Tested working through upload function and pytest * Working on refactoring download function, saving work. Need to get images from db then return Reverting some accidental renaming in other directories Removing vscode files download working, still need to 1. combine db config for all 3 functions, 2. write tests Download now works with new db_access_v2. Need to create shared_db config ditto prior * single db configuration minor spacing * updating onboarding function to use new shared directory * temp commenting out onboarding/onboarding-client.py as it breaks pytests. Should probably remove hardcoded resources * feedback * minor TODO comment reshuffling * removing deprecated db_access_v1
1 parent 9d0a45c commit 761fdb3

File tree

15 files changed

+311
-257
lines changed

15 files changed

+311
-257
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,3 +118,6 @@ tag/*.csv
118118

119119
# TF exported graph files
120120
.pb
121+
122+
# VSCode
123+
.vscode/*.json

functions/pipeline/download/__init__.py

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,39 +2,54 @@
22

33
import azure.functions as func
44
import json
5-
import os
65

7-
from ..shared import db_access as DB_Access
8-
from ..shared import vott_json_parser as vott_json_parser
6+
from ..shared.vott_parser import create_starting_vott_json
7+
from ..shared.db_provider import get_postgres_provider
8+
from ..shared.db_access import ImageTagDataAccess
9+
910

1011
def main(req: func.HttpRequest) -> func.HttpResponse:
1112
logging.info('Python HTTP trigger function processed a request.')
1213

13-
imageCount = req.params.get('imageCount')
14+
image_count = int(req.params.get('imageCount'))
15+
user_id = int(req.params.get('userId'))
16+
1417
# setup response object
1518
headers = {
1619
"content-type": "application/json"
1720
}
18-
if not imageCount:
21+
if not user_id:
22+
return func.HttpResponse(
23+
status_code=401,
24+
headers=headers,
25+
body=json.dumps({"error": "invalid userId given or omitted"})
26+
)
27+
elif not image_count:
1928
return func.HttpResponse(
2029
status_code=400,
2130
headers=headers,
2231
body=json.dumps({"error": "image count not specified"})
2332
)
2433
else:
25-
# setup response object
26-
connection = DB_Access.get_connection()
27-
# TODO: images need more meaningful data than just download urls
28-
image_urls = DB_Access.get_images_for_tagging(connection, imageCount)
34+
try:
35+
# DB configuration
36+
data_access = ImageTagDataAccess(get_postgres_provider())
2937

30-
# TODO: Build vott json
31-
vott_json = vott_json_parser.create_starting_json(image_urls)
38+
image_urls = list(data_access.get_new_images(image_count, user_id))
3239

33-
return_body_json = {"imageUrls": image_urls, "vottJson": vott_json}
40+
# TODO: Populate starting json with tags, if any exist... (precomputed or retagging?)
41+
vott_json = create_starting_vott_json(image_urls)
3442

35-
content = json.dumps(return_body_json)
36-
return func.HttpResponse(
37-
status_code=200,
38-
headers=headers,
39-
body=content
40-
)
43+
return_body_json = {"imageUrls": image_urls, "vottJson": vott_json}
44+
45+
content = json.dumps(return_body_json)
46+
return func.HttpResponse(
47+
status_code=200,
48+
headers=headers,
49+
body=content
50+
)
51+
except Exception as e:
52+
return func.HttpResponse(
53+
"exception:" + str(e),
54+
status_code=500
55+
)

functions/pipeline/onboarding/__init__.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,15 @@
11
import os
22
import logging
33
import azure.functions as func
4-
from ..shared import db_access_v2 as DB_Access_V2
5-
from azure.storage.blob import BlockBlobService, ContentSettings
4+
5+
from ..shared.db_provider import get_postgres_provider
6+
from ..shared.db_access import ImageTagDataAccess, ImageInfo
7+
from azure.storage.blob import BlockBlobService
68

79
# TODO: User id as param to function - holding off until further discussion
810
# regarding whether user ID should be generated/looked up by the CLI or
911
# from within this function
1012

11-
default_db_host = ""
12-
default_db_name = ""
13-
default_db_user = ""
14-
default_db_pass = ""
15-
1613
def main(req: func.HttpRequest) -> func.HttpResponse:
1714
logging.info('Python HTTP trigger function processed a request.')
1815

@@ -41,19 +38,17 @@ def main(req: func.HttpRequest) -> func.HttpResponse:
4138
# Create ImageInfo object (def in db_access.py)
4239
# Note: For testing, default image height/width are set to 50x50
4340
# TODO: Figure out where actual height/width need to come from
44-
image = DB_Access_V2.ImageInfo(original_filename, url, 50, 50)
41+
image = ImageInfo(original_filename, url, 50, 50)
4542
# Append image object to the list
4643
image_object_list.append(image)
4744

4845
# TODO: Wrap db access section in try/catch, send an appropriate http response in the event of an error
4946
logging.info("Now connecting to database...")
50-
db_config = DB_Access_V2.DatabaseInfo(os.getenv('DB_HOST', default_db_host), os.getenv('DB_NAME', default_db_name), os.getenv('DB_USER', default_db_user), os.getenv('DB_PASS', default_db_pass))
51-
data_access = DB_Access_V2.ImageTagDataAccess(DB_Access_V2.PostGresProvider(db_config))
47+
data_access = ImageTagDataAccess(get_postgres_provider())
5248
logging.info("Connected.")
5349

5450
# Create user id
55-
user_id = data_access.create_user(DB_Access_V2.getpass.getuser())
56-
logging.info("The user id for '{0}' is {1}".format(DB_Access_V2.getpass.getuser(),user_id))
51+
user_id = data_access.create_user("testuser") # TODO: remove this hardcoding, should be passed in the request.
5752

5853
# Add new images to the database, and retrieve a dictionary ImageId's mapped to ImageUrl's
5954
image_id_url_map = data_access.add_new_images(image_object_list,user_id)
Lines changed: 39 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,39 @@
1-
import requests
2-
import json
3-
import pg8000
4-
5-
# The following mock client imitates the CLI during the onboarding scenario for new images.
6-
# The expectation is that the CLI uploads images to a temporary blob store, then gets a list
7-
# of URLs to those images and passes the list to an HTTP trigger function in the format of
8-
# a JSON string. The HTTP trigger function creates rows in the database for the images,
9-
# retrieves the ImageId's for them, and then copies the images, each renamed as "ImageId.extension",
10-
# into a permanent blob storage container. The HTTP function returns the list of URLs to
11-
# the images in permanent blob storage.
12-
13-
print("\nTest client for CLI Onboarding scenario")
14-
print('-' * 40)
15-
16-
# functionURL = "https://onboardinghttptrigger.azurewebsites.net/api/onboarding?code=lI1zl4IhiHcOcxTS85RsE7yZJXeNRxnr7tXSO1SrLWdpiN0W6hT3Jw=="
17-
functionURL = "http://localhost:7071/api/onboarding"
18-
# Sean's function URL:
19-
# functionURL = "https://onboardinghttptrigger.azurewebsites.net/api/onboarding?code=lI1zl4IhiHcOcxTS85RsE7yZJXeNRxnr7tXSO1SrLWdpiN0W6hT3Jw=="
20-
# functionURL = "https://abrig-linux-func.azurewebsites.net/api/onboarding"
21-
22-
urlList = { "imageUrls": ["https://akaonboardingstorage.blob.core.windows.net/aka-temp-source-container/puppies1.jpg",
23-
"https://akaonboardingstorage.blob.core.windows.net/aka-temp-source-container/puppies2.jpg",
24-
"https://akaonboardingstorage.blob.core.windows.net/aka-temp-source-container/puppies3.jpg"] }
25-
26-
headers = {"Content-Type": "application/json"}
27-
28-
print("Now executing POST request to onboard images...to:")
29-
print("Function URL: " + functionURL)
30-
print("Headers:")
31-
for key, value in headers.items():
32-
print("\t" + key + ": " + value)
33-
response = requests.post(url=functionURL, headers=headers, json=urlList)
34-
print("Completed POST request.")
35-
36-
raw_response = response.text
37-
response_array = raw_response.split(", ")
38-
response_output = "\n".join(response_array)
39-
40-
print(f"Response status code: {response.status_code}")
41-
print(f"Response string: {response_output}")
1+
# import requests
2+
#
3+
# # The following mock client imitates the CLI during the onboarding scenario for new images.
4+
# # The expectation is that the CLI uploads images to a temporary blob store, then gets a list
5+
# # of URLs to those images and passes the list to an HTTP trigger function in the format of
6+
# # a JSON string. The HTTP trigger function creates rows in the database for the images,
7+
# # retrieves the ImageId's for them, and then copies the images, each renamed as "ImageId.extension",
8+
# # into a permanent blob storage container. The HTTP function returns the list of URLs to
9+
# # the images in permanent blob storage.
10+
#
11+
# print("\nTest client for CLI Onboarding scenario")
12+
# print('-' * 40)
13+
#
14+
# # functionURL = "https://onboardinghttptrigger.azurewebsites.net/api/onboarding?code=lI1zl4IhiHcOcxTS85RsE7yZJXeNRxnr7tXSO1SrLWdpiN0W6hT3Jw=="
15+
# functionURL = "http://localhost:7071/api/onboarding"
16+
# # Sean's function URL:
17+
# # functionURL = "https://onboardinghttptrigger.azurewebsites.net/api/onboarding?code=lI1zl4IhiHcOcxTS85RsE7yZJXeNRxnr7tXSO1SrLWdpiN0W6hT3Jw=="
18+
# # functionURL = "https://abrig-linux-func.azurewebsites.net/api/onboarding"
19+
#
20+
# urlList = { "imageUrls": ["https://akaonboardingstorage.blob.core.windows.net/aka-temp-source-container/puppies1.jpg",
21+
# "https://akaonboardingstorage.blob.core.windows.net/aka-temp-source-container/puppies2.jpg",
22+
# "https://akaonboardingstorage.blob.core.windows.net/aka-temp-source-container/puppies3.jpg"] }
23+
#
24+
# headers = {"Content-Type": "application/json"}
25+
#
26+
# print("Now executing POST request to onboard images...to:")
27+
# print("Function URL: " + functionURL)
28+
# print("Headers:")
29+
# for key, value in headers.items():
30+
# print("\t" + key + ": " + value)
31+
# response = requests.post(url=functionURL, headers=headers, json=urlList)
32+
# print("Completed POST request.")
33+
#
34+
# raw_response = response.text
35+
# response_array = raw_response.split(", ")
36+
# response_output = "\n".join(response_array)
37+
#
38+
# print(f"Response status code: {response.status_code}")
39+
# print(f"Response string: {response_output}")

functions/pipeline/shared/__init__.py

Whitespace-only changes.

functions/pipeline/shared/db_access.py

Lines changed: 0 additions & 97 deletions
This file was deleted.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .db_access_v2 import ImageTagDataAccess, ImageTag, ImageInfo, ImageTagState

functions/pipeline/shared/db_access_v2.py renamed to functions/pipeline/shared/db_access/db_access_v2.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
from enum import IntEnum, unique
77
import getpass
88
import itertools
9-
from .db_provider import DatabaseInfo, PostGresProvider
9+
10+
from ..db_provider import DatabaseInfo, PostGresProvider
1011

1112
@unique
1213
class ImageTagState(IntEnum):
@@ -76,11 +77,10 @@ def get_new_images(self, number_of_images, user_id):
7677
conn = self._db_provider.get_connection()
7778
try:
7879
cursor = conn.cursor()
79-
# TODO: Should we add TagStateId = INCOMPLETE_TAG also for fetching images?
8080
query = ("SELECT b.ImageId, b.ImageLocation, a.TagStateId FROM Image_Tagging_State a "
81-
"JOIN Image_Info b ON a.ImageId = b.ImageId WHERE a.TagStateId = 1 order by "
81+
"JOIN Image_Info b ON a.ImageId = b.ImageId WHERE a.TagStateId IN ({1}, {2}) order by "
8282
"a.createddtim DESC limit {0}")
83-
cursor.execute(query.format(number_of_images))
83+
cursor.execute(query.format(number_of_images, ImageTagState.READY_TO_TAG, ImageTagState.INCOMPLETE_TAG))
8484
for row in cursor:
8585
print('Image Id: {0} \t\tImage Name: {1} \t\tTag State: {2}'.format(row[0], row[1], row[2]))
8686
selected_images_to_tag[str(row[0])] = str(row[1])

functions/pipeline/shared/test_db_access_v2.py renamed to functions/pipeline/shared/db_access/test_db_access_v2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from unittest.mock import patch
33
from unittest.mock import Mock
44

5-
from db_access_v2 import(
5+
from .db_access_v2 import(
66
ImageTagDataAccess,
77
ArgumentException,
88
ImageTagState,
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .db_provider import DatabaseInfo, DBProvider, PostGresProvider, get_postgres_provider

0 commit comments

Comments
 (0)