From 36b64d7e1be8fc6aed9ae8fa2ab6b6df0e3b43e9 Mon Sep 17 00:00:00 2001 From: mjanowiecki <32551917+mjanowiecki@users.noreply.github.com> Date: Tue, 20 Nov 2018 12:14:56 -0500 Subject: [PATCH 1/3] added bib number to csv --- getTopContainerCountByResource.py | 60 ++++++++++++++++++------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/getTopContainerCountByResource.py b/getTopContainerCountByResource.py index 0cab6eb..c8455cc 100644 --- a/getTopContainerCountByResource.py +++ b/getTopContainerCountByResource.py @@ -19,7 +19,7 @@ ids = requests.get(baseURL + endpoint, headers=headers).json() f=csv.writer(open('topContainerCountByResource.csv', 'wb')) -f.writerow(['title']+['uri']+['id_0']+['id_1']+['id_2']+['id_3']+['topContainerCount']) +f.writerow(['title']+['bib']+['uri']+['id_0']+['id_1']+['id_2']+['id_3']+['topContainerCount']) f2=csv.writer(open('topContainersLinks.csv', 'wb')) f2.writerow(['resourceUri']+['topContainerUri']) @@ -32,14 +32,19 @@ topContainerLinks = [] uniqueTopContainers = [] for id in ids: - resourceTopContainers = [] print 'id', id, total, 'records remaining' total = total - 1 endpoint = '/repositories/3/resources/'+str(id) output = requests.get(baseURL + endpoint, headers=headers).json() title = output['title'].encode('utf-8') + print title uri = output['uri'] + try: + bib = output['user_defined']['real_1'] + except: + bib ='' + print bib id0 = output['id_0'] try: id1 = output['id_1'] @@ -52,23 +57,27 @@ try: id3 = output['id_3'] except: - id3='' - # ###searchEndpoint = '/repositories/3/top_containers/search' - # ###output = requests.get(baseURL + searchEndpoint, headers=headers).json() + id3= '' page = 1 - payload = {'page': page, 'page_size': '3000', 'root_record': endpoint} - search = requests.get(baseURL+'/search', headers=headers, params=payload).json() + resultsPage = '' + # while resultsPage != []: + # payload = {'page': page, 'offset': offset, 'page_size': '100', 'root_record': endpoint} + # print payload + # 
search = requests.get(baseURL+'/search', headers=headers, params=payload).json() + # print search + + # resultsPage = search['results'] + # for result in resultsPage: + # results.append(result) results = [] - resultsPage = search['results'] - for result in resultsPage: - results.append(result) while resultsPage != []: - page = page + 1 - payload = {'page': page, 'page_size': '3000', 'root_record': endpoint} + print page + payload = {'page': page, 'page_size': '100', 'root_record': endpoint} search = requests.get(baseURL+'/search', headers=headers, params=payload).json() resultsPage = search['results'] for result in resultsPage: results.append(result) + page = page + 1 for result in results: try: @@ -85,23 +94,26 @@ topContainers = [] topContainerCount = len(resourceTopContainers) print 'top containers', topContainerCount - f.writerow([title]+[uri]+[id0]+[id1]+[id2]+[id3]+[topContainerCount]) + f.writerow([title]+[bib]+[uri]+[id0]+[id1]+[id2]+[id3]+[topContainerCount]) +print 'top container links' for topContainerLink in topContainerLinks: f2.writerow([topContainerLink[:topContainerLink.index('|')]]+[topContainerLink[topContainerLink.index('|')+1:]]) +print 'unique top containers' for topContainer in uniqueTopContainers: - search = requests.get(baseURL+topContainer, headers=headers).json() - try: - indicator = search['indicator'] - except: - indicator = '' - - try: - barcode = search['barcode'] - except: - barcode = '' - f3.writerow([topContainer]+[indicator]+[barcode]) + print topContainer + search = requests.get(baseURL+topContainer, headers=headers).json() + try: + indicator = search['indicator'] + except: + indicator = '' + + try: + barcode = search['barcode'] + except: + barcode = '' + f3.writerow([topContainer]+[indicator]+[barcode]) elapsedTime = time.time() - startTime m, s = divmod(elapsedTime, 60) From fc3d2b4f3b055fc8a4c6c0aa8479e19600cde210 Mon Sep 17 00:00:00 2001 From: mjanowiecki <32551917+mjanowiecki@users.noreply.github.com> Date: Wed, 21 Nov 2018 
09:55:44 -0500 Subject: [PATCH 2/3] changes --- .gitignore | 3 ++- getTopContainerCountByResource.py | 9 --------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 6204478..02cb572 100644 --- a/.gitignore +++ b/.gitignore @@ -54,4 +54,5 @@ data/* *.json local/* *.xml -*.xslx \ No newline at end of file +*.xslx +FamilyAgentsFromCSV.py diff --git a/getTopContainerCountByResource.py b/getTopContainerCountByResource.py index c8455cc..44fbfb6 100644 --- a/getTopContainerCountByResource.py +++ b/getTopContainerCountByResource.py @@ -60,15 +60,6 @@ id3= '' page = 1 resultsPage = '' - # while resultsPage != []: - # payload = {'page': page, 'offset': offset, 'page_size': '100', 'root_record': endpoint} - # print payload - # search = requests.get(baseURL+'/search', headers=headers, params=payload).json() - # print search - - # resultsPage = search['results'] - # for result in resultsPage: - # results.append(result) results = [] while resultsPage != []: print page From 6e55602317aaed569651ab289811ddfba647ba9e Mon Sep 17 00:00:00 2001 From: mjanowiecki <32551917+mjanowiecki@users.noreply.github.com> Date: Wed, 21 Nov 2018 10:09:14 -0500 Subject: [PATCH 3/3] changes again --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 02cb572..6c23154 100644 --- a/.gitignore +++ b/.gitignore @@ -55,4 +55,3 @@ data/* local/* *.xml *.xslx -FamilyAgentsFromCSV.py