Skip to content

Commit d9f4cce

Browse files
authored
Merge pull request #8 from mjanowiecki/master
TopContainer changes
2 parents ac35b3a + d5d2fb0 commit d9f4cce

File tree

2 files changed

+28
-25
lines changed

2 files changed

+28
-25
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,4 +56,4 @@ data/*
5656
local/*
5757
*.xml
5858
*.xslx
59-
*.txt
59+
*.txt

getTopContainerCountByResource.py

Lines changed: 27 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@
2929
ids = requests.get(baseURL + endpoint, headers=headers).json()
3030

3131
f=csv.writer(open('topContainerCountByResource.csv', 'wb'))
32-
f.writerow(['title']+['uri']+['id_0']+['id_1']+['id_2']+['id_3']+['topContainerCount'])
32+
f.writerow(['title']+['bib']+['uri']+['id_0']+['id_1']+['id_2']+['id_3']+['topContainerCount'])
3333

3434
f2=csv.writer(open('topContainersLinks.csv', 'wb'))
3535
f2.writerow(['resourceUri']+['topContainerUri'])
@@ -42,14 +42,19 @@
4242
topContainerLinks = []
4343
uniqueTopContainers = []
4444
for id in ids:
45-
4645
resourceTopContainers = []
4746
print 'id', id, total, 'records remaining'
4847
total = total - 1
4948
endpoint = '/repositories/3/resources/'+str(id)
5049
output = requests.get(baseURL + endpoint, headers=headers).json()
5150
title = output['title'].encode('utf-8')
51+
print title
5252
uri = output['uri']
53+
try:
54+
bib = output['user_defined']['real_1']
55+
except:
56+
bib =''
57+
print bib
5358
id0 = output['id_0']
5459
try:
5560
id1 = output['id_1']
@@ -62,23 +67,18 @@
6267
try:
6368
id3 = output['id_3']
6469
except:
65-
id3=''
66-
# ###searchEndpoint = '/repositories/3/top_containers/search'
67-
# ###output = requests.get(baseURL + searchEndpoint, headers=headers).json()
70+
id3= ''
6871
page = 1
69-
payload = {'page': page, 'page_size': '3000', 'root_record': endpoint}
70-
search = requests.get(baseURL+'/search', headers=headers, params=payload).json()
72+
resultsPage = ''
7173
results = []
72-
resultsPage = search['results']
73-
for result in resultsPage:
74-
results.append(result)
7574
while resultsPage != []:
76-
page = page + 1
77-
payload = {'page': page, 'page_size': '3000', 'root_record': endpoint}
75+
print page
76+
payload = {'page': page, 'page_size': '100', 'root_record': endpoint}
7877
search = requests.get(baseURL+'/search', headers=headers, params=payload).json()
7978
resultsPage = search['results']
8079
for result in resultsPage:
8180
results.append(result)
81+
page = page + 1
8282

8383
for result in results:
8484
try:
@@ -95,24 +95,27 @@
9595
topContainers = []
9696
topContainerCount = len(resourceTopContainers)
9797
print 'top containers', topContainerCount
98-
f.writerow([title]+[uri]+[id0]+[id1]+[id2]+[id3]+[topContainerCount])
98+
f.writerow([title]+[bib]+[uri]+[id0]+[id1]+[id2]+[id3]+[topContainerCount])
9999

100+
print 'top container links'
100101
for topContainerLink in topContainerLinks:
101102
f2.writerow([topContainerLink[:topContainerLink.index('|')]]+[topContainerLink[topContainerLink.index('|')+1:]])
102103

104+
print 'unique top containers'
103105
for topContainer in uniqueTopContainers:
104-
search = requests.get(baseURL+topContainer, headers=headers).json()
105-
try:
106-
indicator = search['indicator']
107-
except:
108-
indicator = ''
109-
110-
try:
111-
barcode = search['barcode']
112-
except:
113-
barcode = ''
114-
f3.writerow([topContainer]+[indicator]+[barcode])
106+
print topContainer
107+
search = requests.get(baseURL+topContainer, headers=headers).json()
108+
try:
109+
indicator = search['indicator']
110+
except:
111+
indicator = ''
115112

113+
try:
114+
barcode = search['barcode']
115+
except:
116+
barcode = ''
117+
f3.writerow([topContainer]+[indicator]+[barcode])
118+
116119
elapsedTime = time.time() - startTime
117120
m, s = divmod(elapsedTime, 60)
118121
h, m = divmod(m, 60)

0 commit comments

Comments
 (0)