|
19 | 19 | ids = requests.get(baseURL + endpoint, headers=headers).json() |
20 | 20 |
|
21 | 21 | f=csv.writer(open('topContainerCountByResource.csv', 'wb')) |
22 | | -f.writerow(['title']+['uri']+['id_0']+['id_1']+['id_2']+['id_3']+['topContainerCount']) |
| 22 | +f.writerow(['title']+['bib']+['uri']+['id_0']+['id_1']+['id_2']+['id_3']+['topContainerCount']) |
23 | 23 |
|
24 | 24 | f2=csv.writer(open('topContainersLinks.csv', 'wb')) |
25 | 25 | f2.writerow(['resourceUri']+['topContainerUri']) |
|
32 | 32 | topContainerLinks = [] |
33 | 33 | uniqueTopContainers = [] |
34 | 34 | for id in ids: |
35 | | - |
36 | 35 | resourceTopContainers = [] |
37 | 36 | print 'id', id, total, 'records remaining' |
38 | 37 | total = total - 1 |
39 | 38 | endpoint = '/repositories/3/resources/'+str(id) |
40 | 39 | output = requests.get(baseURL + endpoint, headers=headers).json() |
41 | 40 | title = output['title'].encode('utf-8') |
| 41 | + print title |
42 | 42 | uri = output['uri'] |
| 43 | + try: |
| 44 | + bib = output['user_defined']['real_1'] |
| 45 | + except: |
| 46 | + bib ='' |
| 47 | + print bib |
43 | 48 | id0 = output['id_0'] |
44 | 49 | try: |
45 | 50 | id1 = output['id_1'] |
|
52 | 57 | try: |
53 | 58 | id3 = output['id_3'] |
54 | 59 | except: |
55 | | - id3='' |
56 | | - # ###searchEndpoint = '/repositories/3/top_containers/search' |
57 | | - # ###output = requests.get(baseURL + searchEndpoint, headers=headers).json() |
| 60 | + id3= '' |
58 | 61 | page = 1 |
59 | | - payload = {'page': page, 'page_size': '3000', 'root_record': endpoint} |
60 | | - search = requests.get(baseURL+'/search', headers=headers, params=payload).json() |
| 62 | + resultsPage = '' |
| 63 | + # while resultsPage != []: |
| 64 | + # payload = {'page': page, 'offset': offset, 'page_size': '100', 'root_record': endpoint} |
| 65 | + # print payload |
| 66 | + # search = requests.get(baseURL+'/search', headers=headers, params=payload).json() |
| 67 | + # print search |
| 68 | + |
| 69 | + # resultsPage = search['results'] |
| 70 | + # for result in resultsPage: |
| 71 | + # results.append(result) |
61 | 72 | results = [] |
62 | | - resultsPage = search['results'] |
63 | | - for result in resultsPage: |
64 | | - results.append(result) |
65 | 73 | while resultsPage != []: |
66 | | - page = page + 1 |
67 | | - payload = {'page': page, 'page_size': '3000', 'root_record': endpoint} |
| 74 | + print page |
| 75 | + payload = {'page': page, 'page_size': '100', 'root_record': endpoint} |
68 | 76 | search = requests.get(baseURL+'/search', headers=headers, params=payload).json() |
69 | 77 | resultsPage = search['results'] |
70 | 78 | for result in resultsPage: |
71 | 79 | results.append(result) |
| 80 | + page = page + 1 |
72 | 81 |
|
73 | 82 | for result in results: |
74 | 83 | try: |
|
85 | 94 | topContainers = [] |
86 | 95 | topContainerCount = len(resourceTopContainers) |
87 | 96 | print 'top containers', topContainerCount |
88 | | - f.writerow([title]+[uri]+[id0]+[id1]+[id2]+[id3]+[topContainerCount]) |
| 97 | + f.writerow([title]+[bib]+[uri]+[id0]+[id1]+[id2]+[id3]+[topContainerCount]) |
89 | 98 |
|
| 99 | +print 'top container links' |
90 | 100 | for topContainerLink in topContainerLinks: |
91 | 101 | f2.writerow([topContainerLink[:topContainerLink.index('|')]]+[topContainerLink[topContainerLink.index('|')+1:]]) |
92 | 102 |
|
| 103 | +print 'unique top containers' |
93 | 104 | for topContainer in uniqueTopContainers: |
94 | | - search = requests.get(baseURL+topContainer, headers=headers).json() |
95 | | - try: |
96 | | - indicator = search['indicator'] |
97 | | - except: |
98 | | - indicator = '' |
99 | | - |
100 | | - try: |
101 | | - barcode = search['barcode'] |
102 | | - except: |
103 | | - barcode = '' |
104 | | - f3.writerow([topContainer]+[indicator]+[barcode]) |
| 105 | + print topContainer |
| 106 | + search = requests.get(baseURL+topContainer, headers=headers).json() |
| 107 | + try: |
| 108 | + indicator = search['indicator'] |
| 109 | + except: |
| 110 | + indicator = '' |
| 111 | + |
| 112 | + try: |
| 113 | + barcode = search['barcode'] |
| 114 | + except: |
| 115 | + barcode = '' |
| 116 | + f3.writerow([topContainer]+[indicator]+[barcode]) |
105 | 117 |
|
106 | 118 | elapsedTime = time.time() - startTime |
107 | 119 | m, s = divmod(elapsedTime, 60) |
|
0 commit comments