|
29 | 29 | ids = requests.get(baseURL + endpoint, headers=headers).json() |
30 | 30 |
|
31 | 31 | f=csv.writer(open('topContainerCountByResource.csv', 'wb')) |
32 | | -f.writerow(['title']+['uri']+['id_0']+['id_1']+['id_2']+['id_3']+['topContainerCount']) |
| 32 | +f.writerow(['title']+['bib']+['uri']+['id_0']+['id_1']+['id_2']+['id_3']+['topContainerCount']) |
33 | 33 |
|
34 | 34 | f2=csv.writer(open('topContainersLinks.csv', 'wb')) |
35 | 35 | f2.writerow(['resourceUri']+['topContainerUri']) |
|
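A side note on the csv setup above: opening the output files in 'wb' is the Python 2 csv idiom this script relies on throughout. If anyone runs it under Python 3, the equivalent is text mode with newline='' — a sketch, not part of this commit:

    import csv

    # Python 3 equivalent of the 'wb' writers above; newline='' stops
    # the csv module from inserting blank rows on Windows.
    f = csv.writer(open('topContainerCountByResource.csv', 'w', newline=''))
    f.writerow(['title', 'bib', 'uri', 'id_0', 'id_1', 'id_2', 'id_3', 'topContainerCount'])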
42 | 42 | topContainerLinks = [] |
43 | 43 | uniqueTopContainers = [] |
44 | 44 | for id in ids: |
45 | | - |
46 | 45 | resourceTopContainers = [] |
47 | 46 | print 'id', id, total, 'records remaining' |
48 | 47 | total = total - 1 |
49 | 48 | endpoint = '/repositories/3/resources/'+str(id) |
50 | 49 | output = requests.get(baseURL + endpoint, headers=headers).json() |
51 | 50 | title = output['title'].encode('utf-8') |
| 51 | + print title |
52 | 52 | uri = output['uri'] |
| 53 | + try: |
| 54 | + bib = output['user_defined']['real_1'] |
| 55 | + except: |
| 56 | + bib ='' |
| 57 | + print bib |
53 | 58 | id0 = output['id_0'] |
54 | 59 | try: |
55 | 60 | id1 = output['id_1'] |
|
62 | 67 | try: |
63 | 68 | id3 = output['id_3'] |
64 | 69 | except: |
65 | | - id3='' |
66 | | - # ###searchEndpoint = '/repositories/3/top_containers/search' |
67 | | - # ###output = requests.get(baseURL + searchEndpoint, headers=headers).json() |
| 70 | + id3= '' |
68 | 71 | page = 1 |
69 | | - payload = {'page': page, 'page_size': '3000', 'root_record': endpoint} |
70 | | - search = requests.get(baseURL+'/search', headers=headers, params=payload).json() |
| 72 | + resultsPage = '' |
71 | 73 | results = [] |
72 | | - resultsPage = search['results'] |
73 | | - for result in resultsPage: |
74 | | - results.append(result) |
75 | 74 | while resultsPage != []: |
76 | | - page = page + 1 |
77 | | - payload = {'page': page, 'page_size': '3000', 'root_record': endpoint} |
| 75 | + print page |
| 76 | + payload = {'page': page, 'page_size': '100', 'root_record': endpoint} |
78 | 77 | search = requests.get(baseURL+'/search', headers=headers, params=payload).json() |
79 | 78 | resultsPage = search['results'] |
80 | 79 | for result in resultsPage: |
81 | 80 | results.append(result) |
| 81 | + page = page + 1 |
82 | 82 |
|
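The hunk above is the heart of this commit: instead of fetching page 1 separately and then looping, the rewrite starts resultsPage at a non-empty sentinel and lets a single loop fetch every page (at the smaller page_size of 100) until the API returns an empty results list. A minimal sketch of the same pattern, assuming the baseURL and headers session set up earlier in the script; the function name is illustrative:

    import requests

    def get_all_results(baseURL, headers, endpoint):
        # Page through the ArchivesSpace /search endpoint; 'root_record'
        # scopes the search to the resource at 'endpoint'. An empty
        # 'results' list signals that the last page has been passed.
        page = 1
        results = []
        resultsPage = None
        while resultsPage != []:
            payload = {'page': page, 'page_size': '100', 'root_record': endpoint}
            search = requests.get(baseURL + '/search', headers=headers, params=payload).json()
            resultsPage = search['results']
            results.extend(resultsPage)
            page = page + 1
        return results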
83 | 83 | for result in results: |
84 | 84 | try: |
|
95 | 95 | topContainers = [] |
96 | 96 | topContainerCount = len(resourceTopContainers) |
97 | 97 | print 'top containers', topContainerCount |
98 | | - f.writerow([title]+[uri]+[id0]+[id1]+[id2]+[id3]+[topContainerCount]) |
| 98 | + f.writerow([title]+[bib]+[uri]+[id0]+[id1]+[id2]+[id3]+[topContainerCount]) |
99 | 99 |
|
| 100 | +print 'top container links' |
100 | 101 | for topContainerLink in topContainerLinks: |
101 | 102 | f2.writerow([topContainerLink[:topContainerLink.index('|')]]+[topContainerLink[topContainerLink.index('|')+1:]]) |
102 | 103 |
|
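For readers following the f2 writes above: each topContainerLink was stored earlier as a single 'resourceUri|topContainerUri' string, and the two index()/slice expressions split it back apart on the first pipe. str.partition does the same in one call — a sketch with an illustrative pair, assuming the f2 writer from the top of the script; this is safe because ArchivesSpace URIs are slash-delimited and never contain '|':

    link = '/repositories/3/resources/407|/repositories/3/top_containers/1234'  # illustrative
    resourceUri, _, topContainerUri = link.partition('|')  # split on the first '|'
    f2.writerow([resourceUri] + [topContainerUri])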
| 104 | +print 'unique top containers' |
103 | 105 | for topContainer in uniqueTopContainers: |
104 | | - search = requests.get(baseURL+topContainer, headers=headers).json() |
105 | | - try: |
106 | | - indicator = search['indicator'] |
107 | | - except: |
108 | | - indicator = '' |
109 | | - |
110 | | - try: |
111 | | - barcode = search['barcode'] |
112 | | - except: |
113 | | - barcode = '' |
114 | | - f3.writerow([topContainer]+[indicator]+[barcode]) |
| 106 | + print topContainer |
| 107 | + search = requests.get(baseURL+topContainer, headers=headers).json() |
| 108 | + try: |
| 109 | + indicator = search['indicator'] |
| 110 | + except: |
| 111 | + indicator = '' |
115 | 112 |
|
| 113 | + try: |
| 114 | + barcode = search['barcode'] |
| 115 | + except: |
| 116 | + barcode = '' |
| 117 | + f3.writerow([topContainer]+[indicator]+[barcode]) |
| 118 | + |
116 | 119 | elapsedTime = time.time() - startTime |
117 | 120 | m, s = divmod(elapsedTime, 60) |
118 | 121 | h, m = divmod(m, 60) |
|
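One design note on the re-indented block above: the indicator and barcode lookups use bare try/except to default missing fields to ''. dict.get does the same with less nesting — a minimal sketch, assuming the same baseURL, headers, and f3 writer from earlier in the script; the function name is illustrative:

    import requests

    def write_top_containers(baseURL, headers, uniqueTopContainers, f3):
        # Fetch each top container record and write its URI, indicator,
        # and barcode; .get() falls back to '' when a field is absent.
        for topContainer in uniqueTopContainers:
            record = requests.get(baseURL + topContainer, headers=headers).json()
            f3.writerow([topContainer] + [record.get('indicator', '')] + [record.get('barcode', '')])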