Skip to content

Commit 8f999bd

Browse files
committed
Update eadToCsv.py
1 parent 0647444 commit 8f999bd

File tree

1 file changed

+9
-8
lines changed

1 file changed

+9
-8
lines changed

eadToCsv.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,10 @@ def extractValuesFromComponentLevel (componentLevel):
77
unittitle = componentLevel.find('did').find('unittitle').text.replace('\n','').encode('utf-8')
88
controlAccess = []
99
originationList = []
10+
try:
11+
physdesc = componentLevel.find('did').find('physdesc').text.replace('\n','').encode('utf-8')
12+
except:
13+
physdesc = ''
1014
try:
1115
unitdate = componentLevel.find('did').find('unitdate')
1216
dateExpression = unitdate.text.encode('utf-8').replace('\n','').replace(' ',' ').replace(' ',' ').encode('utf-8')
@@ -73,22 +77,19 @@ def extractValuesFromComponentLevel (componentLevel):
7377
originationList = ''
7478
global sortOrder
7579
sortOrder += 1
76-
f.writerow([sortOrder]+[level]+[componentLevelLabel]+[containerType1]+[container1]+[containerType2]+[container2]+[unittitle]+[dateExpression]+[dateType]+[beginDate]+[endDate]+[scopecontent]+[controlAccess]+[originationList]+[containerId1]+[containerId2])
80+
f.writerow([sortOrder]+[level]+[componentLevelLabel]+[containerType1]+[container1]+[containerType2]+[container2]+[unittitle]+[physdesc]+[dateExpression]+[dateType]+[beginDate]+[endDate]+[scopecontent]+[controlAccess]+[originationList]+[containerId1]+[containerId2])
7781

78-
filepath = '/home/mjanowi3/archivesspace-api'
7982
filepath = ''
80-
fileName = 'Coll.011.xml'
83+
fileName = 'Coll.004_20181012_144804_UTC__ead.xml'
8184
xml = open(filepath+fileName)
8285

83-
84-
8586
f=csv.writer(open(filepath+'eadFields.csv', 'wb'))
86-
f.writerow(['sortOrder']+['hierarchy']+['level']+['containerType1']+['container1']+['containerType2']+['container2']+['unittitle']+['dateexpression']+['datetype']+['begindate']+['enddate']+['scopecontent']+['controlAccess']+['origination']+['containerId1']+['containerId2'])
87+
f.writerow(['sortOrder']+['hierarchy']+['level']+['containerType1']+['container1']+['containerType2']+['container2']+['unittitle']+['physdesc']+['dateexpression']+['datetype']+['begindate']+['enddate']+['scopecontent']+['controlAccess']+['origination']+['containerId1']+['containerId2'])
8788
upperComponentLevels = BeautifulSoup(xml, 'lxml').find('dsc').find_all('c01')
8889
sortOrder = 0
8990
for upperComponentLevel in upperComponentLevels:
9091
componentLevelLabel = upperComponentLevel['level']
91-
unittitle = upperComponentLevel.find('did').find('unittitle').text.encode('utf-8')
92+
unittitle = upperComponentLevel.find('did').find('unittitle').text.encode('utf-8').replace('\n','').replace(' ', ' ')
9293
try:
9394
unitdate = upperComponentLevel.find('did').find('unitdate')
9495
dateExpression = unitdate.text.encode('utf-8').replace('\n','').replace(' ',' ').replace(' ',' ').encode('utf-8')
@@ -117,7 +118,7 @@ def extractValuesFromComponentLevel (componentLevel):
117118
except:
118119
scopecontent = ''
119120
sortOrder += 1
120-
f.writerow([sortOrder]+['c01']+[componentLevelLabel]+['']+['']+['']+['']+[unittitle]+[dateExpression]+[dateType]+[beginDate]+[endDate]+[scopecontent]+['']+['']+['']+[''])
121+
f.writerow([sortOrder]+['c01']+[componentLevelLabel]+['']+['']+['']+['']+[unittitle]+['']+[dateExpression]+[dateType]+[beginDate]+[endDate]+[scopecontent]+['']+['']+['']+[''])
121122

122123
componentLevelArray = upperComponentLevel.find_all('c02')
123124
for componentLevel in componentLevelArray:

0 commit comments

Comments (0)