@@ -7,6 +7,10 @@ def extractValuesFromComponentLevel (componentLevel):
77 unittitle = componentLevel .find ('did' ).find ('unittitle' ).text .replace ('\n ' ,'' ).encode ('utf-8' )
88 controlAccess = []
99 originationList = []
10+ try :
11+ physdesc = componentLevel .find ('did' ).find ('physdesc' ).text .replace ('\n ' ,'' ).encode ('utf-8' )
12+ except :
13+ physdesc = ''
1014 try :
1115 unitdate = componentLevel .find ('did' ).find ('unitdate' )
1216 dateExpression = unitdate .text .encode ('utf-8' ).replace ('\n ' ,'' ).replace (' ' ,' ' ).replace (' ' ,' ' ).encode ('utf-8' )
@@ -73,22 +77,19 @@ def extractValuesFromComponentLevel (componentLevel):
7377 originationList = ''
7478 global sortOrder
7579 sortOrder += 1
76- f .writerow ([sortOrder ]+ [level ]+ [componentLevelLabel ]+ [containerType1 ]+ [container1 ]+ [containerType2 ]+ [container2 ]+ [unittitle ]+ [dateExpression ]+ [dateType ]+ [beginDate ]+ [endDate ]+ [scopecontent ]+ [controlAccess ]+ [originationList ]+ [containerId1 ]+ [containerId2 ])
80+ f .writerow ([sortOrder ]+ [level ]+ [componentLevelLabel ]+ [containerType1 ]+ [container1 ]+ [containerType2 ]+ [container2 ]+ [unittitle ]+ [physdesc ] + [ dateExpression ]+ [dateType ]+ [beginDate ]+ [endDate ]+ [scopecontent ]+ [controlAccess ]+ [originationList ]+ [containerId1 ]+ [containerId2 ])
7781
78- filepath = '/home/mjanowi3/archivesspace-api'
7982filepath = ''
80- fileName = 'Coll.011 .xml'
83+ fileName = 'Coll.004_20181012_144804_UTC__ead .xml'
8184xml = open (filepath + fileName )
8285
83-
84-
8586f = csv .writer (open (filepath + 'eadFields.csv' , 'wb' ))
86- f .writerow (['sortOrder' ]+ ['hierarchy' ]+ ['level' ]+ ['containerType1' ]+ ['container1' ]+ ['containerType2' ]+ ['container2' ]+ ['unittitle' ]+ ['dateexpression' ]+ ['datetype' ]+ ['begindate' ]+ ['enddate' ]+ ['scopecontent' ]+ ['controlAccess' ]+ ['origination' ]+ ['containerId1' ]+ ['containerId2' ])
87+ f .writerow (['sortOrder' ]+ ['hierarchy' ]+ ['level' ]+ ['containerType1' ]+ ['container1' ]+ ['containerType2' ]+ ['container2' ]+ ['unittitle' ]+ ['physdesc' ] + [ ' dateexpression' ]+ ['datetype' ]+ ['begindate' ]+ ['enddate' ]+ ['scopecontent' ]+ ['controlAccess' ]+ ['origination' ]+ ['containerId1' ]+ ['containerId2' ])
8788upperComponentLevels = BeautifulSoup (xml , 'lxml' ).find ('dsc' ).find_all ('c01' )
8889sortOrder = 0
8990for upperComponentLevel in upperComponentLevels :
9091 componentLevelLabel = upperComponentLevel ['level' ]
91- unittitle = upperComponentLevel .find ('did' ).find ('unittitle' ).text .encode ('utf-8' )
92+ unittitle = upperComponentLevel .find ('did' ).find ('unittitle' ).text .encode ('utf-8' ). replace ( ' \n ' , '' ). replace ( ' ' , ' ' )
9293 try :
9394 unitdate = upperComponentLevel .find ('did' ).find ('unitdate' )
9495 dateExpression = unitdate .text .encode ('utf-8' ).replace ('\n ' ,'' ).replace (' ' ,' ' ).replace (' ' ,' ' ).encode ('utf-8' )
@@ -117,7 +118,7 @@ def extractValuesFromComponentLevel (componentLevel):
117118 except :
118119 scopecontent = ''
119120 sortOrder += 1
120- f .writerow ([sortOrder ]+ ['c01' ]+ [componentLevelLabel ]+ ['' ]+ ['' ]+ ['' ]+ ['' ]+ [unittitle ]+ [dateExpression ]+ [dateType ]+ [beginDate ]+ [endDate ]+ [scopecontent ]+ ['' ]+ ['' ]+ ['' ]+ ['' ])
121+ f .writerow ([sortOrder ]+ ['c01' ]+ [componentLevelLabel ]+ ['' ]+ ['' ]+ ['' ]+ ['' ]+ [unittitle ]+ ['' ] + [ dateExpression ]+ [dateType ]+ [beginDate ]+ [endDate ]+ [scopecontent ]+ ['' ]+ ['' ]+ ['' ]+ ['' ])
121122
122123 componentLevelArray = upperComponentLevel .find_all ('c02' )
123124 for componentLevel in componentLevelArray :
0 commit comments