9
9
import traceback
10
10
from collections import deque
11
11
from datetime import datetime
12
+ from enum import Enum
12
13
from threading import Condition , Lock
13
- from typing import TYPE_CHECKING , Any , Callable , Deque , Dict , Iterable , List , Optional , Tuple
14
+ from typing import Any , Callable , Deque , Dict , Iterable , List , Mapping , Optional , TYPE_CHECKING , Tuple
14
15
from urllib .parse import quote as urlescape
15
16
16
17
import requests
34
35
)
35
36
36
37
if TYPE_CHECKING :
37
- from pyff .parse import PyffParser
38
+ from pyff .parse import ParserInfo , PyffParser
38
39
from pyff .pipes import PipelineCallback
39
40
from pyff .utils import Lambda
40
41
@@ -141,14 +142,14 @@ def thing_to_url(self, t: Resource) -> Optional[str]:
141
142
def i_handle(self, t: Resource, url=None, response=None, exception=None, last_fetched=None):
    """Process the outcome of a fetch for resource ``t``.

    When ``exception`` is given it is stored on ``t.info.exception`` and
    nothing else happens.  Otherwise ``t`` is parsed using a getter that
    always hands back ``response``, and the child resources returned by the
    parse are passed to ``self.i_schedule``.

    Any failure raised while doing so is logged and stored on
    ``t.info.exception`` rather than propagated.  ``url`` and
    ``last_fetched`` are accepted but not used here.
    """
    try:
        if exception is not None:
            # The fetch itself failed: just record why.
            t.info.exception = exception
            return
        # The response is already in hand, so the getter ignores its argument.
        self.i_schedule(t.parse(lambda _url: response))
    except BaseException as err:
        log.debug(traceback.format_exc())
        log.error(f'Failed handling resource: {err}')
        t.info.exception = err
152
153
153
154
154
155
class ResourceOpts (BaseModel ):
@@ -177,6 +178,50 @@ def to_dict(self) -> Dict[str, Any]:
177
178
return res
178
179
179
180
181
class ResourceLoadState(str, Enum):
    """Progress marker recorded on a ``ResourceInfo`` while a resource loads.

    Members are also ``str`` instances whose value equals their name.
    """

    # Set once load_resource() has obtained non-empty data.
    Fetched = 'Fetched'
    # Set just before the fetched data is handed to the parser.
    Parsing = 'Parsing'
    # Set after parsing (and the optional backup write) has completed.
    Parsed = 'Parsed'
    # Set at the very end of parse(), after the expiry check.
    Ready = 'Ready'
186
+
187
+
188
class ResourceInfo(BaseModel):
    """Typed record describing one attempt to load a resource.

    Replaces the free-form ``dict`` that used to be stored per load attempt;
    ``to_dict()`` renders the record back into that legacy key layout.
    """

    resource: str  # URL
    state: Optional[ResourceLoadState] = None
    http_headers: Dict[str, Any] = Field({})  # copy of the response headers
    reason: Optional[str] = None
    # Explicit default, consistent with the other optional fields.
    status_code: Optional[str] = None  # HTTP status code as string. TODO: change to int
    parser_info: Optional[ParserInfo] = None
    expired: Optional[bool] = None
    exception: Optional[BaseException] = None

    class Config:
        # Required so the ``exception`` field can hold a BaseException instance.
        arbitrary_types_allowed = True

    def to_dict(self) -> Dict[str, Any]:
        """Return the info as a dict using the legacy, human-readable keys.

        Field names are title-cased (``status_code`` -> ``'Status Code'``),
        with ``http_headers`` mapped to ``'HTTP Response Headers'``.  When
        parser info is present its own ``to_dict()`` entries are merged into
        the top level.  The nested ``'Parser Info'`` entry, a ``'Description'``
        equal to ``'SAML Metadata'`` and an ``'Exception'`` of ``None`` are
        left out of the result.
        """

        def _format_key(k: str) -> str:
            special = {'http_headers': 'HTTP Response Headers'}
            if k in special:
                return special[k]
            # Turn validation_errors into 'Validation Errors'
            return k.replace('_', ' ').title()

        res = {_format_key(k): v for k, v in self.dict().items()}

        if self.parser_info:
            # Move contents from sub-dict to top of dict, for backwards compatibility
            res.update(self.parser_info.to_dict())
        # The legacy dict never had a nested parser entry, so always drop it.
        res.pop('Parser Info', None)

        # backwards compat
        # 'Description' is only present when the parser info supplied it, so
        # look it up defensively instead of raising KeyError without a parser.
        if res.get('Description') == 'SAML Metadata':
            del res['Description']
        if res['Exception'] is None:
            del res['Exception']

        return res
223
+
224
+
180
225
class Resource (Watchable ):
181
226
def __init__ (self , url : Optional [str ], opts : ResourceOpts ):
182
227
super ().__init__ ()
@@ -189,7 +234,7 @@ def __init__(self, url: Optional[str], opts: ResourceOpts):
189
234
self .never_expires : bool = False
190
235
self .last_seen : Optional [datetime ] = None
191
236
self .last_parser : Optional ['PyffParser' ] = None # importing PyffParser in this module causes a loop
192
- self ._infos : Deque [Dict ] = deque (maxlen = config .info_buffer_size )
237
+ self ._infos : Deque [ResourceInfo ] = deque (maxlen = config .info_buffer_size )
193
238
self .children : Deque [Resource ] = deque ()
194
239
self ._setup ()
195
240
@@ -281,10 +326,8 @@ def is_expired(self) -> bool:
281
326
def is_valid(self) -> bool:
    """Return True when the resource is not expired and has been seen and parsed.

    "Seen" means ``last_seen`` is set; "parsed" means ``last_parser`` is set.
    """
    if self.is_expired():
        return False
    return self.last_seen is not None and self.last_parser is not None
283
328
284
def add_info(self) -> ResourceInfo:
    """Create a fresh info record for this resource's URL, store it and return it.

    The record is appended to the bounded ``_infos`` history; the caller
    receives the same object so it can fill in details as loading proceeds.
    """
    entry = ResourceInfo(resource=self.url)
    self._infos.append(entry)
    return entry
290
333
@@ -315,19 +358,12 @@ def name(self) -> Optional[str]:
315
358
return self .url
316
359
317
360
@property
def info(self) -> ResourceInfo:
    """The most recently added info record.

    If no record has been added yet, a new placeholder for this resource's
    URL is returned.  The placeholder is not stored in the history, so
    changes made to it are not visible on later accesses.
    """
    if self._infos:
        return self._infos[-1]
    return ResourceInfo(resource=self.url)
323
366
324
- @property
325
- def errors (self ):
326
- if 'Validation Errors' in self .info :
327
- return self .info ['Validation Errors' ]
328
- else :
329
- return []
330
-
331
367
def load_backup (self ) -> Optional [str ]:
332
368
if config .local_copy_dir is None :
333
369
return None
@@ -352,7 +388,7 @@ def save_backup(self, data: Optional[str]) -> None:
352
388
except IOError as ex :
353
389
log .warning ("unable to save backup copy of {}: {}" .format (self .url , ex ))
354
390
355
- def load_resource (self , getter : Callable [[str ], Response ]) -> Tuple [Optional [str ], int , Dict [ str , Any ] ]:
391
+ def load_resource (self , getter : Callable [[str ], Response ]) -> Tuple [Optional [str ], int , ResourceInfo ]:
356
392
data : Optional [str ] = None
357
393
status : int = 500
358
394
info = self .add_info ()
@@ -366,14 +402,14 @@ def load_resource(self, getter: Callable[[str], Response]) -> Tuple[Optional[str
366
402
try :
367
403
r = getter (self .url )
368
404
369
- info [ 'HTTP Response Headers' ] = r .headers
405
+ info . http_headers = dict ( r .headers )
370
406
log .debug (
371
407
"got status_code={:d}, encoding={} from_cache={} from {}" .format (
372
408
r .status_code , r .encoding , getattr (r , "from_cache" , False ), self .url
373
409
)
374
410
)
375
411
status = r .status_code
376
- info [ 'Reason' ] = r .reason
412
+ info . reason = r .reason
377
413
378
414
if r .ok :
379
415
data = r .text
@@ -389,25 +425,25 @@ def load_resource(self, getter: Callable[[str], Response]) -> Tuple[Optional[str
389
425
)
390
426
data = self .load_backup ()
391
427
if data is not None and len (data ) > 0 :
392
- info [ 'Reason' ] = "Retrieved from local cache because status: {} != 200" .format (status )
428
+ info . reason = "Retrieved from local cache because status: {} != 200" .format (status )
393
429
status = 218
394
430
395
- info [ 'Status Code' ] = str (status )
431
+ info . status_code = str (status )
396
432
397
433
except IOError as ex :
398
434
if self .local_copy_fn is not None :
399
435
log .warning ("caught exception from {} - trying local backup: {}" .format (self .url , ex ))
400
436
data = self .load_backup ()
401
437
if data is not None and len (data ) > 0 :
402
- info [ 'Reason' ] = "Retrieved from local cache because exception: {}" .format (ex )
438
+ info . reason = "Retrieved from local cache because exception: {}" .format (ex )
403
439
status = 218
404
440
if data is None or not len (data ) > 0 :
405
441
raise ex # propagate exception if we can't find a backup
406
442
407
443
if data is None or not len (data ) > 0 :
408
444
raise ResourceException ("failed to fetch {} (status: {:d})" .format (self .url , status ))
409
445
410
- info [ 'State' ] = ' Fetched'
446
+ info . state = ResourceLoadState . Fetched
411
447
412
448
return data , status , info
413
449
@@ -417,34 +453,33 @@ def parse(self, getter: Callable[[str], Response]) -> Deque[Resource]:
417
453
if not data :
418
454
raise ResourceException (f'Nothing to parse when loading resource { self } ' )
419
455
420
- info [ 'State' ] = ' Parsing'
456
+ info . state = ResourceLoadState . Parsing
421
457
# local import to avoid circular import
422
458
from pyff .parse import parse_resource
423
459
424
- parse_info = parse_resource (self , data )
425
- if parse_info is not None :
426
- info .update (parse_info )
460
+ info .parser_info = parse_resource (self , data )
427
461
428
462
if status != 218 : # write backup unless we just loaded from backup
429
463
self .last_seen = utc_now ().replace (microsecond = 0 )
430
464
self .save_backup (data )
431
465
432
- info [ 'State' ] = ' Parsed'
466
+ info . state = ResourceLoadState . Parsed
433
467
if self .t is not None :
434
468
if self .post :
435
469
for cb in self .post :
436
470
if self .t is not None :
437
471
self .t = cb (self .t , self .opts .dict ())
438
472
439
473
if self .is_expired ():
440
- info [ 'Expired' ] = True
474
+ info . expired = True
441
475
raise ResourceException ("Resource at {} expired on {}" .format (self .url , self .expire_time ))
442
476
else :
443
- info [ 'Expired' ] = False
477
+ info . expired = False
444
478
445
- for (eid , error ) in list (info ['Validation Errors' ].items ()):
446
- log .error (error )
479
+ if info .parser_info :
480
+ for (eid , error ) in list (info .parser_info .validation_errors .items ()):
481
+ log .error (error )
447
482
448
- info [ 'State' ] = ' Ready'
483
+ info . state = ResourceLoadState . Ready
449
484
450
485
return self .children
0 commit comments