Skip to content

Commit c454257

Browse files
authored
Merge pull request #223 from IdentityPython/ft-refactor_resource_info
refactor resource info
2 parents 94a970a + f4f1d35 commit c454257

File tree

5 files changed

+162
-88
lines changed

5 files changed

+162
-88
lines changed

src/pyff/api.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from pyff.logs import get_log
2323
from pyff.pipes import plumbing
2424
from pyff.repo import MDRepository
25-
from pyff.resource import Resource
25+
from pyff.resource import Resource, ResourceInfo
2626
from pyff.samlmd import entity_display_name
2727
from pyff.utils import b2u, dumptree, duration2timedelta, hash_id, json_serializer, utc_now
2828

@@ -390,10 +390,10 @@ def resources_handler(request):
390390
"""
391391

392392
def _infos(resources: Iterable[Resource]) -> List[Mapping[str, Any]]:
393-
return list(filter(lambda i: 'State' in i and i['State'] is not None, [_info(r) for r in resources]))
393+
return [_info(r) for r in resources if r.info.state is not None]
394394

395395
def _info(r: Resource) -> Mapping[str, Any]:
396-
nfo = r.info
396+
nfo = r.info.to_dict()
397397
nfo['Valid'] = r.is_valid()
398398
nfo['Parser'] = r.last_parser
399399
if r.last_seen is not None:

src/pyff/parse.py

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import os
22
from abc import ABC
33
from collections import deque
4-
from typing import Any, List, Mapping, Optional
4+
from typing import Any, Dict, List, Optional
55

6+
from pydantic import BaseModel, Field
67
from xmlsec.crypto import CertDict
78

89
from pyff.constants import NS
@@ -15,6 +16,20 @@
1516
log = get_log(__name__)
1617

1718

19+
class ParserInfo(BaseModel):
    """Structured description of what a parser did with a resource.

    Instances are returned by the ``parse`` methods of the parsers in place
    of a free-form mapping.
    """

    description: str
    expiration_time: str  # TODO: Change expiration_time into a datetime
    # default_factory builds a fresh dict for every instance.
    validation_errors: Dict[str, Any] = Field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a dict keyed by human-readable names.

        Each field name is converted from snake_case to title-cased words,
        e.g. ``expiration_time`` becomes ``'Expiration Time'``.
        """

        def _format_key(k: str) -> str:
            # Turn expiration_time into 'Expiration Time'
            return k.replace('_', ' ').title()

        return {_format_key(k): v for k, v in self.dict().items()}
31+
32+
1833
class ParserException(Exception):
1934
def __init__(self, msg, wrapped=None, data=None):
2035
self._wraped = wrapped
@@ -33,12 +48,8 @@ def magic(self, content: str):
3348
"""Return True if this parser is applicable to this content"""
3449
raise NotImplementedError()
3550

36-
def parse(self, resource: Resource, content: str) -> Mapping[str, Any]:
37-
"""Initialise/update a resource based on this content, returning information about it
38-
TODO: Determine what 'parse' actually means
39-
40-
TODO: Return something more structured than an arbitrary mapping
41-
"""
51+
def parse(self, resource: Resource, content: str) -> ParserInfo:
52+
"""Initialise/update a resource based on this content, returning information about it"""
4253
raise NotImplementedError()
4354

4455

@@ -52,7 +63,7 @@ def __str__(self):
5263
def magic(self, content: str) -> bool:
5364
return True
5465

55-
def parse(self, resource: Resource, content: str) -> Mapping[str, Any]:
66+
def parse(self, resource: Resource, content: str) -> ParserInfo:
5667
raise ParserException("No matching parser found for %s" % resource.url)
5768

5869

@@ -66,11 +77,9 @@ def __str__(self):
6677
def magic(self, content: str) -> bool:
6778
return os.path.isdir(content)
6879

69-
def parse(self, resource: Resource, content: str) -> Mapping[str, Any]:
80+
def parse(self, resource: Resource, content: str) -> ParserInfo:
7081
resource.children = deque()
71-
info = dict()
72-
info['Description'] = 'Directory'
73-
info['Expiration Time'] = 'never expires'
82+
info = ParserInfo(description='Directory', expiration_time='never expires')
7483
n = 0
7584
for fn in find_matching_files(content, self.extensions):
7685
child_opts = resource.opts.copy(update={'alias': None})
@@ -84,7 +93,7 @@ def parse(self, resource: Resource, content: str) -> Mapping[str, Any]:
8493
resource.expire_time = None
8594
resource.last_seen = utc_now().replace(microsecond=0)
8695

87-
return dict()
96+
return info
8897

8998

9099
class XRDParser(PyffParser):
@@ -97,10 +106,8 @@ def __str__(self):
97106
def magic(self, content: str) -> bool:
98107
return 'XRD' in content
99108

100-
def parse(self, resource: Resource, content: str) -> Mapping[str, Any]:
101-
info = dict()
102-
info['Description'] = "XRD links"
103-
info['Expiration Time'] = 'never expires'
109+
def parse(self, resource: Resource, content: str) -> ParserInfo:
110+
info = ParserInfo(description='XRD links', expiration_time='never expires')
104111
t = parse_xml(unicode_stream(content))
105112

106113
relt = root(t)
@@ -128,7 +135,7 @@ def add_parser(parser):
128135
_parsers.insert(0, parser)
129136

130137

131-
def parse_resource(resource: Resource, content: str) -> Optional[Mapping[str, Any]]:
138+
def parse_resource(resource: Resource, content: str) -> Optional[ParserInfo]:
132139
for parser in _parsers:
133140
if parser.magic(content):
134141
resource.last_parser = parser

src/pyff/resource.py

Lines changed: 70 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@
99
import traceback
1010
from collections import deque
1111
from datetime import datetime
12+
from enum import Enum
1213
from threading import Condition, Lock
13-
from typing import TYPE_CHECKING, Any, Callable, Deque, Dict, Iterable, List, Optional, Tuple
14+
from typing import Any, Callable, Deque, Dict, Iterable, List, Mapping, Optional, TYPE_CHECKING, Tuple
1415
from urllib.parse import quote as urlescape
1516

1617
import requests
@@ -34,7 +35,7 @@
3435
)
3536

3637
if TYPE_CHECKING:
37-
from pyff.parse import PyffParser
38+
from pyff.parse import ParserInfo, PyffParser
3839
from pyff.pipes import PipelineCallback
3940
from pyff.utils import Lambda
4041

@@ -141,14 +142,14 @@ def thing_to_url(self, t: Resource) -> Optional[str]:
141142
def i_handle(self, t: Resource, url=None, response=None, exception=None, last_fetched=None):
142143
try:
143144
if exception is not None:
144-
t.info['Exception'] = exception
145+
t.info.exception = exception
145146
else:
146147
children = t.parse(lambda u: response)
147148
self.i_schedule(children)
148149
except BaseException as ex:
149150
log.debug(traceback.format_exc())
150151
log.error(f'Failed handling resource: {ex}')
151-
t.info['Exception'] = ex
152+
t.info.exception = ex
152153

153154

154155
class ResourceOpts(BaseModel):
@@ -177,6 +178,50 @@ def to_dict(self) -> Dict[str, Any]:
177178
return res
178179

179180

181+
class ResourceLoadState(str, Enum):
    """Stages a resource passes through while it is loaded and parsed.

    The ``str`` mix-in makes every member compare equal to its plain string
    value, so code that still compares against e.g. ``'Fetched'`` keeps
    working.
    """

    Fetched = 'Fetched'  # content has been retrieved
    Parsing = 'Parsing'  # content is being handed to a parser
    Parsed = 'Parsed'  # the parser has finished
    Ready = 'Ready'  # post-processing and expiry checks are done

    def __str__(self) -> str:
        # Render as the bare value ('Fetched') on every Python version.
        # Without this, str()/format() of a str-mixin Enum gives
        # 'ResourceLoadState.Fetched' on some versions.
        return self.value
186+
187+
188+
class ResourceInfo(BaseModel):
    """Record of one attempt to load a resource.

    Collects what is known about the fetch (HTTP headers, status, reason),
    the parser result, and any exception raised along the way.
    """

    # Optional because a Resource may be created without a URL.
    resource: Optional[str] = None  # URL
    state: Optional[ResourceLoadState] = None
    http_headers: Dict[str, Any] = Field(default_factory=dict)
    reason: Optional[str] = None
    # Explicit default so the field is optional on every pydantic version.
    status_code: Optional[str] = None  # HTTP status code as string. TODO: change to int
    parser_info: Optional[ParserInfo] = None
    expired: Optional[bool] = None
    exception: Optional[BaseException] = None

    class Config:
        # BaseException is not a pydantic-native type.
        arbitrary_types_allowed = True

    def to_dict(self) -> Dict[str, Any]:
        """Return the info as a flat dict keyed by human-readable names.

        Field names are title-cased (``status_code`` -> ``'Status Code'``),
        except ``http_headers`` which becomes ``'HTTP Response Headers'``.
        When parser info is present its entries are merged into the top
        level. The nested ``'Parser Info'`` entry is never included.
        """

        def _format_key(k: str) -> str:
            special = {'http_headers': 'HTTP Response Headers'}
            if k in special:
                return special[k]
            # Turn validation_errors into 'Validation Errors'
            return k.replace('_', ' ').title()

        res = {_format_key(k): v for k, v in self.dict().items()}

        # Move contents from sub-dict to top of dict, for backwards compatibility
        res.pop('Parser Info', None)
        if self.parser_info:
            res.update(self.parser_info.to_dict())

        # backwards compat
        # 'Description' only exists when parser info was merged in, so look
        # it up defensively instead of indexing.
        if res.get('Description') == 'SAML Metadata':
            del res['Description']
        if res.get('Exception') is None:
            res.pop('Exception', None)

        return res
223+
224+
180225
class Resource(Watchable):
181226
def __init__(self, url: Optional[str], opts: ResourceOpts):
182227
super().__init__()
@@ -189,7 +234,7 @@ def __init__(self, url: Optional[str], opts: ResourceOpts):
189234
self.never_expires: bool = False
190235
self.last_seen: Optional[datetime] = None
191236
self.last_parser: Optional['PyffParser'] = None # importing PyffParser in this module causes a loop
192-
self._infos: Deque[Dict] = deque(maxlen=config.info_buffer_size)
237+
self._infos: Deque[ResourceInfo] = deque(maxlen=config.info_buffer_size)
193238
self.children: Deque[Resource] = deque()
194239
self._setup()
195240

@@ -281,10 +326,8 @@ def is_expired(self) -> bool:
281326
def is_valid(self) -> bool:
282327
return not self.is_expired() and self.last_seen is not None and self.last_parser is not None
283328

284-
def add_info(self) -> Dict[str, Any]:
285-
info: Dict[str, Any] = dict()
286-
info['State'] = None
287-
info['Resource'] = self.url
329+
def add_info(self) -> ResourceInfo:
330+
info = ResourceInfo(resource=self.url)
288331
self._infos.append(info)
289332
return info
290333

@@ -315,19 +358,12 @@ def name(self) -> Optional[str]:
315358
return self.url
316359

317360
@property
318-
def info(self):
361+
def info(self) -> ResourceInfo:
319362
if self._infos is None or not self._infos:
320-
return dict()
363+
return ResourceInfo(resource=self.url)
321364
else:
322365
return self._infos[-1]
323366

324-
@property
325-
def errors(self):
326-
if 'Validation Errors' in self.info:
327-
return self.info['Validation Errors']
328-
else:
329-
return []
330-
331367
def load_backup(self) -> Optional[str]:
332368
if config.local_copy_dir is None:
333369
return None
@@ -352,7 +388,7 @@ def save_backup(self, data: Optional[str]) -> None:
352388
except IOError as ex:
353389
log.warning("unable to save backup copy of {}: {}".format(self.url, ex))
354390

355-
def load_resource(self, getter: Callable[[str], Response]) -> Tuple[Optional[str], int, Dict[str, Any]]:
391+
def load_resource(self, getter: Callable[[str], Response]) -> Tuple[Optional[str], int, ResourceInfo]:
356392
data: Optional[str] = None
357393
status: int = 500
358394
info = self.add_info()
@@ -366,14 +402,14 @@ def load_resource(self, getter: Callable[[str], Response]) -> Tuple[Optional[str
366402
try:
367403
r = getter(self.url)
368404

369-
info['HTTP Response Headers'] = r.headers
405+
info.http_headers = dict(r.headers)
370406
log.debug(
371407
"got status_code={:d}, encoding={} from_cache={} from {}".format(
372408
r.status_code, r.encoding, getattr(r, "from_cache", False), self.url
373409
)
374410
)
375411
status = r.status_code
376-
info['Reason'] = r.reason
412+
info.reason = r.reason
377413

378414
if r.ok:
379415
data = r.text
@@ -389,25 +425,25 @@ def load_resource(self, getter: Callable[[str], Response]) -> Tuple[Optional[str
389425
)
390426
data = self.load_backup()
391427
if data is not None and len(data) > 0:
392-
info['Reason'] = "Retrieved from local cache because status: {} != 200".format(status)
428+
info.reason = "Retrieved from local cache because status: {} != 200".format(status)
393429
status = 218
394430

395-
info['Status Code'] = str(status)
431+
info.status_code = str(status)
396432

397433
except IOError as ex:
398434
if self.local_copy_fn is not None:
399435
log.warning("caught exception from {} - trying local backup: {}".format(self.url, ex))
400436
data = self.load_backup()
401437
if data is not None and len(data) > 0:
402-
info['Reason'] = "Retrieved from local cache because exception: {}".format(ex)
438+
info.reason = "Retrieved from local cache because exception: {}".format(ex)
403439
status = 218
404440
if data is None or not len(data) > 0:
405441
raise ex # propagate exception if we can't find a backup
406442

407443
if data is None or not len(data) > 0:
408444
raise ResourceException("failed to fetch {} (status: {:d})".format(self.url, status))
409445

410-
info['State'] = 'Fetched'
446+
info.state = ResourceLoadState.Fetched
411447

412448
return data, status, info
413449

@@ -417,34 +453,33 @@ def parse(self, getter: Callable[[str], Response]) -> Deque[Resource]:
417453
if not data:
418454
raise ResourceException(f'Nothing to parse when loading resource {self}')
419455

420-
info['State'] = 'Parsing'
456+
info.state = ResourceLoadState.Parsing
421457
# local import to avoid circular import
422458
from pyff.parse import parse_resource
423459

424-
parse_info = parse_resource(self, data)
425-
if parse_info is not None:
426-
info.update(parse_info)
460+
info.parser_info = parse_resource(self, data)
427461

428462
if status != 218: # write backup unless we just loaded from backup
429463
self.last_seen = utc_now().replace(microsecond=0)
430464
self.save_backup(data)
431465

432-
info['State'] = 'Parsed'
466+
info.state = ResourceLoadState.Parsed
433467
if self.t is not None:
434468
if self.post:
435469
for cb in self.post:
436470
if self.t is not None:
437471
self.t = cb(self.t, self.opts.dict())
438472

439473
if self.is_expired():
440-
info['Expired'] = True
474+
info.expired = True
441475
raise ResourceException("Resource at {} expired on {}".format(self.url, self.expire_time))
442476
else:
443-
info['Expired'] = False
477+
info.expired = False
444478

445-
for (eid, error) in list(info['Validation Errors'].items()):
446-
log.error(error)
479+
if info.parser_info:
480+
for (eid, error) in list(info.parser_info.validation_errors.items()):
481+
log.error(error)
447482

448-
info['State'] = 'Ready'
483+
info.state = ResourceLoadState.Ready
449484

450485
return self.children

0 commit comments

Comments
 (0)