Skip to content

Commit deb7900

Browse files
authored
Merge pull request #379 from AzureAD/http-cache
Http cache
2 parents ef89877 + b4401a1 commit deb7900

File tree

5 files changed

+684
-0
lines changed

5 files changed

+684
-0
lines changed

msal/application.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from .token_cache import TokenCache
2222
import msal.telemetry
2323
from .region import _detect_region
24+
from .throttled_http_client import ThrottledHttpClient
2425

2526

2627
# The __init__.py will import this. Not the other way around.
@@ -336,6 +337,10 @@ def __init__(
336337
a = requests.adapters.HTTPAdapter(max_retries=1)
337338
self.http_client.mount("http://", a)
338339
self.http_client.mount("https://", a)
340+
self.http_client = ThrottledHttpClient(
341+
self.http_client,
342+
{} # Hard code an in-memory cache, for now
343+
)
339344

340345
self.app_name = app_name
341346
self.app_version = app_version
@@ -433,6 +438,7 @@ def _build_client(self, client_credential, authority, skip_regional_client=False
433438
"x-client-sku": "MSAL.Python", "x-client-ver": __version__,
434439
"x-client-os": sys.platform,
435440
"x-client-cpu": "x64" if sys.maxsize > 2 ** 32 else "x86",
441+
"x-ms-lib-capability": "retry-after, h429",
436442
}
437443
if self.app_name:
438444
default_headers['x-app-name'] = self.app_name

msal/individual_cache.py

Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
from functools import wraps
2+
import time
3+
try:
4+
from collections.abc import MutableMapping # Python 3.3+
5+
except ImportError:
6+
from collections import MutableMapping # Python 2.7+
7+
import heapq
8+
from threading import Lock
9+
10+
11+
class _ExpiringMapping(MutableMapping):
12+
_INDEX = "_index_"
13+
14+
def __init__(self, mapping=None, capacity=None, expires_in=None, lock=None,
15+
*args, **kwargs):
16+
"""Items in this mapping can have individual shelf life,
17+
just like food items in your refrigerator have their different shelf life
18+
determined by each food, not by the refrigerator.
19+
20+
Expired items will be automatically evicted.
21+
The clean-up will be done at each time when adding a new item,
22+
or when looping or counting the entire mapping.
23+
(This is better than being done indecisively by a background thread,
24+
which might not always happen before your accessing the mapping.)
25+
26+
This implementation uses no dependency other than Python standard library.
27+
28+
:param MutableMapping mapping:
29+
A dict-like key-value mapping, which needs to support __setitem__(),
30+
__getitem__(), __delitem__(), get(), pop().
31+
32+
The default mapping is an in-memory dict.
33+
34+
You could potentially supply a file-based dict-like object, too.
35+
This implementation deliberately avoid mapping.__iter__(),
36+
which could be slow on a file-based mapping.
37+
38+
:param int capacity:
39+
How many items this mapping will hold.
40+
When you attempt to add new item into a full mapping,
41+
it will automatically delete the item that is expiring soonest.
42+
43+
The default value is None, which means there is no capacity limit.
44+
45+
:param int expires_in:
46+
How many seconds an item would expire and be purged from this mapping.
47+
Also known as time-to-live (TTL).
48+
You can also use :func:`~set()` to provide per-item expires_in value.
49+
50+
:param Lock lock:
51+
A locking mechanism with context manager interface.
52+
If no lock is provided, a threading.Lock will be used.
53+
But you may want to supply a different lock,
54+
if your customized mapping is being shared differently.
55+
"""
56+
super(_ExpiringMapping, self).__init__(*args, **kwargs)
57+
self._mapping = mapping if mapping is not None else {}
58+
self._capacity = capacity
59+
self._expires_in = expires_in
60+
self._lock = Lock() if lock is None else lock
61+
62+
def _validate_key(self, key):
63+
if key == self._INDEX:
64+
raise ValueError("key {} is a reserved keyword in {}".format(
65+
key, self.__class__.__name__))
66+
67+
def set(self, key, value, expires_in):
68+
# This method's name was chosen so that it matches its cousin __setitem__(),
69+
# and it also complements the counterpart get().
70+
# The downside is such a name shadows the built-in type set in this file,
71+
# but you can overcome that by defining a global alias for set.
72+
"""It sets the key-value pair into this mapping, with its per-item expires_in.
73+
74+
It will take O(logN) time, because it will run some maintenance.
75+
This worse-than-constant time is acceptable, because in a cache scenario,
76+
__setitem__() would only be called during a cache miss,
77+
which would already incur an expensive target function call anyway.
78+
79+
By the way, most other methods of this mapping still have O(1) constant time.
80+
"""
81+
with self._lock:
82+
self._set(key, value, expires_in)
83+
84+
def _set(self, key, value, expires_in):
85+
# This internal implementation powers both set() and __setitem__(),
86+
# so that they don't depend on each other.
87+
self._validate_key(key)
88+
sequence, timestamps = self._mapping.get(self._INDEX, ([], {}))
89+
self._maintenance(sequence, timestamps) # O(logN)
90+
now = int(time.time())
91+
expires_at = now + expires_in
92+
entry = [expires_at, now, key]
93+
is_new_item = key not in timestamps
94+
is_beyond_capacity = self._capacity and len(timestamps) >= self._capacity
95+
if is_new_item and is_beyond_capacity:
96+
self._drop_indexed_entry(timestamps, heapq.heappushpop(sequence, entry))
97+
else: # Simply add new entry. The old one would become a harmless orphan.
98+
heapq.heappush(sequence, entry)
99+
timestamps[key] = [expires_at, now] # It overwrites existing key, if any
100+
self._mapping[key] = value
101+
self._mapping[self._INDEX] = sequence, timestamps
102+
103+
def _maintenance(self, sequence, timestamps): # O(logN)
104+
"""It will modify input sequence and timestamps in-place"""
105+
now = int(time.time())
106+
while sequence: # Clean up expired items
107+
expires_at, created_at, key = sequence[0]
108+
if created_at <= now < expires_at: # Then all remaining items are fresh
109+
break
110+
self._drop_indexed_entry(timestamps, sequence[0]) # It could error out
111+
heapq.heappop(sequence) # Only pop it after a successful _drop_indexed_entry()
112+
while self._capacity is not None and len(timestamps) > self._capacity:
113+
self._drop_indexed_entry(timestamps, sequence[0]) # It could error out
114+
heapq.heappop(sequence) # Only pop it after a successful _drop_indexed_entry()
115+
116+
def _drop_indexed_entry(self, timestamps, entry):
117+
"""For an entry came from index, drop it from timestamps and self._mapping"""
118+
expires_at, created_at, key = entry
119+
if [expires_at, created_at] == timestamps.get(key): # So it is not an orphan
120+
self._mapping.pop(key, None) # It could raise exception
121+
timestamps.pop(key, None) # This would probably always succeed
122+
123+
def __setitem__(self, key, value):
124+
"""Implements the __setitem__().
125+
126+
Same characteristic as :func:`~set()`,
127+
but use class-wide expires_in which was specified by :func:`~__init__()`.
128+
"""
129+
if self._expires_in is None:
130+
raise ValueError("Need a numeric value for expires_in during __init__()")
131+
with self._lock:
132+
self._set(key, value, self._expires_in)
133+
134+
def __getitem__(self, key): # O(1)
135+
"""If the item you requested already expires, KeyError will be raised."""
136+
self._validate_key(key)
137+
with self._lock:
138+
# Skip self._maintenance(), because it would need O(logN) time
139+
sequence, timestamps = self._mapping.get(self._INDEX, ([], {}))
140+
expires_at, created_at = timestamps[key] # Would raise KeyError accordingly
141+
now = int(time.time())
142+
if not created_at <= now < expires_at:
143+
self._mapping.pop(key, None)
144+
timestamps.pop(key, None)
145+
self._mapping[self._INDEX] = sequence, timestamps
146+
raise KeyError("{} {}".format(
147+
key,
148+
"expired" if now >= expires_at else "created in the future?",
149+
))
150+
return self._mapping[key] # O(1)
151+
152+
def __delitem__(self, key): # O(1)
153+
"""If the item you requested already expires, KeyError will be raised."""
154+
self._validate_key(key)
155+
with self._lock:
156+
# Skip self._maintenance(), because it would need O(logN) time
157+
self._mapping.pop(key, None) # O(1)
158+
sequence, timestamps = self._mapping.get(self._INDEX, ([], {}))
159+
del timestamps[key] # O(1)
160+
self._mapping[self._INDEX] = sequence, timestamps
161+
162+
def __len__(self): # O(logN)
163+
"""Drop all expired items and return the remaining length"""
164+
with self._lock:
165+
sequence, timestamps = self._mapping.get(self._INDEX, ([], {}))
166+
self._maintenance(sequence, timestamps) # O(logN)
167+
self._mapping[self._INDEX] = sequence, timestamps
168+
return len(timestamps) # Faster than iter(self._mapping) when it is on disk
169+
170+
def __iter__(self):
171+
"""Drop all expired items and return an iterator of the remaining items"""
172+
with self._lock:
173+
sequence, timestamps = self._mapping.get(self._INDEX, ([], {}))
174+
self._maintenance(sequence, timestamps) # O(logN)
175+
self._mapping[self._INDEX] = sequence, timestamps
176+
return iter(timestamps) # Faster than iter(self._mapping) when it is on disk
177+
178+
179+
class _IndividualCache(object):
180+
# The code structure below can decorate both function and method.
181+
# It is inspired by https://stackoverflow.com/a/9417088
182+
# We may potentially switch to build upon
183+
# https://github.com/micheles/decorator/blob/master/docs/documentation.md#statement-of-the-problem
184+
def __init__(self, mapping=None, key_maker=None, expires_in=None):
185+
"""Constructs a cache decorator that allows item-by-item control on
186+
how to cache the return value of the decorated function.
187+
188+
:param MutableMapping mapping:
189+
The cached items will be stored inside.
190+
You'd want to use a ExpiringMapping
191+
if you plan to utilize the ``expires_in`` behavior.
192+
193+
If nothing is provided, an in-memory dict will be used,
194+
but it will provide no expiry functionality.
195+
196+
.. note::
197+
198+
When using this class as a decorator,
199+
your mapping needs to be available at "compile" time,
200+
so it would typically be a global-, module- or class-level mapping::
201+
202+
module_mapping = {}
203+
204+
@IndividualCache(mapping=module_mapping, ...)
205+
def foo():
206+
...
207+
208+
If you want to use a mapping available only at run-time,
209+
you have to manually decorate your function at run-time, too::
210+
211+
def foo():
212+
...
213+
214+
def bar(runtime_mapping):
215+
foo = IndividualCache(mapping=runtime_mapping...)(foo)
216+
217+
:param callable key_maker:
218+
A callable which should have signature as
219+
``lambda function, args, kwargs: "return a string as key"``.
220+
221+
If key_maker happens to return ``None``, the cache will be bypassed,
222+
the underlying function will be invoked directly,
223+
and the invoke result will not be cached either.
224+
225+
:param callable expires_in:
226+
The default value is ``None``,
227+
which means the content being cached has no per-item expiry,
228+
and will subject to the underlying mapping's global expiry time.
229+
230+
It can be an integer indicating
231+
how many seconds the result will be cached.
232+
In particular, if the value is 0,
233+
it means the result expires after zero second (i.e. immediately),
234+
therefore the result will *not* be cached.
235+
(Mind the difference between ``expires_in=0`` and ``expires_in=None``.)
236+
237+
Or it can be a callable with the signature as
238+
``lambda function=function, args=args, kwargs=kwargs, result=result: 123``
239+
to calculate the expiry on the fly.
240+
Its return value will be interpreted in the same way as above.
241+
"""
242+
self._mapping = mapping if mapping is not None else {}
243+
self._key_maker = key_maker or (lambda function, args, kwargs: (
244+
function, # This default implementation uses function as part of key,
245+
# so that the cache is partitioned by function.
246+
# However, you could have many functions to use same namespace,
247+
# so different decorators could share same cache.
248+
args,
249+
tuple(kwargs.items()), # raw kwargs is not hashable
250+
))
251+
self._expires_in = expires_in
252+
253+
def __call__(self, function):
254+
255+
@wraps(function)
256+
def wrapper(*args, **kwargs):
257+
key = self._key_maker(function, args, kwargs)
258+
if key is None: # Then bypass the cache
259+
return function(*args, **kwargs)
260+
261+
now = int(time.time())
262+
try:
263+
return self._mapping[key]
264+
except KeyError:
265+
# We choose to NOT call function(...) in this block, otherwise
266+
# potential exception from function(...) would become a confusing
267+
# "During handling of the above exception, another exception occurred"
268+
pass
269+
value = function(*args, **kwargs)
270+
271+
expires_in = self._expires_in(
272+
function=function,
273+
args=args,
274+
kwargs=kwargs,
275+
result=value,
276+
) if callable(self._expires_in) else self._expires_in
277+
if expires_in == 0:
278+
return value
279+
if expires_in is None:
280+
self._mapping[key] = value
281+
else:
282+
self._mapping.set(key, value, expires_in)
283+
return value
284+
285+
return wrapper
286+

0 commit comments

Comments
 (0)