@@ -3,13 +3,15 @@
 import enum
 import functools
 import itertools
+import json
 import logging
 import os
 import re
 from hashlib import sha256
 from pathlib import Path
 from typing import (
     TYPE_CHECKING,
+    Any,
     Dict,
     FrozenSet,
     Iterable,
@@ -26,7 +28,7 @@
 from pip._vendor.packaging.version import _BaseVersion
 from pip._vendor.packaging.version import parse as parse_version
 
-from pip._internal.cache import FetchResolveCache
+from pip._internal.cache import FetchResolveCache, SerializableEntry
 from pip._internal.exceptions import (
     BestVersionAlreadyInstalled,
     DistributionNotFound,
@@ -36,7 +38,7 @@
 from pip._internal.index.collector import IndexContent, LinkCollector, parse_links
 from pip._internal.models.candidate import InstallationCandidate
 from pip._internal.models.format_control import FormatControl
-from pip._internal.models.link import Link
+from pip._internal.models.link import Link, PersistentLinkCacheArgs
 from pip._internal.models.search_scope import SearchScope
 from pip._internal.models.selection_prefs import SelectionPreferences
 from pip._internal.models.target_python import TargetPython
@@ -119,14 +121,41 @@ class LinkType(enum.Enum):
     requires_python_mismatch = enum.auto()
 
 
-class LinkEvaluator:
+class LinkEvaluator(SerializableEntry):
 
     """
    Responsible for evaluating links for a particular project.
     """
 
+    @classmethod
+    def suffix(cls) -> str:
+        return ".evaluation"
+
     _py_version_re = re.compile(r"-py([123]\.?[0-9]?)$")
 
+    def serialize(self) -> Dict[str, Any]:
+        return dict(
+            project_name=self.project_name,
+            canonical_name=self._canonical_name,
+            # Sort these for determinism.
+            formats=sorted(self._formats),
+            target_python=self._target_python.format_given(),
+            allow_yanked=self._allow_yanked,
+            ignore_requires_python=self._ignore_requires_python,
+        )
+
+    def to_json(self) -> str:
+        return json.dumps(self.serialize(), sort_keys=True)
+
+    def __eq__(self, other: Any) -> bool:
+        return isinstance(other, type(self)) and self.to_json() == other.to_json()
+
+    def __ne__(self, other: Any) -> bool:
+        return not self == other
+
+    def __hash__(self) -> int:
+        return hash(self.to_json())
+
     # Don't include an allow_yanked default value to make sure each call
     # site considers whether yanked releases are allowed. This also causes
     # that decision to be made explicit in the calling code, which helps
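The `sort_keys=True` serialization gives equal evaluators a byte-identical JSON form, which is what lets the cache key entries by evaluator. As a rough sketch of how a hashed entry path might be derived from it (the real `hashed_entry_path` lives in `pip/_internal/cache.py` and is not shown in this diff, so the body below is an illustrative assumption):

    from hashlib import sha256
    from pathlib import Path

    def hypothetical_entry_path(
        cache_dir: Path, url: str, evaluator: "LinkEvaluator"
    ) -> Path:
        # Mix the index URL with the evaluator's canonical JSON form so that any
        # change to formats, target python, allow_yanked, etc. selects a
        # different cache file.
        digest = sha256((url + evaluator.to_json()).encode("utf-8")).hexdigest()
        return cache_dir / (digest + evaluator.suffix())  # "<sha256>.evaluation"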
@@ -900,6 +929,91 @@ def _write_http_cache_info(
 
         return (new_etag, new_date, new_checksum, page_unmodified)
 
+    @staticmethod
+    def _try_load_parsed_links_cache(parsed_links_path: Path) -> Optional[List[Link]]:
+        page_links: Optional[List[Link]] = None
+        try:
+            with parsed_links_path.open("r") as f:
+                logger.debug("reading page links from cache %s", parsed_links_path)
+                cached_links = json.load(f)
+                page_links = []
+                for cache_info in cached_links:
+                    link = Link.from_cache_args(
+                        PersistentLinkCacheArgs.from_json(cache_info)
+                    )
+                    assert link is not None
+                    page_links.append(link)
+        except (OSError, json.decoder.JSONDecodeError, KeyError) as e:
+            logger.debug(
+                "could not read page links from cache file %s %s(%s)",
+                parsed_links_path,
+                e.__class__.__name__,
+                str(e),
+            )
+        return page_links
+
+    @staticmethod
+    def _write_parsed_links_cache(
+        parsed_links_path: Path, links: Iterable[Link]
+    ) -> List[Link]:
+        cacheable_links: List[Dict[str, Any]] = []
+        page_links: List[Link] = []
+        for link in links:
+            cache_info = link.cache_args()
+            assert cache_info is not None
+            cacheable_links.append(cache_info.to_json())
+            page_links.append(link)
+
+        logger.debug("writing page links to %s", parsed_links_path)
+        with parsed_links_path.open("w") as f:
+            json.dump(cacheable_links, f)
+
+        return page_links
+
+    @staticmethod
+    def _try_load_installation_candidate_cache(
+        cached_candidates_path: Path,
+    ) -> Optional[List[InstallationCandidate]]:
+        try:
+            with cached_candidates_path.open("r") as f:
+                serialized_candidates = json.load(f)
+            logger.debug("read serialized candidates from %s", cached_candidates_path)
+            package_links: List[InstallationCandidate] = []
+            for cand in serialized_candidates:
+                link_cache_args = PersistentLinkCacheArgs.from_json(cand["link"])
+                link = Link.from_cache_args(link_cache_args)
+                package_links.append(
+                    InstallationCandidate(cand["name"], cand["version"], link)
+                )
+            return package_links
+        except (OSError, json.decoder.JSONDecodeError, KeyError) as e:
+            logger.debug(
+                "could not read cached candidates at %s %s(%s)",
+                cached_candidates_path,
+                e.__class__.__name__,
+                str(e),
+            )
+        return None
+
+    @staticmethod
+    def _write_installation_candidate_cache(
+        cached_candidates_path: Path,
+        candidates: Iterable[InstallationCandidate],
+    ) -> List[InstallationCandidate]:
+        candidates = list(candidates)
+        serialized_candidates = [
+            dict(
+                name=candidate.name,
+                version=str(candidate.version),
+                link=candidate.link.cache_args().to_json(),
+            )
+            for candidate in candidates
+        ]
+        with cached_candidates_path.open("w") as f:
+            logger.debug("writing serialized candidates to %s", f.name)
+            json.dump(serialized_candidates, f)
+        return candidates
+
     def _process_project_url_uncached(
         self, project_url: Link, link_evaluator: LinkEvaluator
     ) -> List[InstallationCandidate]:
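All four helpers share the same tolerant round-trip shape: the `_try_load_*` readers catch `OSError`, `json.decoder.JSONDecodeError`, and `KeyError` so a missing or corrupt file degrades into a cache miss rather than an error, and the `_write_*` writers return the freshly materialized list so call sites can consume the result of either path uniformly. A minimal generic sketch of that pattern (names are illustrative, not part of this diff):

    import json
    from pathlib import Path
    from typing import Any, List, Optional

    def try_load_json_cache(path: Path) -> Optional[List[Any]]:
        try:
            with path.open("r") as f:
                return json.load(f)  # Hit: reuse the previously serialized list.
        except (OSError, json.decoder.JSONDecodeError, KeyError):
            return None  # Miss: an unreadable or corrupt file is not fatal.

    def write_json_cache(path: Path, items: List[Any]) -> List[Any]:
        with path.open("w") as f:
            json.dump(items, f)
        return items  # Return the items so hit and miss paths look identical.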
@@ -926,6 +1040,10 @@ def process_project_url(
         etag_path = cached_path / "etag"
         date_path = cached_path / "modified-since-date"
         checksum_path = cached_path / "checksum"
+        parsed_links_path = cached_path / "parsed-links"
+        cached_candidates_path = self._fetch_resolve_cache.hashed_entry_path(
+            project_url, link_evaluator
+        )
 
         headers: Dict[str, str] = {}
         # NB: mutates headers!
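Note the asymmetry between the two new paths: `parsed_links_path` is keyed by the page alone (it lives directly in the per-URL `cached_path`), while `cached_candidates_path` is additionally keyed by the `LinkEvaluator`, since evaluation results depend on the evaluator's settings. Assuming `hashed_entry_path` folds the serialized evaluator into the file name, the per-URL cache directory would look roughly like this (illustrative layout, not taken from this diff):

    <fetch-resolve-cache>/<hash-of-url>/
        etag
        modified-since-date
        checksum
        parsed-links
        <hash-of-url-and-evaluator>.evaluation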
@@ -962,12 +1080,40 @@ def process_project_url(
             prev_checksum=prev_checksum,
         )
 
-        page_links = parse_links(index_response)
+        page_links: Optional[List[Link]] = None
+        # Only try our persistent link parsing and evaluation caches if we know the page
+        # was unmodified via checksum.
+        if page_unmodified:
+            cached_candidates = self._try_load_installation_candidate_cache(
+                cached_candidates_path
+            )
+            if cached_candidates is not None:
+                return cached_candidates
+
+            page_links = self._try_load_parsed_links_cache(parsed_links_path)
+        else:
+            try:
+                parsed_links_path.unlink()
+            except OSError:
+                pass
+            self._fetch_resolve_cache.clear_hashed_entries(project_url, LinkEvaluator)
+
+        if page_links is None:
+            logger.debug(
+                "extracting new parsed links from index response %s", index_response
+            )
+            page_links = self._write_parsed_links_cache(
+                parsed_links_path,
+                parse_links(index_response),
+            )
 
         with indent_log():
-            package_links = self.evaluate_links(
-                link_evaluator,
-                links=page_links,
+            package_links = self._write_installation_candidate_cache(
+                cached_candidates_path,
+                self.evaluate_links(
+                    link_evaluator,
+                    links=page_links,
+                ),
             )
 
         return package_links
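Taken together, `process_project_url` now has a three-tier fast path: on a checksum-verified unmodified page the candidate cache answers without any parsing or evaluation; failing that, the parsed-links cache skips HTML parsing; only a genuinely changed page falls through to `parse_links()` plus `evaluate_links()`, and both caches are rewritten on the way out. A condensed restatement of the hunk above (the helper names here are placeholders, not real methods):

    if page_unmodified:
        candidates = load_candidate_cache()      # tier 1: skip parse + evaluate
        if candidates is not None:
            return candidates
        page_links = load_parsed_links_cache()   # tier 2: skip parse only
    else:
        invalidate_caches()                      # stale entries must not survive
        page_links = None
    if page_links is None:
        page_links = parse_and_cache_links()     # tier 3: full work, then cache
    return evaluate_and_cache_candidates(page_links)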