1
1
"""Routines related to PyPI, indexes"""
2
2
3
3
import binascii
4
+ import bz2
4
5
import datetime
5
6
import enum
6
7
import functools
7
8
import itertools
9
+ import json
8
10
import logging
9
11
import os
10
12
import re
14
16
from pathlib import Path
15
17
from typing import (
16
18
TYPE_CHECKING ,
19
+ Any ,
20
+ Callable ,
17
21
Dict ,
18
22
FrozenSet ,
19
23
Iterable ,
30
34
from pip ._vendor .packaging .version import InvalidVersion , _BaseVersion
31
35
from pip ._vendor .packaging .version import parse as parse_version
32
36
33
- from pip ._internal .cache import FetchResolveCache
37
+ from pip ._internal .cache import FetchResolveCache , SerializableEntry
34
38
from pip ._internal .exceptions import (
35
39
BestVersionAlreadyInstalled ,
36
40
DistributionNotFound ,
40
44
from pip ._internal .index .collector import IndexContent , LinkCollector , parse_links
41
45
from pip ._internal .models .candidate import InstallationCandidate
42
46
from pip ._internal .models .format_control import FormatControl
43
- from pip ._internal .models .link import Link
47
+ from pip ._internal .models .link import Link , PersistentLinkCacheArgs
44
48
from pip ._internal .models .search_scope import SearchScope
45
49
from pip ._internal .models .selection_prefs import SelectionPreferences
46
50
from pip ._internal .models .target_python import TargetPython
@@ -123,13 +127,28 @@ class LinkType(enum.Enum):
123
127
requires_python_mismatch = enum .auto ()
124
128
125
129
126
- class LinkEvaluator :
130
+ class LinkEvaluator ( SerializableEntry ) :
127
131
"""
128
132
Responsible for evaluating links for a particular project.
129
133
"""
130
134
135
+ @classmethod
136
+ def suffix (cls ) -> str :
137
+ return ".evaluation"
138
+
131
139
_py_version_re = re .compile (r"-py([123]\.?[0-9]?)$" )
132
140
141
+ def serialize (self ) -> Dict [str , Any ]:
142
+ return {
143
+ "project_name" : self .project_name ,
144
+ "canonical_name" : self ._canonical_name ,
145
+ # Sort these for determinism.
146
+ "formats" : sorted (self ._formats ),
147
+ "target_python" : self ._target_python .format_given (),
148
+ "allow_yanked" : self ._allow_yanked ,
149
+ "ignore_requires_python" : self ._ignore_requires_python ,
150
+ }
151
+
133
152
# Don't include an allow_yanked default value to make sure each call
134
153
# site considers whether yanked releases are allowed. This also causes
135
154
# that decision to be made explicit in the calling code, which helps
@@ -594,6 +613,19 @@ def compute_best_candidate(
594
613
)
595
614
596
615
616
_FindCandidates = Callable[["PackageFinder", str], List[InstallationCandidate]]


def _canonicalize_arg(func: _FindCandidates) -> _FindCandidates:
    """Decorator: canonicalize the project-name argument before calling *func*.

    Applied above ``functools.lru_cache`` so that differently-spelled names
    for the same project collapse onto a single cache entry.
    """

    @functools.wraps(func)
    def inner(self: "PackageFinder", name: str) -> List[InstallationCandidate]:
        return func(self, canonicalize_name(name))

    return inner
627
+
628
+
597
629
class PackageFinder :
598
630
"""This finds packages.
599
631
@@ -954,6 +986,91 @@ def _write_http_cache_info(
954
986
955
987
return (new_etag , new_date , new_checksum , page_unmodified )
956
988
989
+ @staticmethod
990
+ def _try_load_parsed_links_cache (parsed_links_path : Path ) -> Optional [List [Link ]]:
991
+ page_links : Optional [List [Link ]] = None
992
+ try :
993
+ with bz2 .open (parsed_links_path , mode = "rt" , encoding = "utf-8" ) as f :
994
+ logger .debug ("reading page links from cache %s" , parsed_links_path )
995
+ cached_links = json .load (f )
996
+ page_links = []
997
+ for cache_info in cached_links :
998
+ link = Link .from_cache_args (
999
+ PersistentLinkCacheArgs .from_json (cache_info )
1000
+ )
1001
+ assert link is not None
1002
+ page_links .append (link )
1003
+ except (OSError , json .decoder .JSONDecodeError , KeyError ) as e :
1004
+ logger .debug (
1005
+ "could not read page links from cache file %s %s(%s)" ,
1006
+ parsed_links_path ,
1007
+ e .__class__ .__name__ ,
1008
+ str (e ),
1009
+ )
1010
+ return page_links
1011
+
1012
+ @staticmethod
1013
+ def _write_parsed_links_cache (
1014
+ parsed_links_path : Path , links : Iterable [Link ]
1015
+ ) -> List [Link ]:
1016
+ cacheable_links : List [Dict [str , Any ]] = []
1017
+ page_links : List [Link ] = []
1018
+ for link in links :
1019
+ cache_info = link .cache_args ()
1020
+ assert cache_info is not None
1021
+ cacheable_links .append (cache_info .to_json ())
1022
+ page_links .append (link )
1023
+
1024
+ logger .debug ("writing page links to %s" , parsed_links_path )
1025
+ with bz2 .open (parsed_links_path , mode = "wt" , encoding = "utf-8" ) as f :
1026
+ json .dump (cacheable_links , f )
1027
+
1028
+ return page_links
1029
+
1030
+ @staticmethod
1031
+ def _try_load_installation_candidate_cache (
1032
+ cached_candidates_path : Path ,
1033
+ ) -> Optional [List [InstallationCandidate ]]:
1034
+ try :
1035
+ with bz2 .open (cached_candidates_path , mode = "rt" , encoding = "utf-8" ) as f :
1036
+ serialized_candidates = json .load (f )
1037
+ logger .debug ("read serialized candidates from %s" , cached_candidates_path )
1038
+ package_links : List [InstallationCandidate ] = []
1039
+ for cand in serialized_candidates :
1040
+ link_cache_args = PersistentLinkCacheArgs .from_json (cand ["link" ])
1041
+ link = Link .from_cache_args (link_cache_args )
1042
+ package_links .append (
1043
+ InstallationCandidate (cand ["name" ], cand ["version" ], link )
1044
+ )
1045
+ return package_links
1046
+ except (OSError , json .decoder .JSONDecodeError , KeyError ) as e :
1047
+ logger .debug (
1048
+ "could not read cached candidates at %s %s(%s)" ,
1049
+ cached_candidates_path ,
1050
+ e .__class__ .__name__ ,
1051
+ str (e ),
1052
+ )
1053
+ return None
1054
+
1055
+ @staticmethod
1056
+ def _write_installation_candidate_cache (
1057
+ cached_candidates_path : Path ,
1058
+ candidates : Iterable [InstallationCandidate ],
1059
+ ) -> List [InstallationCandidate ]:
1060
+ candidates = list (candidates )
1061
+ serialized_candidates = [
1062
+ {
1063
+ "name" : candidate .name ,
1064
+ "version" : str (candidate .version ),
1065
+ "link" : candidate .link .cache_args ().to_json (),
1066
+ }
1067
+ for candidate in candidates
1068
+ ]
1069
+ with bz2 .open (cached_candidates_path , mode = "wt" , encoding = "utf-8" ) as f :
1070
+ logger .debug ("writing serialized candidates to %s" , cached_candidates_path )
1071
+ json .dump (serialized_candidates , f )
1072
+ return candidates
1073
+
957
1074
def _process_project_url_uncached (
958
1075
self , project_url : Link , link_evaluator : LinkEvaluator
959
1076
) -> List [InstallationCandidate ]:
@@ -972,7 +1089,6 @@ def _process_project_url_uncached(
972
1089
package_links = self .evaluate_links (link_evaluator , links = page_links )
973
1090
return package_links
974
1091
975
- @functools .lru_cache (maxsize = None )
976
1092
def process_project_url (
977
1093
self , project_url : Link , link_evaluator : LinkEvaluator
978
1094
) -> List [InstallationCandidate ]:
@@ -985,6 +1101,10 @@ def process_project_url(
985
1101
etag_path = cached_path / "etag"
986
1102
date_path = cached_path / "modified-since-date"
987
1103
checksum_path = cached_path / "checksum"
1104
+ parsed_links_path = cached_path / "parsed-links"
1105
+ cached_candidates_path = self ._fetch_resolve_cache .hashed_entry_path (
1106
+ project_url , link_evaluator
1107
+ )
988
1108
989
1109
headers : Dict [str , str ] = {}
990
1110
# NB: mutates headers!
@@ -1021,16 +1141,45 @@ def process_project_url(
1021
1141
prev_checksum = prev_checksum ,
1022
1142
)
1023
1143
1024
- page_links = parse_links (index_response )
1144
+ page_links : Optional [List [Link ]] = None
1145
+ # Only try our persistent link parsing and evaluation caches if we know the page
1146
+ # was unmodified via checksum.
1147
+ if page_unmodified :
1148
+ cached_candidates = self ._try_load_installation_candidate_cache (
1149
+ cached_candidates_path
1150
+ )
1151
+ if cached_candidates is not None :
1152
+ return cached_candidates
1153
+
1154
+ page_links = self ._try_load_parsed_links_cache (parsed_links_path )
1155
+ else :
1156
+ try :
1157
+ parsed_links_path .unlink ()
1158
+ except OSError :
1159
+ pass
1160
+ self ._fetch_resolve_cache .clear_hashed_entries (project_url , LinkEvaluator )
1161
+
1162
+ if page_links is None :
1163
+ logger .debug (
1164
+ "extracting new parsed links from index response %s" , index_response
1165
+ )
1166
+ page_links = self ._write_parsed_links_cache (
1167
+ parsed_links_path ,
1168
+ parse_links (index_response ),
1169
+ )
1025
1170
1026
1171
with indent_log ():
1027
- package_links = self .evaluate_links (
1028
- link_evaluator ,
1029
- links = page_links ,
1172
+ package_links = self ._write_installation_candidate_cache (
1173
+ cached_candidates_path ,
1174
+ self .evaluate_links (
1175
+ link_evaluator ,
1176
+ links = page_links ,
1177
+ ),
1030
1178
)
1031
1179
1032
1180
return package_links
1033
1181
1182
+ @_canonicalize_arg
1034
1183
@functools .lru_cache (maxsize = None )
1035
1184
def find_all_candidates (self , project_name : str ) -> List [InstallationCandidate ]:
1036
1185
"""Find all available InstallationCandidate for project_name
0 commit comments