70
70
71
71
class UrlParseTestCase (unittest .TestCase ):
72
72
73
- def checkRoundtrips (self , url , parsed , split ):
73
+ def checkRoundtrips (self , url , parsed , split , url2 = None ):
74
+ if url2 is None :
75
+ url2 = url
74
76
result = urllib .parse .urlparse (url )
75
77
self .assertEqual (result , parsed )
76
78
t = (result .scheme , result .netloc , result .path ,
77
79
result .params , result .query , result .fragment )
78
80
self .assertEqual (t , parsed )
79
81
# put it back together and it should be the same
80
82
result2 = urllib .parse .urlunparse (result )
81
- self .assertEqual (result2 , url )
82
- self .assertEqual (result2 , result .geturl ())
83
+ self .assertEqual (result2 , url2 )
83
84
84
85
# the result of geturl() is a fixpoint; we can always parse it
85
86
# again to get the same result:
@@ -104,8 +105,7 @@ def checkRoundtrips(self, url, parsed, split):
104
105
result .query , result .fragment )
105
106
self .assertEqual (t , split )
106
107
result2 = urllib .parse .urlunsplit (result )
107
- self .assertEqual (result2 , url )
108
- self .assertEqual (result2 , result .geturl ())
108
+ self .assertEqual (result2 , url2 )
109
109
110
110
# check the fixpoint property of re-parsing the result of geturl()
111
111
result3 = urllib .parse .urlsplit (result .geturl ())
@@ -142,9 +142,39 @@ def test_qs(self):
142
142
143
143
def test_roundtrips (self ):
144
144
str_cases = [
145
+ ('path/to/file' ,
146
+ ('' , '' , 'path/to/file' , '' , '' , '' ),
147
+ ('' , '' , 'path/to/file' , '' , '' )),
148
+ ('/path/to/file' ,
149
+ ('' , '' , '/path/to/file' , '' , '' , '' ),
150
+ ('' , '' , '/path/to/file' , '' , '' )),
151
+ ('//path/to/file' ,
152
+ ('' , 'path' , '/to/file' , '' , '' , '' ),
153
+ ('' , 'path' , '/to/file' , '' , '' )),
154
+ ('////path/to/file' ,
155
+ ('' , '' , '//path/to/file' , '' , '' , '' ),
156
+ ('' , '' , '//path/to/file' , '' , '' )),
157
+ ('scheme:path/to/file' ,
158
+ ('scheme' , '' , 'path/to/file' , '' , '' , '' ),
159
+ ('scheme' , '' , 'path/to/file' , '' , '' )),
160
+ ('scheme:/path/to/file' ,
161
+ ('scheme' , '' , '/path/to/file' , '' , '' , '' ),
162
+ ('scheme' , '' , '/path/to/file' , '' , '' )),
163
+ ('scheme://path/to/file' ,
164
+ ('scheme' , 'path' , '/to/file' , '' , '' , '' ),
165
+ ('scheme' , 'path' , '/to/file' , '' , '' )),
166
+ ('scheme:////path/to/file' ,
167
+ ('scheme' , '' , '//path/to/file' , '' , '' , '' ),
168
+ ('scheme' , '' , '//path/to/file' , '' , '' )),
145
169
('file:///tmp/junk.txt' ,
146
170
('file' , '' , '/tmp/junk.txt' , '' , '' , '' ),
147
171
('file' , '' , '/tmp/junk.txt' , '' , '' )),
172
+ ('file:////tmp/junk.txt' ,
173
+ ('file' , '' , '//tmp/junk.txt' , '' , '' , '' ),
174
+ ('file' , '' , '//tmp/junk.txt' , '' , '' )),
175
+ ('file://///tmp/junk.txt' ,
176
+ ('file' , '' , '///tmp/junk.txt' , '' , '' , '' ),
177
+ ('file' , '' , '///tmp/junk.txt' , '' , '' )),
148
178
('imap://mail.python.org/mbox1' ,
149
179
('imap' , 'mail.python.org' , '/mbox1' , '' , '' , '' ),
150
180
('imap' , 'mail.python.org' , '/mbox1' , '' , '' )),
@@ -175,6 +205,38 @@ def _encode(t):
175
205
for url , parsed , split in str_cases + bytes_cases :
176
206
self .checkRoundtrips (url , parsed , split )
177
207
208
+ def test_roundtrips_normalization (self ):
209
+ str_cases = [
210
+ ('///path/to/file' ,
211
+ '/path/to/file' ,
212
+ ('' , '' , '/path/to/file' , '' , '' , '' ),
213
+ ('' , '' , '/path/to/file' , '' , '' )),
214
+ ('scheme:///path/to/file' ,
215
+ 'scheme:/path/to/file' ,
216
+ ('scheme' , '' , '/path/to/file' , '' , '' , '' ),
217
+ ('scheme' , '' , '/path/to/file' , '' , '' )),
218
+ ('file:/tmp/junk.txt' ,
219
+ 'file:///tmp/junk.txt' ,
220
+ ('file' , '' , '/tmp/junk.txt' , '' , '' , '' ),
221
+ ('file' , '' , '/tmp/junk.txt' , '' , '' )),
222
+ ('http:/tmp/junk.txt' ,
223
+ 'http:///tmp/junk.txt' ,
224
+ ('http' , '' , '/tmp/junk.txt' , '' , '' , '' ),
225
+ ('http' , '' , '/tmp/junk.txt' , '' , '' )),
226
+ ('https:/tmp/junk.txt' ,
227
+ 'https:///tmp/junk.txt' ,
228
+ ('https' , '' , '/tmp/junk.txt' , '' , '' , '' ),
229
+ ('https' , '' , '/tmp/junk.txt' , '' , '' )),
230
+ ]
231
+ def _encode (t ):
232
+ return (t [0 ].encode ('ascii' ),
233
+ t [1 ].encode ('ascii' ),
234
+ tuple (x .encode ('ascii' ) for x in t [2 ]),
235
+ tuple (x .encode ('ascii' ) for x in t [3 ]))
236
+ bytes_cases = [_encode (x ) for x in str_cases ]
237
+ for url , url2 , parsed , split in str_cases + bytes_cases :
238
+ self .checkRoundtrips (url , parsed , split , url2 )
239
+
178
240
def test_http_roundtrips (self ):
179
241
# urllib.parse.urlsplit treats 'http:' as an optimized special case,
180
242
# so we test both 'http:' and 'https:' in all the following.
0 commit comments