Skip to content

Commit d1a4395

Browse files
committed
Merge branch 'fix-cjs' of github.com:privatenumber/parse-url into new-version
2 parents 9cacf38 + c2735c9 commit d1a4395

File tree

5 files changed

+498
-9
lines changed

5 files changed

+498
-9
lines changed

dist/index.js

Lines changed: 248 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,257 @@
11
'use strict';
22

33
var parsePath = require('parse-path');
4-
var normalizeUrl = require('normalize-url');
54

65
/**
 * CommonJS/ES-module interop helper.
 *
 * Returns `e` untouched when it is a truthy object that already carries a
 * `default` property; otherwise wraps it as `{ default: e }` so callers can
 * always read the export through `.default`.
 *
 * @param {*} e - The value returned by `require()`.
 * @returns {object} An object exposing the export under the `default` key.
 */
function _interopDefaultLegacy (e) {
	const alreadyHasDefault = Boolean(e) && typeof e === 'object' && 'default' in e;

	if (alreadyHasDefault) {
		return e;
	}

	return { 'default': e };
}
76

87
var parsePath__default = /*#__PURE__*/_interopDefaultLegacy(parsePath);
9-
var normalizeUrl__default = /*#__PURE__*/_interopDefaultLegacy(normalizeUrl);
8+
9+
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
10+
const DATA_URL_DEFAULT_MIME_TYPE = 'text/plain';
11+
const DATA_URL_DEFAULT_CHARSET = 'us-ascii';
12+
13+
/**
 * Check whether `name` matches at least one entry of `filters`.
 *
 * A `RegExp` filter matches when it tests positive against `name`; any other
 * filter matches only when it is strictly equal to `name`.
 *
 * @param {string} name - The value to check (e.g. a query parameter key or a path component).
 * @param {Array<RegExp|string>} filters - Patterns or exact values to match against.
 * @returns {boolean} `true` if any filter matches, otherwise `false`.
 */
const testParameter = (name, filters) => filters.some(filter => {
	if (filter instanceof RegExp) {
		// `test()` on a global or sticky regex resumes from `lastIndex`, so a
		// caller-supplied `/.../g` filter would otherwise match one name and
		// then miss the next. Always start matching from the beginning.
		filter.lastIndex = 0;
		return filter.test(name);
	}

	return filter === name;
});
14+
15+
/**
 * Normalize a `data:` URL.
 *
 * The media type is split on `;`. The MIME type is lowercased, each attribute
 * is trimmed, a `charset` value is lowercased and dropped when it equals
 * `DATA_URL_DEFAULT_CHARSET`, and a trailing `base64` marker is preserved.
 * The MIME type is omitted when it equals `DATA_URL_DEFAULT_MIME_TYPE` and no
 * attributes or `base64` marker remain.
 *
 * @param {string} urlString - The data URL to normalize.
 * @param {object} options
 * @param {boolean} [options.stripHash] - When truthy, the `#hash` part is removed.
 * @returns {string} The normalized data URL, always using the lowercase `data:` scheme.
 * @throws {Error} If `urlString` does not have the `data:<type>,<data>` shape.
 */
const normalizeDataURL = (urlString, {stripHash}) => {
	// The scheme is matched case-insensitively so that inputs such as `DATA:...`,
	// which the caller's `/^data:/i` check lets through, are parsed instead of rejected.
	const match = /^data:(?<type>[^,]*?),(?<data>[^#]*?)(?:#(?<hash>.*))?$/i.exec(urlString);

	if (!match) {
		throw new Error(`Invalid URL: ${urlString}`);
	}

	let {type, data, hash} = match.groups;
	const mediaType = type.split(';');
	hash = stripHash ? '' : hash;

	// A trailing `base64` token is an encoding marker, not an attribute.
	let isBase64 = false;
	if (mediaType[mediaType.length - 1] === 'base64') {
		mediaType.pop();
		isBase64 = true;
	}

	// Lowercase MIME type
	const mimeType = (mediaType.shift() || '').toLowerCase();
	const attributes = mediaType
		.map(attribute => {
			let [key, value = ''] = attribute.split('=').map(string => string.trim());

			// Lowercase `charset`
			if (key === 'charset') {
				value = value.toLowerCase();

				// The default charset is implied, so it is dropped entirely.
				if (value === DATA_URL_DEFAULT_CHARSET) {
					return '';
				}
			}

			return `${key}${value ? `=${value}` : ''}`;
		})
		.filter(Boolean);

	const normalizedMediaType = [
		...attributes,
	];

	if (isBase64) {
		normalizedMediaType.push('base64');
	}

	// Keep the MIME type (even an empty one) whenever something follows it,
	// or when it differs from the implied default.
	if (normalizedMediaType.length > 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE)) {
		normalizedMediaType.unshift(mimeType);
	}

	return `data:${normalizedMediaType.join(';')},${isBase64 ? data.trim() : data}${hash ? `#${hash}` : ''}`;
};
65+
66+
/**
 * Normalize a URL string.
 *
 * `data:` URLs are delegated to `normalizeDataURL`. Every other input is
 * parsed with the WHATWG `URL` class after a protocol is prepended where
 * needed, and then adjusted according to `options`.
 *
 * @param {string} urlString - The URL to normalize. Surrounding whitespace is trimmed.
 * @param {object} [options] - Overrides for the defaults listed below.
 * @param {string} [options.defaultProtocol='http:'] - Protocol prepended to URLs that lack one.
 * @param {boolean} [options.normalizeProtocol=true] - When `false`, a protocol-relative input (`//host`) stays protocol-relative.
 * @param {boolean} [options.forceHttp=false] - Rewrite `https:` to `http:`.
 * @param {boolean} [options.forceHttps=false] - Rewrite `http:` to `https:`.
 * @param {boolean} [options.stripAuthentication=true] - Remove the username and password.
 * @param {boolean} [options.stripHash=false] - Remove the hash.
 * @param {boolean} [options.stripTextFragment=true] - Remove a `:~:text` fragment from the hash.
 * @param {boolean} [options.stripWWW=true] - Remove a leading `www.` from the hostname.
 * @param {Array<RegExp|string>|boolean} [options.removeQueryParameters=[/^utm_\w+/i]] - Query keys to delete; `true` removes the whole query.
 * @param {boolean} [options.removeTrailingSlash=true] - Remove a trailing slash from the path.
 * @param {boolean} [options.removeSingleSlash=true] - Remove the lone `/` after the host.
 * @param {Array<RegExp|string>|boolean} [options.removeDirectoryIndex=false] - Last path components to drop; `true` means `[/^index\.[a-z]+$/]`.
 * @param {boolean} [options.sortQueryParameters=true] - Sort the query parameters.
 * @param {boolean} [options.stripProtocol] - Remove a leading `http://`, `https://` or `//` from the result.
 * @returns {string} The normalized URL.
 * @throws {Error} For `view-source:` URLs, for invalid `data:` URLs, and when
 *   `forceHttp` and `forceHttps` are both set.
 * @throws {TypeError} From `new URL()` when the input cannot be parsed.
 */
function normalizeUrl(urlString, options) {
	// Spreading builds a fresh object, so the caller's `options` is never mutated.
	options = {
		defaultProtocol: 'http:',
		normalizeProtocol: true,
		forceHttp: false,
		forceHttps: false,
		stripAuthentication: true,
		stripHash: false,
		stripTextFragment: true,
		stripWWW: true,
		removeQueryParameters: [/^utm_\w+/i],
		removeTrailingSlash: true,
		removeSingleSlash: true,
		removeDirectoryIndex: false,
		sortQueryParameters: true,
		...options,
	};

	urlString = urlString.trim();

	// Data URL
	if (/^data:/i.test(urlString)) {
		return normalizeDataURL(urlString, options);
	}

	if (/^view-source:/i.test(urlString)) {
		throw new Error('`view-source:` is not supported as it is a non-standard protocol');
	}

	const hasRelativeProtocol = urlString.startsWith('//');
	const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString);

	// Prepend protocol
	if (!isRelativeUrl) {
		urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol);
	}

	const urlObject = new URL(urlString);

	// Remember the protocol as parsed (before any forced rewrite) so a
	// protocol-relative input can be restored later whatever `defaultProtocol` is.
	const parsedProtocol = urlObject.protocol;

	if (options.forceHttp && options.forceHttps) {
		throw new Error('The `forceHttp` and `forceHttps` options cannot be used together');
	}

	if (options.forceHttp && urlObject.protocol === 'https:') {
		urlObject.protocol = 'http:';
	}

	if (options.forceHttps && urlObject.protocol === 'http:') {
		urlObject.protocol = 'https:';
	}

	// Remove auth
	if (options.stripAuthentication) {
		urlObject.username = '';
		urlObject.password = '';
	}

	// Remove hash
	if (options.stripHash) {
		urlObject.hash = '';
	} else if (options.stripTextFragment) {
		urlObject.hash = urlObject.hash.replace(/#?:~:text.*?$/i, '');
	}

	// Remove duplicate slashes if not preceded by a protocol
	// NOTE: This could be implemented using a single negative lookbehind
	// regex, but we avoid that to maintain compatibility with older js engines
	// which do not have support for that feature.
	if (urlObject.pathname) {
		// TODO: Replace everything below with `urlObject.pathname = urlObject.pathname.replace(/(?<!\b[a-z][a-z\d+\-.]{1,50}:)\/{2,}/g, '/');` when Safari supports negative lookbehind.

		// Split the string by occurrences of this protocol regex, and perform
		// duplicate-slash replacement on the strings between those occurrences
		// (if any).
		const protocolRegex = /\b[a-z][a-z\d+\-.]{1,50}:\/\//g;

		let lastIndex = 0;
		let result = '';
		for (;;) {
			const match = protocolRegex.exec(urlObject.pathname);
			if (!match) {
				break;
			}

			const protocol = match[0];
			const protocolAtIndex = match.index;
			const intermediate = urlObject.pathname.slice(lastIndex, protocolAtIndex);

			result += intermediate.replace(/\/{2,}/g, '/');
			result += protocol;
			lastIndex = protocolAtIndex + protocol.length;
		}

		const remnant = urlObject.pathname.slice(lastIndex, urlObject.pathname.length);
		result += remnant.replace(/\/{2,}/g, '/');

		urlObject.pathname = result;
	}

	// Decode URI octets
	if (urlObject.pathname) {
		try {
			urlObject.pathname = decodeURI(urlObject.pathname);
		} catch {
			// Best effort: a malformed escape sequence leaves the path as it is.
		}
	}

	// Remove directory index
	if (options.removeDirectoryIndex === true) {
		options.removeDirectoryIndex = [/^index\.[a-z]+$/];
	}

	if (Array.isArray(options.removeDirectoryIndex) && options.removeDirectoryIndex.length > 0) {
		let pathComponents = urlObject.pathname.split('/');
		const lastComponent = pathComponents[pathComponents.length - 1];

		if (testParameter(lastComponent, options.removeDirectoryIndex)) {
			pathComponents = pathComponents.slice(0, -1);
			urlObject.pathname = pathComponents.slice(1).join('/') + '/';
		}
	}

	if (urlObject.hostname) {
		// Remove trailing dot
		urlObject.hostname = urlObject.hostname.replace(/\.$/, '');

		// Remove `www.`
		if (options.stripWWW && /^www\.(?!www\.)[a-z\-\d]{1,63}\.[a-z.\-\d]{2,63}$/.test(urlObject.hostname)) {
			// Each label should be max 63 at length (min: 1).
			// Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
			// Each TLD should be up to 63 characters long (min: 2).
			// It is technically possible to have a single character TLD, but none currently exist.
			urlObject.hostname = urlObject.hostname.replace(/^www\./, '');
		}
	}

	// Remove query unwanted parameters
	if (Array.isArray(options.removeQueryParameters)) {
		// eslint-disable-next-line unicorn/no-useless-spread -- We are intentionally spreading to get a copy.
		for (const key of [...urlObject.searchParams.keys()]) {
			if (testParameter(key, options.removeQueryParameters)) {
				urlObject.searchParams.delete(key);
			}
		}
	}

	if (options.removeQueryParameters === true) {
		urlObject.search = '';
	}

	// Sort query parameters
	if (options.sortQueryParameters) {
		urlObject.searchParams.sort();

		// Calling `.sort()` encodes the search parameters, so we need to decode them again.
		try {
			urlObject.search = decodeURIComponent(urlObject.search);
		} catch {
			// Best effort: a malformed escape sequence leaves the query encoded.
		}
	}

	if (options.removeTrailingSlash) {
		urlObject.pathname = urlObject.pathname.replace(/\/$/, '');
	}

	const oldUrlString = urlString;

	// Take advantage of many of the Node `url` normalizations
	urlString = urlObject.toString();

	if (!options.removeSingleSlash && urlObject.pathname === '/' && !oldUrlString.endsWith('/') && urlObject.hash === '') {
		urlString = urlString.replace(/\/$/, '');
	}

	// Remove ending `/` unless removeSingleSlash is false
	if ((options.removeTrailingSlash || urlObject.pathname === '/') && urlObject.hash === '' && options.removeSingleSlash) {
		urlString = urlString.replace(/\/$/, '');
	}

	// Restore relative protocol, if applicable.
	// `http:` is always restored. Any other protocol is restored only when it
	// is the one the URL had right after parsing, i.e. it came from
	// `defaultProtocol` rather than from `forceHttps`.
	if (hasRelativeProtocol && !options.normalizeProtocol) {
		const finalProtocol = urlObject.protocol;

		if (finalProtocol === 'http:' || finalProtocol === parsedProtocol) {
			const protocolPrefix = `${finalProtocol}//`;

			if (urlString.startsWith(protocolPrefix)) {
				urlString = `//${urlString.slice(protocolPrefix.length)}`;
			}
		}
	}

	// Remove http/https
	if (options.stripProtocol) {
		urlString = urlString.replace(/^(?:https?:)?\/\//, '');
	}

	return urlString;
}
10255

11256
// Dependencies
12257

@@ -65,7 +310,7 @@ const parseUrl = (url, normalize = false) => {
65310
stripHash: false
66311
};
67312
}
68-
url = normalizeUrl__default["default"](url, normalize);
313+
url = normalizeUrl(url, normalize);
69314
}
70315

71316
const parsed = parsePath__default["default"](url);

0 commit comments

Comments
 (0)