@@ -25,6 +25,168 @@ public static function can_parse( $url, $base = null ) {
2525 return URL ::canParse ( $ url , $ base );
2626 }
2727
/**
 * Replaces the "base" of a URL — scheme, host (and port), and the portion of the path that
 * belongs to the old base — with a new base while keeping the remainder of the URL intact.
 *
 * This is intended for content migrations, where URLs embedded in block markup, HTML attributes,
 * or inline text must be moved from one site root to another without losing the rest of the path,
 * query, or fragment. It handles simple domain swaps, ports, and deep path bases. When the old
 * base includes path segments, only that matched prefix is substituted and the unmatched tail is
 * carried over to the target base.
 *
 * For example:
 * * URL:      https://example.com/a/b/c/d/e/f/g/h/i/j/page/
 * * Old base: https://example.com/a/b/c/d/e/f/
 * * New base: https://example.org/docs/
 * * Result:   https://example.org/docs/g/h/i/j/page/
 *
 * ## Trailing slash handling
 *
 * Trailing slash style is preserved from the original URL. If it has no trailing slash, the
 * result will also omit the trailing slash and vice versa.
 *
 * For example, here the final result has no trailing slash:
 * * URL:      https://example.com/uploads/file.txt
 * * Old base: https://example.com/uploads/
 * * New base: https://example.org/docs/
 * * Result:   https://example.org/docs/file.txt
 *
 * And here it does:
 * * URL:      https://example.com/uploads/2018/
 * * Old base: https://example.com/uploads/
 * * New base: https://example.org/docs/
 * * Result:   https://example.org/docs/2018/
 *
 * ## URL-encoded path segments
 *
 * URL-encoded path segments are respected and not inadvertently decoded or re-encoded. Only the
 * matched base prefix is considered for alignment, so inputs that contain percent-encoded content
 * keep that content exactly as-is in the output. This prevents data corruption in tricky cases such
 * as "/~jappleseed/1997.10.1/%2561-reasons-to-migrate-data/" where the "%2561" must remain
 * double-escaped after the move.
 *
 * ## Relative URLs
 *
 * This method can preserve the relative nature of the original URL. Say you are processing
 * markup that contains `<a href="/uploads/file.txt">`. The original URL string is
 * "/uploads/file.txt", and the URL actually resolves to "https://example.com/uploads/file.txt".
 * If you want to replace the base URL from "https://example.com/uploads/" to
 * "https://example.org/files/" but keep the URL relative, you can pass the raw URL string via
 * the "raw_url" option.
 *
 * For example:
 * * URL:      https://example.com/uploads/file.txt
 * * Raw URL:  /uploads/file.txt
 * * Old base: https://example.com/uploads/
 * * New base: https://example.org/files/
 * * Result:   /files/file.txt
 *
 * The method also supports relative inputs commonly found in markup. If you pass the raw URL
 * string via the "raw_url" option, the method infers whether the author originally wrote a
 * relative URL like "docs/page.html" or an absolute one: a raw URL that cannot be parsed on its
 * own (without a base) is treated as relative. You may also explicitly assert relativity with
 * "is_relative" to avoid inference; it is only consulted when "raw_url" is a string.
 *
 * @param string|URL $url     The URL to replace the base of. It is parsed against the old base
 *                            URL, so a relative string is accepted.
 * @param array      $options {
 *     Associative options.
 *
 *     @type string|URL $old_base_url Required. The base the URL currently lives under.
 *     @type string|URL $new_base_url Required. The base to move the URL to.
 *     @type string     $raw_url      Optional. The URL exactly as written in the source markup.
 *     @type bool       $is_relative  Optional. Whether the raw URL was relative. Inferred from
 *                                    "raw_url" when omitted.
 * }
 * @return ConvertedUrl|false Returns a ConvertedUrl value object on success, or false when parsing
 *                            or replacement cannot be performed. When the raw URL was relative,
 *                            `was_relative` and `new_raw_relative_url` are also populated.
 */
public static function replace_base_url( $url, $options ) {
	if ( ! is_array( $options ) ) {
		return false;
	}

	foreach ( array( 'old_base_url', 'new_base_url' ) as $required ) {
		if ( ! array_key_exists( $required, $options ) || null === $options[ $required ] ) {
			return false;
		}
	}

	$old_base_url = self::parse( $options['old_base_url'] );
	$new_base_url = self::parse( $options['new_base_url'] );
	// Resolve the input against the old base so relative inputs are accepted.
	$url = self::parse( $url, $old_base_url ? $old_base_url->toString() : null );

	if ( false === $old_base_url || false === $new_base_url || false === $url ) {
		return false;
	}

	// Work on a copy so the parsed input stays available for the checks below.
	$updated_url = clone $url;

	$updated_url->hostname = $new_base_url->hostname;
	$updated_url->protocol = $new_base_url->protocol;
	$updated_url->port     = $new_base_url->port;

	$from_pathname = $url->pathname;
	$to_pathname   = $new_base_url->pathname;
	$base_pathname = $old_base_url->pathname;

	// The path only needs rewriting when the two bases differ in their path component.
	if ( $base_pathname !== $to_pathname ) {
		/*
		 * Normalize both bases to end with exactly one slash so the prefix
		 * length and the concatenation below line up on a segment boundary.
		 *
		 * NOTE(review): urldecode_n() is defined outside this block; presumably
		 * it decodes only the leading portion covering the base prefix and leaves
		 * the tail untouched — confirm. Also note that nothing here verifies that
		 * the URL actually lives under the old base; the prefix is cut by length.
		 */
		$base_pathname_with_trailing_slash = rtrim( $base_pathname, '/' ) . '/';
		$base_prefix_length                = strlen( $base_pathname_with_trailing_slash );
		$decoded_matched_pathname          = urldecode_n( $from_pathname, $base_prefix_length );
		$to_pathname_with_trailing_slash   = rtrim( $to_pathname, '/' ) . '/';
		$remaining_pathname                = substr( $decoded_matched_pathname, $base_prefix_length );

		$updated_url->pathname = $to_pathname_with_trailing_slash . $remaining_pathname;
	}

	/*
	 * Stylistic choice – if the original URL has no trailing slash,
	 * do not add it to the new URL. The WHATWG URL parser will
	 * add one automatically if the path is empty, so we have to
	 * explicitly remove it.
	 *
	 * Trimming the serialized URL is only safe when the path is the last
	 * component, i.e. there is no query string and no fragment. A pathname
	 * of "/" ends with a slash and is therefore never trimmed.
	 */
	$new_raw_url                = $updated_url->toString();
	$should_trim_trailing_slash = (
		'' !== $from_pathname &&
		'/' !== substr( $from_pathname, -1 ) &&
		'' === $url->search &&
		'' === $url->hash
	);
	if ( $should_trim_trailing_slash ) {
		$new_raw_url = rtrim( $new_raw_url, '/' );
	}
	if ( ! $new_raw_url ) {
		// This may technically happen, but does it happen in practice?
		return false;
	}

	$converted_url              = new ConvertedUrl();
	$converted_url->new_url     = $updated_url;
	$converted_url->new_raw_url = $new_raw_url;

	// Preserve the relative nature of the original URL.
	if ( array_key_exists( 'raw_url', $options ) && is_string( $options['raw_url'] ) ) {
		if ( ! array_key_exists( 'is_relative', $options ) ) {
			/*
			 * A raw URL that parses on its own, without a base, is absolute.
			 * It is relative only when standalone parsing fails.
			 */
			$options['is_relative'] = ! self::can_parse( $options['raw_url'] );
		}
		if ( $options['is_relative'] ) {
			$relative_url = $updated_url->pathname;
			// Remove the trailing slash if it's not the root path.
			if ( strlen( $relative_url ) > 1 && $should_trim_trailing_slash ) {
				$relative_url = rtrim( $relative_url, '/' );
			}
			if ( '' !== $updated_url->search ) {
				$relative_url .= $updated_url->search;
			}
			if ( '' !== $updated_url->hash ) {
				$relative_url .= $updated_url->hash;
			}

			$converted_url->was_relative         = true;
			$converted_url->new_raw_relative_url = $relative_url;
		}
	}

	return $converted_url;
}
189+
28190 /**
29191 * Prepends a protocol to any matched URL without the double slash.
30192 *
0 commit comments