@@ -10,7 +10,7 @@ use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
1010
1111use std:: borrow:: Cow ;
1212use std:: fmt:: Write ;
13- use std:: path:: Path ;
13+ use std:: path:: { Component , Path , PathBuf } ;
1414
1515pub use self :: string:: {
1616 take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
@@ -63,30 +63,87 @@ pub fn id_from_content(content: &str) -> String {
6363 normalize_id ( trimmed)
6464}
6565
/// Builds an anchor-friendly id from a path.
///
/// `..` components are resolved lexically (so `a/b/../c` is treated as
/// `a/c`), and every directory separator (`/` or `\`) in the result is then
/// replaced with a hyphen.
///
/// The resolution is purely textual: it assumes that `a/b/../c` is `a/c`,
/// which might differ from what the OS would have chosen when `b` is a
/// symlink. Adapted from <https://stackoverflow.com/a/68233480>.
///
/// A `..` that has no preceding normal component to cancel out is kept, so
/// a path that climbs above its starting directory (e.g. `../../a`) still
/// contains `..` in the returned id.
///
/// A path ending with `/` yields an id ending with `-`.
///
/// Non-UTF-8 portions of the path are converted lossily instead of
/// panicking.
pub fn normalize_path_id<P: AsRef<Path>>(path: P) -> String {
    let path = path.as_ref();
    let ends_with_slash = path.to_string_lossy().ends_with('/');

    let mut normalized = PathBuf::new();
    for component in path.components() {
        match component {
            Component::ParentDir => {
                // Only a normal component can be cancelled by `..`.
                // `PathBuf::pop` alone is not a sufficient test: it also
                // succeeds when the buffer is just `..` (or `.`), which
                // would silently swallow leading parent references.
                let cancels_normal = matches!(
                    normalized.components().next_back(),
                    Some(Component::Normal(_))
                );
                if cancels_normal {
                    normalized.pop();
                } else {
                    normalized.push(component);
                }
            }
            _ => normalized.push(component),
        }
    }

    if ends_with_slash {
        // Pushing an empty component appends a trailing separator, which
        // becomes the trailing `-` after replacement.
        normalized.push("");
    }

    normalized
        .to_string_lossy()
        .replace(&['\\', '/'][..], "-")
}
100+
66101/// Fix links to the correct location.
67102///
68103/// This adjusts links, such as turning `.md` extensions to `.html`.
69104///
70105/// `path` is the path to the page being rendered relative to the root of the
71106/// book. This is used for the `print.html` page so that links on the print
72- /// page go to the original location. Normal page rendering sets `path` to
73- /// None. Ideally, print page links would link to anchors on the print page,
74- /// but that is very difficult.
107+ /// page go to the anchors that have a path id prefix. Normal page rendering
108+ /// sets `path` to `None`.
75109fn adjust_links < ' a > ( event : Event < ' a > , path : Option < & Path > ) -> Event < ' a > {
76110 lazy_static ! {
77111 static ref SCHEME_LINK : Regex = Regex :: new( r"^[a-z][a-z0-9+.-]*:" ) . unwrap( ) ;
78112 static ref MD_LINK : Regex = Regex :: new( r"(?P<link>.*)\.md(?P<anchor>#.*)?" ) . unwrap( ) ;
113+ static ref HTML_MD_LINK : Regex =
114+ Regex :: new( r"(?P<link>.*)\.(html|md)(?P<anchor>#.*)?" ) . unwrap( ) ;
79115 }
80116
81117 fn fix < ' a > ( dest : CowStr < ' a > , path : Option < & Path > ) -> CowStr < ' a > {
118+ // Don't modify links with schemes like `https`.
119+ if !SCHEME_LINK . is_match ( & dest) {
120+ // This is a relative link, adjust it as necessary.
121+ let mut fixed_link = String :: new ( ) ;
122+ if let Some ( path) = path {
123+ let base = path
124+ . parent ( )
125+ . expect ( "path can't be empty" )
126+ . to_str ( )
127+ . expect ( "utf-8 paths only" ) ;
128+ if !base. is_empty ( ) {
129+ write ! ( fixed_link, "{}/" , base) . unwrap ( ) ;
130+ }
131+ }
132+ fixed_link. push_str ( & dest) ;
133+ return CowStr :: from ( fixed_link) ;
134+ }
135+ dest
136+ }
137+
138+ fn fix_a_links < ' a > ( dest : CowStr < ' a > , path : Option < & Path > ) -> CowStr < ' a > {
82139 if dest. starts_with ( '#' ) {
83140 // Fragment-only link.
84141 if let Some ( path) = path {
85142 let mut base = path. display ( ) . to_string ( ) ;
86143 if base. ends_with ( ".md" ) {
87- base. replace_range ( base. len ( ) - 3 .., ".html " ) ;
144+ base. replace_range ( base. len ( ) - 3 .., "" ) ;
88145 }
89- return format ! ( "{}{}" , base, dest) . into ( ) ;
146+ return format ! ( "# {}{}" , normalize_path_id ( base) , dest. replace ( "#" , "-" ) ) . into ( ) ;
90147 } else {
91148 return dest;
92149 }
@@ -106,7 +163,7 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
106163 }
107164 }
108165
109- if let Some ( caps) = MD_LINK . captures ( & dest) {
166+ if let Some ( caps) = HTML_MD_LINK . captures ( & dest) {
110167 fixed_link. push_str ( & caps[ "link" ] ) ;
111168 fixed_link. push_str ( ".html" ) ;
112169 if let Some ( anchor) = caps. name ( "anchor" ) {
@@ -115,6 +172,21 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
115172 } else {
116173 fixed_link. push_str ( & dest) ;
117174 } ;
175+
176+ let path_id = normalize_path_id ( & fixed_link)
177+ . replace ( ".html" , "" )
178+ . replace ( "#" , "-" ) ;
179+ // Judge if the html link is inside the book.
180+ if !path_id. contains ( ".." ) {
181+ if let Some ( _) = path {
182+ // In `print.html`, print page links would all link to anchors on the print page.
183+ let mut fixed_anchor_for_print = String :: new ( ) ;
184+ fixed_anchor_for_print. push_str ( "#" ) ;
185+ fixed_anchor_for_print. push_str ( & path_id) ;
186+ return CowStr :: from ( fixed_anchor_for_print) ;
187+ }
188+ }
189+ // In normal page rendering, links to anchors on another page.
118190 return CowStr :: from ( fixed_link) ;
119191 }
120192 dest
@@ -130,22 +202,29 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
130202 // feel free to add more tags if desired; these are the only ones I
131203 // care about right now.
132204 lazy_static ! {
133- static ref HTML_LINK : Regex =
134- Regex :: new( r#"(<(?:a| img) [^>]*?(?: src|href) =")([^"]+?)""# ) . unwrap( ) ;
205+ static ref A_LINK : Regex = Regex :: new ( r#"(<a [^>]*?href=")([^"]+?)""# ) . unwrap ( ) ;
206+ static ref HTML_LINK : Regex = Regex :: new( r#"(<img [^>]*?src=")([^"]+?)""# ) . unwrap( ) ;
135207 }
136208
137- HTML_LINK
209+ let temp_html = HTML_LINK
138210 . replace_all ( & html, |caps : & regex:: Captures < ' _ > | {
139211 let fixed = fix ( caps[ 2 ] . into ( ) , path) ;
140212 format ! ( "{}{}\" " , & caps[ 1 ] , fixed)
141213 } )
214+ . into_owned ( ) ;
215+
216+ A_LINK
217+ . replace_all ( & temp_html, |caps : & regex:: Captures < ' _ > | {
218+ let fixed = fix_a_links ( caps[ 2 ] . into ( ) , path) ;
219+ format ! ( "{}{}\" " , & caps[ 1 ] , fixed)
220+ } )
142221 . into_owned ( )
143222 . into ( )
144223 }
145224
146225 match event {
147226 Event :: Start ( Tag :: Link ( link_type, dest, title) ) => {
148- Event :: Start ( Tag :: Link ( link_type, fix ( dest, path) , title) )
227+ Event :: Start ( Tag :: Link ( link_type, fix_a_links ( dest, path) , title) )
149228 }
150229 Event :: Start ( Tag :: Image ( link_type, dest, title) ) => {
151230 Event :: Start ( Tag :: Image ( link_type, fix ( dest, path) , title) )
0 commit comments