Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ Features added
text
* #9176: i18n: Emit a debug message if message catalog file not found under
:confval:`locale_dirs`
* #9016: linkcheck: Support checking anchors on github.com
* #9016: linkcheck: Add a new event :event:`linkcheck-process-uri` to modify
URIs before checking hyperlinks
* #1874: py domain: Support union types using ``|`` in info-field-list
* #9097: Optimize the parallel build
* #9131: Add :confval:`nitpick_ignore_regex` to ignore nitpicky warnings using
Expand Down
8 changes: 8 additions & 0 deletions doc/extdev/appapi.rst
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,14 @@ Here is a more detailed list of these events.
.. versionchanged:: 1.3
The return value can now specify a template name.

.. event:: linkcheck-process-uri (app, uri)

Emitted when the linkcheck builder collects hyperlinks from a document. *uri*
is the collected URI. Event handlers can modify the URI by returning a
string.

.. versionadded:: 4.1

.. event:: build-finished (app, exception)

Emitted when a build has finished, before Sphinx exits, usually used for
Expand Down
28 changes: 27 additions & 1 deletion sphinx/builders/linkcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from threading import Thread
from typing import (Any, Dict, Generator, List, NamedTuple, Optional, Pattern, Set, Tuple,
Union, cast)
from urllib.parse import unquote, urlparse
from urllib.parse import unquote, urlparse, urlunparse

from docutils import nodes
from docutils.nodes import Element
Expand Down Expand Up @@ -627,6 +627,10 @@ def run(self, **kwargs: Any) -> None:
if 'refuri' not in refnode:
continue
uri = refnode['refuri']
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri)
if newuri:
uri = newuri

lineno = get_node_line(refnode)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
Expand All @@ -636,12 +640,31 @@ def run(self, **kwargs: Any) -> None:
for imgnode in self.document.traverse(nodes.image):
uri = imgnode['candidates'].get('?')
if uri and '://' in uri:
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri)
if newuri:
uri = newuri

lineno = get_node_line(imgnode)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
hyperlinks[uri] = uri_info


def rewrite_github_anchor(app: Sphinx, uri: str) -> Optional[str]:
    """Normalise the anchor of a github.com hyperlink before it is checked.

    When *uri* points at host ``github.com`` and carries a non-empty fragment
    that does not already begin with ``user-content-``, that prefix is added
    to the fragment and the rebuilt URI is returned.

    :param app: the application object; not used by this handler.
    :param uri: the hyperlink to inspect.
    :returns: the rewritten URI, or ``None`` when *uri* is left untouched
              (other host, no fragment, or fragment already prefixed).
    """
    parts = urlparse(uri)
    anchor = parts.fragment

    # Guard clauses: anything that is not an un-prefixed github.com anchor
    # is reported as "no change" by returning None.
    if parts.hostname != "github.com" or not anchor:
        return None
    if anchor.startswith('user-content-'):
        return None

    rewritten = parts._replace(fragment=f'user-content-{anchor}')
    return urlunparse(rewritten)


def setup(app: Sphinx) -> Dict[str, Any]:
app.add_builder(CheckExternalLinksBuilder)
app.add_post_transform(HyperlinkCollector)
Expand All @@ -658,6 +681,9 @@ def setup(app: Sphinx) -> Dict[str, Any]:
app.add_config_value('linkcheck_anchors_ignore', ["^!"], None)
app.add_config_value('linkcheck_rate_limit_timeout', 300.0, None)

app.add_event('linkcheck-process-uri')
app.connect('linkcheck-process-uri', rewrite_github_anchor)

return {
'version': 'builtin',
'parallel_read_safe': True,
Expand Down
2 changes: 2 additions & 0 deletions tests/roots/test-linkcheck/links.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ Some additional anchors to exercise ignore code
* `Complete nonsense <https://localhost:7777/doesnotexist>`_
* `Example valid local file <conf.py>`_
* `Example invalid local file <path/to/notfound>`_
* https://github.com/sphinx-doc/sphinx#documentation
* https://github.com/sphinx-doc/sphinx#user-content-testing

.. image:: https://www.google.com/image.png
.. figure:: https://www.google.com/image2.png
10 changes: 7 additions & 3 deletions tests/test_build_linkcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ def test_defaults_json(app):
"info"]:
assert attr in row

assert len(content.splitlines()) == 10
assert len(rows) == 10
assert len(content.splitlines()) == 12
assert len(rows) == 12
# the output order of the rows is not stable
# due to possible variance in network latency
rowsby = {row["uri"]: row for row in rows}
Expand All @@ -87,7 +87,7 @@ def test_defaults_json(app):
assert dnerow['uri'] == 'https://localhost:7777/doesnotexist'
assert rowsby['https://www.google.com/image2.png'] == {
'filename': 'links.txt',
'lineno': 18,
'lineno': 20,
'status': 'broken',
'code': 0,
'uri': 'https://www.google.com/image2.png',
Expand All @@ -101,6 +101,10 @@ def test_defaults_json(app):
# images should fail
assert "Not Found for url: https://www.google.com/image.png" in \
rowsby["https://www.google.com/image.png"]["info"]
# The anchor of the URI for github.com is automatically modified
assert 'https://github.com/sphinx-doc/sphinx#documentation' not in rowsby
assert 'https://github.com/sphinx-doc/sphinx#user-content-documentation' in rowsby
assert 'https://github.com/sphinx-doc/sphinx#user-content-testing' in rowsby


@pytest.mark.sphinx(
Expand Down