Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 580a15e

Browse files
authored
Request JSON for oEmbed requests (and ignore XML-only providers). (#10759)
This adds the format to the request arguments / URL to ensure that JSON data is returned (which is all that Synapse supports). This also adds additional error checking / filtering to the configuration file to ignore XML-only providers.
1 parent aacdce8 commit 580a15e

File tree

4 files changed

+98
-8
lines changed

4 files changed

+98
-8
lines changed

changelog.d/10759.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Allow configuration of the oEmbed URLs used for URL previews.

synapse/config/oembed.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414
import json
1515
import re
16-
from typing import Any, Dict, Iterable, List, Pattern
16+
from typing import Any, Dict, Iterable, List, Optional, Pattern
1717
from urllib import parse as urlparse
1818

1919
import attr
@@ -31,6 +31,8 @@ class OEmbedEndpointConfig:
3131
api_endpoint: str
3232
# The patterns to match.
3333
url_patterns: List[Pattern]
34+
# The supported formats.
35+
formats: Optional[List[str]]
3436

3537

3638
class OembedConfig(Config):
@@ -93,11 +95,22 @@ def _parse_and_validate_provider(
9395
# might have multiple patterns to match.
9496
for endpoint in provider["endpoints"]:
9597
api_endpoint = endpoint["url"]
98+
99+
# The API endpoint must be an HTTP(S) URL.
100+
results = urlparse.urlparse(api_endpoint)
101+
if results.scheme not in {"http", "https"}:
102+
raise ConfigError(
103+
f"Unsupported oEmbed scheme ({results.scheme}) for endpoint {api_endpoint}",
104+
config_path,
105+
)
106+
96107
patterns = [
97108
self._glob_to_pattern(glob, config_path)
98109
for glob in endpoint["schemes"]
99110
]
100-
yield OEmbedEndpointConfig(api_endpoint, patterns)
111+
yield OEmbedEndpointConfig(
112+
api_endpoint, patterns, endpoint.get("formats")
113+
)
101114

102115
def _glob_to_pattern(self, glob: str, config_path: Iterable[str]) -> Pattern:
103116
"""
@@ -114,9 +127,12 @@ def _glob_to_pattern(self, glob: str, config_path: Iterable[str]) -> Pattern:
114127
"""
115128
results = urlparse.urlparse(glob)
116129

117-
# Ensure the scheme does not have wildcards (and is a sane scheme).
130+
# The scheme must be HTTP(S) (and cannot contain wildcards).
118131
if results.scheme not in {"http", "https"}:
119-
raise ConfigError(f"Insecure oEmbed scheme: {results.scheme}", config_path)
132+
raise ConfigError(
133+
f"Unsupported oEmbed scheme ({results.scheme}) for pattern: {glob}",
134+
config_path,
135+
)
120136

121137
pattern = urlparse.urlunparse(
122138
[

synapse/rest/media/v1/oembed.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,24 @@ class OEmbedProvider:
4949
def __init__(self, hs: "HomeServer", client: SimpleHttpClient):
5050
self._oembed_patterns = {}
5151
for oembed_endpoint in hs.config.oembed.oembed_patterns:
52+
api_endpoint = oembed_endpoint.api_endpoint
53+
54+
# Only JSON is supported at the moment. This could be declared in
55+
# the formats field. Otherwise, if the endpoint ends in .xml assume
56+
# it doesn't support JSON.
57+
if (
58+
oembed_endpoint.formats is not None
59+
and "json" not in oembed_endpoint.formats
60+
) or api_endpoint.endswith(".xml"):
61+
logger.info(
62+
"Ignoring oEmbed endpoint due to not supporting JSON: %s",
63+
api_endpoint,
64+
)
65+
continue
66+
67+
# Iterate through each URL pattern and point it to the endpoint.
5268
for pattern in oembed_endpoint.url_patterns:
53-
self._oembed_patterns[pattern] = oembed_endpoint.api_endpoint
69+
self._oembed_patterns[pattern] = api_endpoint
5470
self._client = client
5571

5672
def get_oembed_url(self, url: str) -> Optional[str]:
@@ -86,11 +102,15 @@ async def get_oembed_content(self, endpoint: str, url: str) -> OEmbedResult:
86102
"""
87103
try:
88104
logger.debug("Trying to get oEmbed content for url '%s'", url)
105+
106+
# Note that only the JSON format is supported; some endpoints want
107+
# this in the URL, others want it as an argument.
108+
endpoint = endpoint.replace("{format}", "json")
109+
89110
result = await self._client.get_json(
90111
endpoint,
91112
# TODO Specify max height / width.
92-
# Note that only the JSON format is supported.
93-
args={"url": url},
113+
args={"url": url, "format": "json"},
94114
)
95115

96116
# Ensure there's a version of 1.0.

tests/rest/media/v1/test_url_preview.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,15 @@ def make_homeserver(self, reactor, clock):
9292
url_patterns=[
9393
re.compile(r"http://twitter\.com/.+/status/.+"),
9494
],
95-
)
95+
formats=None,
96+
),
97+
OEmbedEndpointConfig(
98+
api_endpoint="http://www.hulu.com/api/oembed.{format}",
99+
url_patterns=[
100+
re.compile(r"http://www\.hulu\.com/watch/.+"),
101+
],
102+
formats=["json"],
103+
),
96104
]
97105

98106
return hs
@@ -656,3 +664,48 @@ def test_oembed_rich(self):
656664
channel.json_body,
657665
{"og:title": None, "og:description": "Content Preview"},
658666
)
667+
668+
def test_oembed_format(self):
669+
"""Test an oEmbed endpoint which requires the format in the URL."""
670+
self.lookups["www.hulu.com"] = [(IPv4Address, "10.1.2.3")]
671+
672+
result = {
673+
"version": "1.0",
674+
"type": "rich",
675+
"html": "<div>Content Preview</div>",
676+
}
677+
end_content = json.dumps(result).encode("utf-8")
678+
679+
channel = self.make_request(
680+
"GET",
681+
"preview_url?url=http://www.hulu.com/watch/12345",
682+
shorthand=False,
683+
await_result=False,
684+
)
685+
self.pump()
686+
687+
client = self.reactor.tcpClients[0][2].buildProtocol(None)
688+
server = AccumulatingProtocol()
689+
server.makeConnection(FakeTransport(client, self.reactor))
690+
client.makeConnection(FakeTransport(server, self.reactor))
691+
client.dataReceived(
692+
(
693+
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
694+
b'Content-Type: application/json; charset="utf8"\r\n\r\n'
695+
)
696+
% (len(end_content),)
697+
+ end_content
698+
)
699+
700+
self.pump()
701+
702+
# The {format} should have been turned into json.
703+
self.assertIn(b"/api/oembed.json", server.data)
704+
# A URL parameter of format=json should be provided.
705+
self.assertIn(b"format=json", server.data)
706+
707+
self.assertEqual(channel.code, 200)
708+
self.assertEqual(
709+
channel.json_body,
710+
{"og:title": None, "og:description": "Content Preview"},
711+
)

0 commit comments

Comments
 (0)