From 1cad688bda57dcc8f9c09dff30fcbce818a3a20d Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 10 Sep 2020 08:47:44 -0400 Subject: [PATCH 1/5] Additional fixes for rejecting invalid JSON. This augments #8106 to handle: * Cases where we use treq to decode JSON. * A bug in the exception that gets raised during invalid JSON handling. --- synapse/http/matrixfederationclient.py | 4 +++- synapse/util/__init__.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 775fad3be461..359ebc0b85de 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -54,6 +54,7 @@ start_active_span, tags, ) +from synapse.util import _reject_invalid_json from synapse.util.async_helpers import timeout_deferred from synapse.util.metrics import Measure @@ -164,7 +165,8 @@ async def _handle_json_response( try: check_content_type_is_json(response.headers) - d = treq.json_content(response) + # Reject Python extensions to JSON. + d = treq.json_content(response, parse_constant=_reject_invalid_json) d = timeout_deferred(d, timeout=timeout_sec, reactor=reactor) body = await make_deferred_yieldable(d) diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py index 3ad4b28fc73b..a13f11f8d861 100644 --- a/synapse/util/__init__.py +++ b/synapse/util/__init__.py @@ -28,7 +28,7 @@ def _reject_invalid_json(val): """Do not allow Infinity, -Infinity, or NaN values in JSON.""" - raise json.JSONDecodeError("Invalid JSON value: '%s'" % val) + raise ValueError("Invalid JSON value: '%s'" % val) # Create a custom encoder to reduce the whitespace produced by JSON encoding and From 66ff8c13b2280831267b8d7735d6d1a74a09b8da Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 10 Sep 2020 10:57:52 -0400 Subject: [PATCH 2/5] Add some tests. --- tests/federation/test_federation_server.py | 33 +++++++++ tests/http/test_fedclient.py | 48 +++++++++++++ tests/http/test_servlet.py | 80 ++++++++++++++++++++++ 3 files changed, 161 insertions(+) create mode 100644 tests/http/test_servlet.py diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py index 296dc887bef6..da933ecd7595 100644 --- a/tests/federation/test_federation_server.py +++ b/tests/federation/test_federation_server.py @@ -15,6 +15,8 @@ # limitations under the License. import logging +from parameterized import parameterized + from synapse.events import make_event_from_dict from synapse.federation.federation_server import server_matches_acl_event from synapse.rest import admin @@ -23,6 +25,37 @@ from tests import unittest +class FederationServerTests(unittest.FederatingHomeserverTestCase): + + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + + @parameterized.expand([(b"",), (b"foo",), (b'{"limit": Infinity}',)]) + def test_bad_request(self, query_content): + """ + Querying with bad data returns a reasonable error code. + """ + u1 = self.register_user("u1", "pass") + u1_token = self.login("u1", "pass") + + room_1 = self.helper.create_room_as(u1, tok=u1_token) + self.inject_room_member(room_1, "@user:other.example.com", "join") + + "/get_missing_events/(?P[^/]*)/?" + + request, channel = self.make_request( + "POST", + "/_matrix/federation/v1/get_missing_events/%s" % (room_1,), + query_content, + ) + self.render(request) + self.assertEquals(400, channel.code, channel.result) + self.assertEqual(channel.json_body["errcode"], "M_NOT_JSON") + + class ServerACLsTestCase(unittest.TestCase): def test_blacklisted_server(self): e = _create_acl_event({"allow": ["*"], "deny": ["evil.com"]}) diff --git a/tests/http/test_fedclient.py b/tests/http/test_fedclient.py index 7f1dfb2128d7..5604af379522 100644 --- a/tests/http/test_fedclient.py +++ b/tests/http/test_fedclient.py @@ -16,6 +16,7 @@ from mock import Mock from netaddr import IPSet +from parameterized import parameterized from twisted.internet import defer from twisted.internet.defer import TimeoutError @@ -511,3 +512,50 @@ def test_closes_connection(self): self.reactor.advance(120) self.assertTrue(conn.disconnecting) + + @parameterized.expand([(b"",), (b"foo",), (b'{"a": Infinity}',)]) + def test_json_error(self, return_value): + """ + Test what happens if invalid JSON is returned from the remote endpoint. + """ + + test_d = defer.ensureDeferred(self.cl.get_json("testserv:8008", "foo/bar")) + + self.pump() + + # Nothing happened yet + self.assertNoResult(test_d) + + # Make sure treq is trying to connect + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, "1.2.3.4") + self.assertEqual(port, 8008) + + # complete the connection and wire it up to a fake transport + protocol = factory.buildProtocol(None) + transport = StringTransport() + protocol.makeConnection(transport) + + # that should have made it send the request to the transport + self.assertRegex(transport.value(), b"^GET /foo/bar") + self.assertRegex(transport.value(), b"Host: testserv:8008") + + # Deferred is still without a result + self.assertNoResult(test_d) + + # Send it the HTTP response + protocol.dataReceived( + b"HTTP/1.1 200 OK\r\n" + b"Server: Fake\r\n" + b"Content-Type: application/json\r\n" + b"Content-Length: %i\r\n" + b"\r\n" + b"%s" % (len(return_value), return_value) + ) + + self.pump() + + f = self.failureResultOf(test_d) + self.assertIsInstance(f.value, ValueError) diff --git a/tests/http/test_servlet.py b/tests/http/test_servlet.py new file mode 100644 index 000000000000..45089158ceb6 --- /dev/null +++ b/tests/http/test_servlet.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +from io import BytesIO + +from mock import Mock + +from synapse.api.errors import SynapseError +from synapse.http.servlet import ( + parse_json_object_from_request, + parse_json_value_from_request, +) + +from tests import unittest + + +def make_request(content): + """Make an object that acts enough like a request.""" + request = Mock(spec=["content"]) + + if isinstance(content, dict): + content = json.dumps(content).encode("utf8") + + request.content = BytesIO(content) + return request + + +class TestServletUtils(unittest.TestCase): + def test_parse_json_value(self): + """Basic tests for parse_json_value_from_request.""" + # Test round-tripping. + obj = {"foo": 1} + result = parse_json_value_from_request(make_request(obj)) + self.assertEqual(result, obj) + + # Results don't have to be objects. + result = parse_json_value_from_request(make_request(b'["foo"]')) + self.assertEqual(result, ["foo"]) + + # Test empty. + with self.assertRaises(SynapseError): + parse_json_value_from_request(make_request(b"")) + + result = parse_json_value_from_request(make_request(b""), allow_empty_body=True) + self.assertIsNone(result) + + # Invalid UTF-8. + with self.assertRaises(SynapseError): + parse_json_value_from_request(make_request(b"\xFF\x00")) + + # Invalid JSON. + with self.assertRaises(SynapseError): + parse_json_value_from_request(make_request(b"foo")) + + with self.assertRaises(SynapseError): + parse_json_value_from_request(make_request(b'{"foo": Infinity}')) + + def test_parse_json_object(self): + """Basic tests for parse_json_object_from_request.""" + # Test empty. + result = parse_json_object_from_request( + make_request(b""), allow_empty_body=True + ) + self.assertEqual(result, {}) + + # Test not an object + with self.assertRaises(SynapseError): + parse_json_object_from_request(make_request(b'["foo"]')) From 5e116d69457e53fab7f5d0b0ba2328435ca67851 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 10 Sep 2020 12:26:00 -0400 Subject: [PATCH 3/5] Add a changelog. --- changelog.d/8291.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/8291.bugfix diff --git a/changelog.d/8291.bugfix b/changelog.d/8291.bugfix new file mode 100644 index 000000000000..bc01d26f53e5 --- /dev/null +++ b/changelog.d/8291.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in v1.20.0rc1 that the wrong exception was raised when invalid JSON data is encountered. From 6304ddde8e57c4b4116563f3eaca5df09e86d04e Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 10 Sep 2020 13:30:39 -0400 Subject: [PATCH 4/5] Bump treq. --- synapse/python_dependencies.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 2d995ec456a5..36d9cb62c42d 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -53,7 +53,8 @@ # Twisted 18.9 introduces some logger improvements that the structured # logger utilises "Twisted>=18.9.0", - "treq>=15.1", + # treq 18.6.0 allows customization of the json decoder. + "treq>=18.6.0", # Twisted has required pyopenssl 16.0 since about Twisted 16.6. "pyopenssl>=16.0.0", "pyyaml>=3.11", From 4d654058dec5a77e2d1c50251ac8cd86c2e8e98f Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 10 Sep 2020 13:38:59 -0400 Subject: [PATCH 5/5] Inline json_content for old treq. --- synapse/http/matrixfederationclient.py | 7 ++++--- synapse/python_dependencies.py | 3 +-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 359ebc0b85de..5eaf3151ce24 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -54,7 +54,7 @@ start_active_span, tags, ) -from synapse.util import _reject_invalid_json +from synapse.util import json_decoder from synapse.util.async_helpers import timeout_deferred from synapse.util.metrics import Measure @@ -165,8 +165,9 @@ async def _handle_json_response( try: check_content_type_is_json(response.headers) - # Reject Python extensions to JSON. - d = treq.json_content(response, parse_constant=_reject_invalid_json) + # Use the custom JSON decoder (partially re-implements treq.json_content). + d = treq.text_content(response, encoding="utf-8") + d.addCallback(json_decoder.decode) d = timeout_deferred(d, timeout=timeout_sec, reactor=reactor) body = await make_deferred_yieldable(d) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 36d9cb62c42d..2d995ec456a5 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -53,8 +53,7 @@ # Twisted 18.9 introduces some logger improvements that the structured # logger utilises "Twisted>=18.9.0", - # treq 18.6.0 allows customization of the json decoder. - "treq>=18.6.0", + "treq>=15.1", # Twisted has required pyopenssl 16.0 since about Twisted 16.6. "pyopenssl>=16.0.0", "pyyaml>=3.11",