diff --git a/.gitignore b/.gitignore index 50003f37..ee50e4ab 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,6 @@ dist # documentation docs/_build + +.DS_Store +pytestdebug.log diff --git a/tests/client/conftest.py b/tests/client/conftest.py index cf4ceed7..1b62cafd 100644 --- a/tests/client/conftest.py +++ b/tests/client/conftest.py @@ -1,7 +1,4 @@ import os -import zlib -import base64 -import pickle import vcr import pytest @@ -12,18 +9,16 @@ from scrapinghub.hubstorage.serialization import MSGPACK_AVAILABLE from ..conftest import request_accept_header_matcher - - -TEST_PROJECT_ID = "2222222" -TEST_SPIDER_NAME = 'hs-test-spider' -TEST_FRONTIER_SLOT = 'site.com' -TEST_BOTGROUP = 'python-hubstorage-test' -TEST_COLLECTION_NAME = "test_collection_123" -TEST_ADMIN_AUTH = os.getenv('AUTH', 'f' * 32) -TEST_USER_AUTH = os.getenv('USER_AUTH', 'e' * 32) -TEST_DASH_ENDPOINT = os.getenv('DASH_ENDPOINT', 'http://33.33.33.51:8080/api/') -TEST_HS_ENDPOINT = os.getenv('HS_ENDPOINT', - 'http://storage.vm.scrapinghub.com') +from ..conftest import VCRGzipSerializer +from ..conftest import ( + TEST_SPIDER_NAME, + TEST_FRONTIER_SLOT, + TEST_COLLECTION_NAME, + TEST_ENDPOINT, + TEST_PROJECT_ID, + TEST_ADMIN_AUTH, + TEST_DASH_ENDPOINT, +) # use some fixed timestamp to represent current time TEST_TS = 1476803148638 @@ -31,23 +26,6 @@ # vcrpy creates the cassetes automatically under VCR_CASSETES_DIR VCR_CASSETES_DIR = 'tests/client/cassetes' - -class VCRGzipSerializer(object): - """Custom ZIP serializer for VCR.py.""" - - def serialize(self, cassette_dict): - # receives a dict, must return a string - # there can be binary data inside some of the requests, - # so it's impossible to use json for serialization to string - compressed = zlib.compress(pickle.dumps(cassette_dict, protocol=2)) - return base64.b64encode(compressed).decode('utf8') - - def deserialize(self, cassette_string): - # receives a string, must return a dict - decoded = base64.b64decode(cassette_string.encode('utf8')) 
- return pickle.loads(zlib.decompress(decoded)) - - my_vcr = vcr.VCR(cassette_library_dir=VCR_CASSETES_DIR, record_mode='once') my_vcr.register_serializer('gz', VCRGzipSerializer()) my_vcr.register_matcher('accept_header', request_accept_header_matcher) @@ -79,7 +57,7 @@ def is_using_real_services(request): @pytest.fixture(scope='session') def client(): return ScrapinghubClient(auth=TEST_ADMIN_AUTH, - endpoint=TEST_HS_ENDPOINT, + endpoint=TEST_ENDPOINT, dash_endpoint=TEST_DASH_ENDPOINT) diff --git a/tests/client/test_activity.py b/tests/client/test_activity.py index e74785ec..5593ec54 100644 --- a/tests/client/test_activity.py +++ b/tests/client/test_activity.py @@ -2,7 +2,7 @@ import pytest -from .conftest import TEST_PROJECT_ID +from ..conftest import TEST_PROJECT_ID def _add_test_activity(project): diff --git a/tests/client/test_client.py b/tests/client/test_client.py index 52a8a0d2..8bc64aeb 100644 --- a/tests/client/test_client.py +++ b/tests/client/test_client.py @@ -6,7 +6,7 @@ from scrapinghub.client.jobs import Job from scrapinghub.client.projects import Projects, Project -from .conftest import TEST_PROJECT_ID +from ..conftest import TEST_PROJECT_ID # ScrapinghubClient class tests diff --git a/tests/client/test_collections.py b/tests/client/test_collections.py index 8dae301b..775d8fb8 100644 --- a/tests/client/test_collections.py +++ b/tests/client/test_collections.py @@ -7,7 +7,7 @@ from scrapinghub.client.exceptions import NotFound from scrapinghub.client.exceptions import ValueTooLarge -from .conftest import TEST_COLLECTION_NAME +from ..conftest import TEST_COLLECTION_NAME def _mkitem(): diff --git a/tests/client/test_frontiers.py b/tests/client/test_frontiers.py index 5ea37d40..1dbcb827 100644 --- a/tests/client/test_frontiers.py +++ b/tests/client/test_frontiers.py @@ -5,7 +5,7 @@ from six import string_types from scrapinghub.client.frontiers import Frontiers, Frontier, FrontierSlot -from .conftest import TEST_FRONTIER_SLOT +from ..conftest import 
TEST_FRONTIER_SLOT def _add_test_requests_to_frontier(frontier): diff --git a/tests/client/test_job.py b/tests/client/test_job.py index 5db365a8..af376fc7 100644 --- a/tests/client/test_job.py +++ b/tests/client/test_job.py @@ -9,8 +9,8 @@ from scrapinghub.client.requests import Requests from scrapinghub.client.samples import Samples -from .conftest import TEST_PROJECT_ID -from .conftest import TEST_SPIDER_NAME +from ..conftest import TEST_PROJECT_ID +from ..conftest import TEST_SPIDER_NAME def test_job_base(client, spider): diff --git a/tests/client/test_projects.py b/tests/client/test_projects.py index 367c1ebf..1d2299bd 100644 --- a/tests/client/test_projects.py +++ b/tests/client/test_projects.py @@ -17,8 +17,8 @@ from scrapinghub.hubstorage.utils import apipoll -from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME -from .conftest import TEST_USER_AUTH, TEST_DASH_ENDPOINT +from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME +from ..conftest import TEST_USER_AUTH, TEST_DASH_ENDPOINT from .utils import validate_default_meta diff --git a/tests/client/test_spiders.py b/tests/client/test_spiders.py index db22db8a..48ca2832 100644 --- a/tests/client/test_spiders.py +++ b/tests/client/test_spiders.py @@ -12,7 +12,7 @@ from scrapinghub.client.spiders import Spider from scrapinghub.client.utils import JobKey -from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME +from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME from .utils import validate_default_meta diff --git a/tests/client/utils.py b/tests/client/utils.py index acb3504f..8e60cb39 100644 --- a/tests/client/utils.py +++ b/tests/client/utils.py @@ -1,5 +1,5 @@ -from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME -from .conftest import TEST_DASH_ENDPOINT +from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME +from ..conftest import TEST_DASH_ENDPOINT def validate_default_meta(meta, state='pending', units=1, diff --git a/tests/conftest.py b/tests/conftest.py index f32fa62c..ea2efd47 100644 --- 
a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,91 @@ # -*- coding: utf-8 -*- +import base64 +import os +import pickle import pytest import re +import zlib from scrapinghub.hubstorage.serialization import MSGPACK_AVAILABLE +from scrapinghub import HubstorageClient +from scrapinghub.legacy import Connection + + +DEFAULT_PROJECT_ID = "2222222" +DEFAULT_ENDPOINT = 'http://storage.vm.scrapinghub.com' +DEFAULT_DASH_ENDPOINT = 'http://33.33.33.51:8080/api/' +DEFAULT_ADMIN_AUTH = 'f' * 32 +DEFAULT_USER_AUTH = 'e' * 32 + + +TEST_PROJECT_ID = os.getenv('HS_PROJECT_ID', DEFAULT_PROJECT_ID) +TEST_SPIDER_NAME = 'hs-test-spider' +TEST_FRONTIER_SLOT = 'site.com' +TEST_BOTGROUP = 'python-hubstorage-test' +TEST_COLLECTION_NAME = "test_collection_123" +TEST_AUTH = os.getenv('HS_AUTH', DEFAULT_ADMIN_AUTH) +TEST_ENDPOINT = os.getenv('HS_ENDPOINT', DEFAULT_ENDPOINT) +TEST_ADMIN_AUTH = os.getenv('AUTH', DEFAULT_ADMIN_AUTH) +TEST_USER_AUTH = os.getenv('USER_AUTH', DEFAULT_USER_AUTH) +TEST_DASH_ENDPOINT = os.getenv('DASH_ENDPOINT', DEFAULT_DASH_ENDPOINT) + + +class VCRGzipSerializer(object): + """Custom ZIP serializer for VCR.py.""" + + def serialize(self, cassette_dict): + # receives a dict, must return a string + # there can be binary data inside some of the requests, + # so it's impossible to use json for serialization to string + cassette_dict = normalize_cassette(cassette_dict) + compressed = zlib.compress(pickle.dumps(cassette_dict, protocol=2)) + return base64.b64encode(compressed).decode('utf8') + + def deserialize(self, cassette_string): + # receives a string, must return a dict + decoded = base64.b64decode(cassette_string.encode('utf8')) + return pickle.loads(zlib.decompress(decoded)) + + +def normalize_endpoint(uri, endpoint, default_endpoint): + return uri.replace(endpoint.rstrip('/'), default_endpoint.rstrip('/')) + + +def normalize_cassette(cassette_dict): + """ + This function normalizes the cassette dict trying to make
sure + we are always making API requests with the same variables: + - project id + - endpoint + - authentication header + """ + interactions = [] + for interaction in cassette_dict['interactions']: + uri = interaction['request']['uri'] + uri = uri.replace(TEST_PROJECT_ID, DEFAULT_PROJECT_ID) + + hs_endpoint = TEST_ENDPOINT or HubstorageClient.DEFAULT_ENDPOINT + uri = normalize_endpoint(uri, hs_endpoint, DEFAULT_ENDPOINT) + + dash_endpoint = TEST_DASH_ENDPOINT or Connection.DEFAULT_ENDPOINT + uri = normalize_endpoint(uri, dash_endpoint, DEFAULT_DASH_ENDPOINT) + + interaction['request']['uri'] = uri + + if 'Authorization' in interaction['request']['headers']: + del interaction['request']['headers']['Authorization'] + interaction['request']['headers']['Authorization'] = ( + 'Basic {}'.format( + base64.b64encode( + '{}:'.format(DEFAULT_ADMIN_AUTH).encode('utf-8') + ).decode('utf-8') + ) + ) + + interactions.append(interaction) + + cassette_dict['interactions'] = interactions + return cassette_dict def pytest_addoption(parser): diff --git a/tests/hubstorage/conftest.py b/tests/hubstorage/conftest.py index 14f81d2d..ba2354fb 100644 --- a/tests/hubstorage/conftest.py +++ b/tests/hubstorage/conftest.py @@ -1,7 +1,4 @@ import os -import zlib -import base64 -import pickle import vcr import pytest @@ -14,36 +11,19 @@ from scrapinghub.hubstorage.serialization import MSGPACK_AVAILABLE from ..conftest import request_accept_header_matcher - - -TEST_PROJECT_ID = "2222222" -TEST_SPIDER_NAME = 'hs-test-spider' -TEST_FRONTIER_SLOT = 'site.com' -TEST_BOTGROUP = 'python-hubstorage-test' -TEST_COLLECTION_NAME = "test_collection_123" -TEST_AUTH = os.getenv('HS_AUTH', 'f' * 32) -TEST_ENDPOINT = os.getenv('HS_ENDPOINT', 'http://storage.vm.scrapinghub.com') +from ..conftest import VCRGzipSerializer +from ..conftest import ( + TEST_PROJECT_ID, + TEST_ENDPOINT, + TEST_AUTH, + TEST_BOTGROUP, + TEST_COLLECTION_NAME, + TEST_SPIDER_NAME, +) # vcrpy creates the cassetes automatically under 
VCR_CASSETES_DIR VCR_CASSETES_DIR = 'tests/hubstorage/cassetes' - -class VCRGzipSerializer(object): - """Custom ZIP serializer for VCR.py.""" - - def serialize(self, cassette_dict): - # receives a dict, must return a string - # there can be binary data inside some of the requests, - # so it's impossible to use json for serialization to string - compressed = zlib.compress(pickle.dumps(cassette_dict, protocol=2)) - return base64.b64encode(compressed).decode('utf8') - - def deserialize(self, cassette_string): - # receives a string, must return a dict - decoded = base64.b64decode(cassette_string.encode('utf8')) - return pickle.loads(zlib.decompress(decoded)) - - my_vcr = vcr.VCR(cassette_library_dir=VCR_CASSETES_DIR, record_mode='once') my_vcr.register_serializer('gz', VCRGzipSerializer()) my_vcr.register_matcher('accept_header', request_accept_header_matcher) diff --git a/tests/hubstorage/test_batchuploader.py b/tests/hubstorage/test_batchuploader.py index 19dcbb10..435bee24 100644 --- a/tests/hubstorage/test_batchuploader.py +++ b/tests/hubstorage/test_batchuploader.py @@ -7,7 +7,7 @@ from collections import defaultdict from scrapinghub.hubstorage import ValueTooLarge -from .conftest import TEST_SPIDER_NAME, TEST_AUTH +from ..conftest import TEST_SPIDER_NAME, TEST_AUTH from .conftest import start_job diff --git a/tests/hubstorage/test_client.py b/tests/hubstorage/test_client.py index 532d9472..d6533c31 100644 --- a/tests/hubstorage/test_client.py +++ b/tests/hubstorage/test_client.py @@ -4,8 +4,8 @@ from scrapinghub import HubstorageClient from scrapinghub.hubstorage.utils import apipoll -from .conftest import TEST_AUTH, TEST_ENDPOINT -from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME +from ..conftest import TEST_AUTH, TEST_ENDPOINT +from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME from .conftest import start_job diff --git a/tests/hubstorage/test_collections.py b/tests/hubstorage/test_collections.py index c15648e5..0ecff900 100644 --- 
a/tests/hubstorage/test_collections.py +++ b/tests/hubstorage/test_collections.py @@ -8,7 +8,7 @@ from scrapinghub import HubstorageClient from six.moves import range -from .conftest import TEST_COLLECTION_NAME +from ..conftest import TEST_COLLECTION_NAME from .testutil import failing_downloader diff --git a/tests/hubstorage/test_frontier.py b/tests/hubstorage/test_frontier.py index be17c7c4..fedc154f 100644 --- a/tests/hubstorage/test_frontier.py +++ b/tests/hubstorage/test_frontier.py @@ -3,7 +3,7 @@ """ import pytest -from .conftest import TEST_FRONTIER_SLOT +from ..conftest import TEST_FRONTIER_SLOT @pytest.fixture(autouse=True) diff --git a/tests/hubstorage/test_jobq.py b/tests/hubstorage/test_jobq.py index 7781887d..8bc41278 100644 --- a/tests/hubstorage/test_jobq.py +++ b/tests/hubstorage/test_jobq.py @@ -9,7 +9,7 @@ from scrapinghub.hubstorage.jobq import DuplicateJobError from scrapinghub.hubstorage.utils import apipoll -from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME +from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME from .conftest import hsspiderid diff --git a/tests/hubstorage/test_jobsmeta.py b/tests/hubstorage/test_jobsmeta.py index d17c0825..6f5ff7db 100644 --- a/tests/hubstorage/test_jobsmeta.py +++ b/tests/hubstorage/test_jobsmeta.py @@ -3,7 +3,7 @@ System tests for operations on stored job metadata """ -from .conftest import TEST_SPIDER_NAME +from ..conftest import TEST_SPIDER_NAME from .conftest import start_job diff --git a/tests/hubstorage/test_project.py b/tests/hubstorage/test_project.py index eee68c86..4d849e4b 100644 --- a/tests/hubstorage/test_project.py +++ b/tests/hubstorage/test_project.py @@ -9,7 +9,7 @@ from scrapinghub import HubstorageClient -from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME +from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME from .conftest import hsspiderid from .conftest import start_job from .conftest import set_testbotgroup, unset_testbotgroup diff --git 
a/tests/hubstorage/test_retry.py b/tests/hubstorage/test_retry.py index b9ef1345..6dca47ba 100644 --- a/tests/hubstorage/test_retry.py +++ b/tests/hubstorage/test_retry.py @@ -11,8 +11,8 @@ from scrapinghub import HubstorageClient from six.moves.http_client import BadStatusLine -from .conftest import TEST_AUTH, TEST_ENDPOINT -from .conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME +from ..conftest import TEST_AUTH, TEST_ENDPOINT +from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME GET = responses.GET diff --git a/tests/hubstorage/test_system.py b/tests/hubstorage/test_system.py index c67d373c..bdbc2152 100644 --- a/tests/hubstorage/test_system.py +++ b/tests/hubstorage/test_system.py @@ -7,8 +7,8 @@ from scrapinghub import HubstorageClient from scrapinghub.hubstorage.utils import millitime -from .conftest import TEST_ENDPOINT, TEST_SPIDER_NAME -from .conftest import TEST_PROJECT_ID, TEST_AUTH +from ..conftest import TEST_ENDPOINT, TEST_SPIDER_NAME +from ..conftest import TEST_PROJECT_ID, TEST_AUTH from .conftest import start_job