Skip to content

Commit d4d668b

Browse files
committed
getting rid of legacy logging system
1 parent 035be29 commit d4d668b

File tree

11 files changed

+54
-277
lines changed

11 files changed

+54
-277
lines changed

docs/source/topics/frontera-settings.rst

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -131,16 +131,6 @@ Delay between calls to backend for new batches in Scrapy scheduler, when queue s
131131
without hitting the backend on every request. Increase it if calls to your backend are taking too long, and decrease
132132
if you need a fast spider bootstrap from seeds.
133133

134-
135-
.. setting:: EVENT_LOGGER
136-
137-
EVENT_LOGGER
138-
------------
139-
140-
Default: ``'frontera.logger.events.EventLogManager'``
141-
142-
The EventLoggerManager class to be used by the Frontier.
143-
144134
.. setting:: KAFKA_GET_TIMEOUT
145135

146136
KAFKA_GET_TIMEOUT
@@ -150,15 +140,6 @@ Default: ``5.0``
150140

151141
Time the process should block until the requested amount of data is received from the message bus.
152142

153-
.. setting:: LOGGER
154-
155-
LOGGER
156-
------
157-
158-
Default: ``'frontera.logger.FrontierLogger'``
159-
160-
The Logger class to be used by the Frontier.
161-
162143
.. setting:: MAX_NEXT_REQUESTS
163144

164145
MAX_NEXT_REQUESTS
@@ -638,7 +619,6 @@ Logging default settings
638619

639620
Values::
640621

641-
LOGGER = 'frontera.logger.FrontierLogger'
642622
LOGGING_ENABLED = True
643623

644624
LOGGING_EVENTS_ENABLED = False
@@ -667,5 +647,3 @@ Values::
667647
"frontera.logger.handlers.COLOR_CONSOLE_DEBUGGING",
668648
]
669649

670-
EVENT_LOG_MANAGER = 'frontera.logger.events.EventLogManager'
671-

frontera/contrib/backends/hbase.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from frontera.core.models import Request
1818
from frontera.contrib.backends.partitioners import Crc32NamePartitioner
1919
from frontera.utils.misc import chunks
20+
import logging
2021

2122

2223
_pack_functions = {
@@ -60,11 +61,11 @@ class HBaseQueue(Queue):
6061

6162
GET_RETRIES = 3
6263

63-
def __init__(self, connection, partitions, logger, table_name, drop=False):
64+
def __init__(self, connection, partitions, table_name, drop=False):
6465
self.connection = connection
6566
self.partitions = [i for i in range(0, partitions)]
6667
self.partitioner = Crc32NamePartitioner(self.partitions)
67-
self.logger = logger
68+
self.logger = logging.getLogger("hbase.queue")
6869
self.table_name = table_name
6970

7071
tables = set(self.connection.tables())
@@ -88,7 +89,7 @@ def schedule(self, batch):
8889
if 'domain' not in request.meta:
8990
_, hostname, _, _, _, _ = parse_domain_from_url_fast(request.url)
9091
if not hostname:
91-
self.logger.error("Can't get hostname for URL %s, fingerprint %s" % (request.url, fprint))
92+
self.logger.error("Can't get hostname for URL %s, fingerprint %s", request.url, fprint)
9293
request.meta['domain'] = {'name': hostname}
9394
to_schedule.append((score, fprint, request.meta['domain'], request.url))
9495
self._schedule(to_schedule)
@@ -183,7 +184,8 @@ def get_next_requests(self, max_n_requests, partition_id, **kwargs):
183184
while tries < self.GET_RETRIES:
184185
tries += 1
185186
limit *= 5.5 if tries > 1 else 1.0
186-
self.logger.debug("Try %d, limit %d, last attempt: requests %d, hosts %d" % (tries, limit, count, len(queue.keys())))
187+
self.logger.debug("Try %d, limit %d, last attempt: requests %d, hosts %d",
188+
tries, limit, count, len(queue.keys()))
187189
meta_map.clear()
188190
queue.clear()
189191
count = 0
@@ -213,7 +215,7 @@ def get_next_requests(self, max_n_requests, partition_id, **kwargs):
213215
continue
214216
break
215217

216-
self.logger.debug("Finished: tries %d, hosts %d, requests %d" % (tries, len(queue.keys()), count))
218+
self.logger.debug("Finished: tries %d, hosts %d, requests %d", tries, len(queue.keys()), count)
217219

218220
# For every fingerprint collect it's row keys and return all fingerprints from them
219221
fprint_map = {}
@@ -241,7 +243,7 @@ def get_next_requests(self, max_n_requests, partition_id, **kwargs):
241243
with table.batch(transaction=True) as b:
242244
for rk in trash_can:
243245
b.delete(rk)
244-
self.logger.debug("%d row keys removed" % (len(trash_can)))
246+
self.logger.debug("%d row keys removed", len(trash_can))
245247
return results
246248

247249
def count(self):
@@ -250,10 +252,10 @@ def count(self):
250252

251253
class HBaseState(States):
252254

253-
def __init__(self, connection, table_name, logger, cache_size_limit):
255+
def __init__(self, connection, table_name, cache_size_limit):
254256
self.connection = connection
255257
self._table_name = table_name
256-
self.logger = logger
258+
self.logger = logging.getLogger("hbase.states")
257259
self._state_cache = {}
258260
self._cache_size_limit = cache_size_limit
259261

@@ -373,7 +375,7 @@ class HBaseBackend(DistributedBackend):
373375

374376
def __init__(self, manager):
375377
self.manager = manager
376-
self.logger = manager.logger.backend
378+
self.logger = logging.getLogger("hbase.backend")
377379
settings = manager.settings
378380
port = settings.get('HBASE_THRIFT_PORT')
379381
hosts = settings.get('HBASE_THRIFT_HOST')
@@ -400,7 +402,7 @@ def __init__(self, manager):
400402
def strategy_worker(cls, manager):
401403
o = cls(manager)
402404
settings = manager.settings
403-
o._states = HBaseState(o.connection, settings.get('HBASE_METADATA_TABLE'), o.manager.logger.backend,
405+
o._states = HBaseState(o.connection, settings.get('HBASE_METADATA_TABLE'),
404406
settings.get('HBASE_STATE_CACHE_SIZE_LIMIT'))
405407
return o
406408

@@ -409,7 +411,7 @@ def db_worker(cls, manager):
409411
o = cls(manager)
410412
settings = manager.settings
411413
drop_all_tables = settings.get('HBASE_DROP_ALL_TABLES')
412-
o._queue = HBaseQueue(o.connection, o.queue_partitions, o.manager.logger.backend,
414+
o._queue = HBaseQueue(o.connection, o.queue_partitions,
413415
settings.get('HBASE_QUEUE_TABLE'), drop=drop_all_tables)
414416
o._metadata = HBaseMetadata(o.connection, settings.get('HBASE_METADATA_TABLE'), drop_all_tables,
415417
settings.get('HBASE_USE_SNAPPY'), settings.get('HBASE_BATCH_SIZE'),
@@ -461,5 +463,5 @@ def get_next_requests(self, max_next_requests, **kwargs):
461463
results = self.queue.get_next_requests(max_next_requests, partition_id, min_requests=64,
462464
min_hosts=24, max_requests_per_host=128)
463465
next_pages.extend(results)
464-
self.logger.debug("Got %d requests for partition id %d" % (len(results), partition_id))
466+
self.logger.debug("Got %d requests for partition id %d", len(results), partition_id)
465467
return next_pages

frontera/contrib/backends/remote/messagebus.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33
from frontera.core import OverusedBuffer
44
from codecs.msgpack import Encoder, Decoder
55
from frontera.utils.misc import load_object
6+
import logging
67

78

89
class MessageBusBackend(Backend):
910
def __init__(self, manager):
10-
self._manager = manager
11-
settings = self._manager.settings
11+
settings = manager.settings
1212
messagebus = load_object(settings.get('MESSAGE_BUS'))
1313
self.mb = messagebus(settings)
1414
store_content = settings.get('STORE_CONTENT')
@@ -21,6 +21,7 @@ def __init__(self, manager):
2121
self._get_timeout = float(settings.get('KAFKA_GET_TIMEOUT'))
2222
self._buffer = OverusedBuffer(self._get_next_requests,
2323
manager.logger.manager.debug)
24+
self._logger = logging.getLogger("messagebus-backend")
2425

2526
@classmethod
2627
def from_manager(clas, manager):
@@ -47,8 +48,7 @@ def _get_next_requests(self, max_n_requests, **kwargs):
4748
try:
4849
request = self._decoder.decode_request(encoded)
4950
except Exception, exc:
50-
self._manager.logger.backend.warning("Could not decode message: {0}, error {1}".format(encoded,
51-
str(exc)))
51+
self._logger.warning("Could not decode message: {0}, error {1}".format(encoded, str(exc)))
5252
else:
5353
requests.append(request)
5454
self.spider_log_producer.send('0123456789abcdef0123456789abcdef012345678',

frontera/contrib/messagebus/kafkabus.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from logging import getLogger
1313
from time import sleep
1414

15-
logger = getLogger("kafkabus")
15+
logger = getLogger("messagebus.kafka")
1616

1717

1818
class Consumer(BaseStreamConsumer):

frontera/contrib/messagebus/zeromq/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ def send(self, key, *messages):
143143
self.counters[0] = counter
144144

145145

146-
class ScorinLogStream(BaseScoringLogStream):
146+
class ScoringLogStream(BaseScoringLogStream):
147147
def __init__(self, messagebus):
148148
self.context = messagebus.context
149149
self.in_location = messagebus.socket_config.sw_out()
@@ -215,7 +215,7 @@ def spider_log(self):
215215
return SpiderLogStream(self)
216216

217217
def scoring_log(self):
218-
return ScorinLogStream(self)
218+
return ScoringLogStream(self)
219219

220220
def spider_feed(self):
221221
return SpiderFeedStream(self)

0 commit comments

Comments
 (0)