Skip to content

Handle BufferErrors by backing off and retrying #673

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 14, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 27 additions & 7 deletions python/hsfs/engine/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,10 @@ def _write_dataframe_kafka(
# setup row writer function
writer = self._get_encoder_func(feature_group._get_encoded_avro_schema())

def acked(err, msg):
    # Kafka delivery-report callback: only failed deliveries are of
    # interest, so bail out early on success (err is None).
    if err is None:
        return
    print("Failed to deliver message: %s: %s" % (str(msg), str(err)))

# loop over rows
for r in dataframe.itertuples(index=False):
# itertuples returns a Python NamedTuple; to be able to serialize it using
Expand Down Expand Up @@ -744,13 +748,27 @@ def _write_dataframe_kafka(
# assemble key
key = "".join([str(row[pk]) for pk in sorted(feature_group.primary_key)])

# produce
producer.produce(
topic=feature_group._online_topic_name, key=key, value=encoded_row
)
while True:
# if a BufferError is thrown, we can be sure the message hasn't been sent, so we retry
try:
# produce
producer.produce(
topic=feature_group._online_topic_name,
key=key,
value=encoded_row,
callback=acked
if offline_write_options.get("debug_kafka", False)
else None,
)

# Trigger internal callbacks to empty op queue
producer.poll(0)
# Trigger internal callbacks to empty op queue
producer.poll(0)
break
except BufferError as e:
if offline_write_options.get("debug_kafka", False):
print("Caught: {}".format(e))
# backoff for 1 second
producer.poll(1)

# make sure producer blocks and everything is delivered
producer.flush()
Expand Down Expand Up @@ -795,12 +813,15 @@ def _get_encoder_func(self, writer_schema: str) -> callable:
return lambda record, outf: writer.write(record, avro.io.BinaryEncoder(outf))

def _get_kafka_config(self, write_options: dict = {}) -> dict:
# producer configuration properties
# https://docs.confluent.io/platform/current/clients/librdkafka/html/md_CONFIGURATION.html
config = {
"security.protocol": "SSL",
"ssl.ca.location": client.get_instance()._get_ca_chain_path(),
"ssl.certificate.location": client.get_instance()._get_client_cert_path(),
"ssl.key.location": client.get_instance()._get_client_key_path(),
"client.id": socket.gethostname(),
**write_options.get("kafka_producer_config", {}),
}

if isinstance(client.get_instance(), hopsworks.Client) or write_options.get(
Expand All @@ -823,5 +844,4 @@ def _get_kafka_config(self, write_options: dict = {}) -> dict:
)
]
)

return config