From 32edb610712afe42fc6577bde0a9a2c807f80a2f Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Sat, 3 Dec 2022 13:55:22 -0500 Subject: [PATCH] Add some bus failure handling Signed-off-by: Bill Erickson --- src/libopensrf/transport_connection.c | 9 +++++++++ src/perl/lib/OpenSRF/Transport/Redis/BusConnection.pm | 6 +++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/libopensrf/transport_connection.c b/src/libopensrf/transport_connection.c index d3f12cb..1bef22e 100644 --- a/src/libopensrf/transport_connection.c +++ b/src/libopensrf/transport_connection.c @@ -1,4 +1,5 @@ #include <opensrf/transport_connection.h> +#include <unistd.h> transport_con* transport_con_new(const char* domain) { @@ -253,5 +254,13 @@ int handle_redis_error(redisReply *reply, const char* command, ...) { osrfLogError(OSRF_LOG_MARK, "REDIS Error [%s] %s", err, VA_BUF); freeReplyObject(reply); + // Some bus error conditions can lead to looping on an unusable + // connection. Avoid flooding the logs by inserting a short + // wait after any Redis errors. Note, these should never happen + // under normal wear and tear. It's possible we should just exit + // here, but need to collect some data first. + osrfLogError(OSRF_LOG_MARK, "Resting for a few seconds after bus failure..."); + sleep(3); + return 1; } diff --git a/src/perl/lib/OpenSRF/Transport/Redis/BusConnection.pm b/src/perl/lib/OpenSRF/Transport/Redis/BusConnection.pm index 03e4aae..a93b705 100644 --- a/src/perl/lib/OpenSRF/Transport/Redis/BusConnection.pm +++ b/src/perl/lib/OpenSRF/Transport/Redis/BusConnection.pm @@ -107,7 +107,11 @@ sub send { eval { $self->redis->rpush($dest_stream, $msg_json) }; - if ($@) { $logger->error("RPUSH error: $@"); } + if ($@) { + $logger->error("RPUSH error: $@"); + $logger->error("BusConnection pausing for a few seconds after bus error"); + sleep(3); + } } # $timeout=0 means check for data without blocking -- 2.11.0