Skip to content

Commit 1b31d1c

Browse files
rustyrussell authored and cdecker committed
bitcoind: handle up to 60 seconds of outage.
Seems to go out to lunch on reorgs:

+136792.168286138 lightningd(9465):BROKEN: bitcoin-cli getchaintips exited 28: 'error code: -28 error message: Rewinding blocks...'

Closes: #286
Signed-off-by: Rusty Russell <[email protected]>
1 parent a2d4e09 commit 1b31d1c

File tree

2 files changed

+30
-5
lines changed

2 files changed

+30
-5
lines changed

lightningd/bitcoind.c

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -117,14 +117,34 @@ static void bcli_finished(struct io_conn *conn, struct bitcoin_cli *bcli)
117117

118118
if (!bcli->exitstatus) {
119119
if (WEXITSTATUS(status) != 0) {
120-
fatal("%s exited %u: '%.*s'", bcli_args(bcli),
121-
WEXITSTATUS(status),
122-
(int)bcli->output_bytes,
123-
bcli->output);
120+
/* Allow 60 seconds of spurious errors, e.g. during a reorg. */
121+
struct timerel t;
122+
123+
log_unusual(bcli->bitcoind->log,
124+
"%s exited with status %u",
125+
bcli_args(bcli),
126+
WEXITSTATUS(status));
127+
128+
if (!bitcoind->error_count)
129+
bitcoind->first_error_time = time_mono();
130+
131+
t = timemono_between(time_mono(),
132+
bitcoind->first_error_time);
133+
if (time_greater(t, time_from_sec(60)))
134+
fatal("%s exited %u (after %u other errors) '%.*s'",
135+
bcli_args(bcli),
136+
WEXITSTATUS(status),
137+
bitcoind->error_count,
138+
(int)bcli->output_bytes,
139+
bcli->output);
140+
bitcoind->error_count++;
124141
}
125142
} else
126143
*bcli->exitstatus = WEXITSTATUS(status);
127144

145+
if (WEXITSTATUS(status) == 0)
146+
bitcoind->error_count = 0;
147+
128148
bitcoind->req_running = false;
129149

130150
/* Don't continue if we're only here because we were freed for shutdown */
@@ -154,7 +174,6 @@ static void next_bcli(struct bitcoind *bitcoind)
154174

155175
bitcoind->req_running = true;
156176
conn = io_new_conn(bitcoind, bcli->fd, output_init, bcli);
157-
tal_steal(conn, bcli);
158177
io_set_finish(conn, bcli_finished, bcli);
159178
}
160179

@@ -526,6 +545,7 @@ struct bitcoind *new_bitcoind(const tal_t *ctx, struct log *log)
526545
bitcoind->log = log;
527546
bitcoind->req_running = false;
528547
bitcoind->shutdown = false;
548+
bitcoind->error_count = 0;
529549
list_head_init(&bitcoind->pending);
530550
tal_add_destructor(bitcoind, destroy_bitcoind);
531551

lightningd/bitcoind.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <ccan/list/list.h>
66
#include <ccan/short_types/short_types.h>
77
#include <ccan/tal/tal.h>
8+
#include <ccan/time/time.h>
89
#include <ccan/typesafe_cb/typesafe_cb.h>
910
#include <stdbool.h>
1011

@@ -37,6 +38,10 @@ struct bitcoind {
3738
/* What network are we on? */
3839
const struct chainparams *chainparams;
3940

41+
/* Number of consecutive bitcoind errors; if non-zero, first_error_time is when the first of them occurred. */
42+
unsigned int error_count;
43+
struct timemono first_error_time;
44+
4045
/* Ignore results, we're shutting down. */
4146
bool shutdown;
4247
};

0 commit comments

Comments
 (0)