Skip to content

Commit 1240e63

Browse files
authored
Merge pull request #183 from rabbitmq/replica-handle-nodedown
Replica: Handle nodedowns when starting replica reader.
2 parents d39da5a + ec8cb9f commit 1240e63

File tree

3 files changed

+36
-13
lines changed

3 files changed

+36
-13
lines changed

src/osiris_replica.erl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,12 @@ handle_info({'EXIT', RRPid, Info},
531531
replica_reader_pid = RRPid}} = State) ->
532532
%% any replica reader exit is troublesome and requires the replica to also
533533
%% terminate
534-
?ERROR_(Name, "replica reader ~w exited with ~w", [RRPid, Info]),
534+
case lists:member(Info, [normal, shutdown]) of
535+
true ->
536+
?DEBUG_(Name, "replica reader ~w exited with ~w", [RRPid, Info]);
537+
false ->
538+
?ERROR_(Name, "replica reader ~w exited with ~w", [RRPid, Info])
539+
end,
535540
{stop, {shutdown, Info}, State};
536541
handle_info({'EXIT', Ref, normal},
537542
#?MODULE{cfg = #cfg{name = Name}} = State) ->

src/osiris_replica_reader.erl

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -67,19 +67,26 @@ stop(Pid) ->
6767
gen_server:cast(Pid, stop).
6868

6969
start(Node, ReplicaReaderConf) when is_map(ReplicaReaderConf) ->
70-
supervisor:start_child({osiris_replica_reader_sup, Node},
71-
#{id => make_ref(),
72-
start =>
70+
try
71+
supervisor:start_child({osiris_replica_reader_sup, Node},
72+
#{id => make_ref(),
73+
start =>
7374
{osiris_replica_reader, start_link,
7475
[ReplicaReaderConf]},
75-
%% replica readers should never be
76-
%% restarted by their sups
77-
%% instead they need to be re-started
78-
%% by their replica
79-
restart => temporary,
80-
shutdown => 5000,
81-
type => worker,
82-
modules => [osiris_replica_reader]}).
76+
%% replica readers should never be
77+
%% restarted by their sups
78+
%% instead they need to be re-started
79+
%% by their replica
80+
restart => temporary,
81+
shutdown => 5000,
82+
type => worker,
83+
modules => [osiris_replica_reader]})
84+
catch
85+
exit:{{nodedown, _} = Res, _Stack} ->
86+
{error, Res};
87+
exit:{noproc = Res, _Stack} ->
88+
{error, Res}
89+
end.
8390

8491
%%%===================================================================
8592
%%% gen_server callbacks

test/osiris_SUITE.erl

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ all_tests() ->
8585
single_node_reader_counters,
8686
cluster_reader_counters,
8787
combine_ips_hosts_test,
88-
empty_last_segment].
88+
empty_last_segment,
89+
replica_reader_nodedown_noproc].
8990

9091
%% Isolated to avoid test interference
9192
ipv6_tests() ->
@@ -1895,6 +1896,16 @@ empty_last_segment(Config) ->
18951896
?assert(erlang:is_process_alive(Leader2)),
18961897
ok.
18971898

1899+
replica_reader_nodedown_noproc(_Config) ->
1900+
%% unit test to ensure we handle down nodes gracefully.
1901+
{error, {nodedown, 'banana@fruit'}} =
1902+
osiris_replica_reader:start('banana@fruit', #{}),
1903+
1904+
_ = application:stop(osiris),
1905+
{error, noproc} =
1906+
osiris_replica_reader:start(node(), #{}),
1907+
ok.
1908+
18981909
%% Utility
18991910

19001911
write_n(Pid, N, Written) ->

0 commit comments

Comments
 (0)