diff --git a/deps/rabbit/src/rabbit_networking.erl b/deps/rabbit/src/rabbit_networking.erl index 6b02ebe775b4..ffd137d3df0c 100644 --- a/deps/rabbit/src/rabbit_networking.erl +++ b/deps/rabbit/src/rabbit_networking.erl @@ -61,6 +61,14 @@ -define(FIRST_TEST_BIND_PORT, 49152). -define(ETS_TABLE, rabbit_listener_ets). +%% Number of retries when the epmd port lookup fails with no_epmd_port. +%% This can happen when DNS is not ready yet, +%% for example in Kubernetes during the start-up phase. +-define(PORT_PLEASE_ATTEMPTS, 10). + +%% Milliseconds to wait before retrying when erl_epmd:port_please fails. +%% See epmd_port_please/3. +-define(PORT_PLEASE_ATTEMPTS_WAIT, 5000). %%---------------------------------------------------------------------------- @@ -400,7 +408,33 @@ tcp_listener_stopped_ets(L) -> -spec record_distribution_listener() -> ok | no_return(). record_distribution_listener() -> - {Name, Host} = rabbit_nodes:parts(node()), + {Name, Host} = rabbit_nodes:parts(node()), + epmd_port_please(Name, Host). + + +-spec epmd_port_please(string(),string()) -> ok | no_return(). + +epmd_port_please(Name, Host) -> + epmd_port_please(Name, Host, ?PORT_PLEASE_ATTEMPTS). +%% erl_epmd:port_please can fail if DNS is not ready yet (e.g. in Kubernetes). +%% Retry up to PORT_PLEASE_ATTEMPTS times, PORT_PLEASE_ATTEMPTS_WAIT ms apart; +%% only a thrown {error, _} is retried, and the final attempt lets it propagate. +-spec epmd_port_please(string(),string(), integer()) -> ok | no_return(). +epmd_port_please(Name, Host, 0) -> + maybe_get_epmd_port(Name, Host); +epmd_port_please(Name, Host, RetriesLeft) -> + rabbit_log:debug("Getting epmd port node '~s', ~b retries left", + [Name, RetriesLeft]), + try maybe_get_epmd_port(Name, Host) + catch + throw:{error, _} -> + timer:sleep(?PORT_PLEASE_ATTEMPTS_WAIT), + epmd_port_please(Name, Host, RetriesLeft - 1) + end. + +-spec maybe_get_epmd_port(string(),string()) -> ok | no_return().
+ +maybe_get_epmd_port(Name, Host) -> case erl_epmd:port_please(list_to_atom(Name), Host, infinity) of {port, Port, _Version} -> IPAddress = @@ -413,6 +447,7 @@ record_distribution_listener() -> throw({error, no_epmd_port}) end. + -spec active_listeners() -> [rabbit_types:listener()]. active_listeners() -> diff --git a/deps/rabbit_common/src/rabbit_nodes_common.erl b/deps/rabbit_common/src/rabbit_nodes_common.erl index 2a0017fc0071..3787acff3fae 100644 --- a/deps/rabbit_common/src/rabbit_nodes_common.erl +++ b/deps/rabbit_common/src/rabbit_nodes_common.erl @@ -10,6 +10,7 @@ -define(EPMD_OPERATION_TIMEOUT, 6000). -define(NAME_LOOKUP_ATTEMPTS, 10). -define(TCP_DIAGNOSTIC_TIMEOUT, 5000). +-define(NXDOMAIN_RETRY_WAIT, 5000). -define(ERROR_LOGGER_HANDLER, rabbit_error_logger_handler). -include_lib("kernel/include/inet.hrl"). @@ -57,7 +58,7 @@ names(Hostname, RetriesLeft) -> noport -> names(Hostname, RetriesLeft - 1); {error, nxdomain} -> - timer:sleep(3000), + timer:sleep(?NXDOMAIN_RETRY_WAIT), names(Hostname, RetriesLeft - 1); {error, _} -> names(Hostname, RetriesLeft - 1)