Skip to content

Useless advertise addr option when no direct conn to masters on consul start #22169

@Alexsandr-Random

Description

@Alexsandr-Random

Overview of the Issue

When using Consul, I expect the advertise_addr (or advertise_addr_ipv4) option, set to a specific IP address, to work as follows:
If I have network problems, then in combination with bind_addr set to "0.0.0.0" I expect that, even if my VPN does not start right away and the address specified in advertise_addr does not exist yet, Consul will - once the network comes up - use this option to advertise itself to the other cluster members without binding directly to that address. Consul does this.
However, for some reason, Consul depends directly on the availability of a direct connection to the masters.
I.e., the Consul agent is not autonomous and will not work without a direct connection to the cluster at startup. This severely limits many of its features, such as local DNS, because we must have a connection to the masters; otherwise, after a restart, we cannot start Consul with its endpoints, and nothing works locally.

Our ultimate goal is to make Consul independent of the masters at launch.
But we see that if the server itself (not the Consul service) is restarted - which is the strangest part - and there is no connection to the master servers, Consul does not start its endpoints or its local DNS server. This makes no sense, because they are local precisely so that they can be autonomous and not depend on others.
At the same time, we observe this behavior only if we restart the server itself where Consul is running. If you disconnect the VPN (over which the Consul cluster communicates) and restart only the Consul service, everything works fine. I do not understand why this is so; it looks like a bug, and it seems that some cache files strongly affect how Consul behaves.

See more:
https://developer.hashicorp.com/consul/docs/agent/config/cli-flags#_advertise
and
https://developer.hashicorp.com/consul/docs/agent/config/cli-flags#_bind

Reproduction Steps

Steps to reproduce this issue, eg:

  1. Set up a Consul cluster with at least 3 masters and 1 live agent, each on a separate host, communicating over a local or VPN network.
  2. Test 1 - Turn off access to the network over which Consul communicates. Then restart the Consul service. Then check whether Consul's local features (endpoints and/or DNS) work.
  3. Test 2 - Turn off access to the network over which Consul communicates. Then restart the whole server where the Consul agent was running. Then check whether Consul's local features (endpoints and/or DNS) work - they are not started!
    -->

Consul info for both Client and Server

Client info
Output from client 'consul info' command here

agent:
        check_monitors = 0
        check_ttls = 2
        checks = 7
        services = 7
build:
        prerelease = 
        revision = 33e5727a
        version = 1.20.2
        version_metadata = 
consul:
        acl = enabled
        known_servers = 3
        server = false
runtime:
        arch = amd64
        cpu_count = 8
        goroutines = 75
        max_procs = 8
        os = linux
        version = go1.22.7
serf_lan:
        coordinate_resets = 0
        encrypted = true
        event_queue = 0
        event_time = 351
        failed = 0
        health_score = 0
        intent_queue = 0
        left = 0
        member_time = 58820
        members = 79
        query_queue = 0
        query_time = 4

Client agent HCL config

{
    "primary_datacenter": "fsn1",
    "datacenter": "hel1",
    "node_name": "784",
    "bind_addr": "0.0.0.0",
    "advertise_addr": "10.100.20.37",
    "advertise_addr_ipv4": "10.100.20.37",
    "client_addr": "127.0.0.1",
    "ports": {
        "dns":  8600,
        "http": 8500,
        "serf_lan": 8301,
        "grpc": 8502
    },
    "domain": "consul",
    "recursors": ["1.1.1.1", "8.8.8.8"],
    "dns_config": {
        "enable_truncate": true,
        "only_passing": true
    },
    "retry_join": ["10.100.10.11", "10.100.10.12", "10.100.10.13"],
    "rejoin_after_leave": true,
    "data_dir": "/var/local/consul",
    "log_file": "/var/log/consul-debug.log",
    "enable_syslog": false,
    "enable_debug": true,
    "log_level": "debug",
    "log_json": true,
    "encrypt": "redacted",
    "encrypt_verify_incoming": true,
    "encrypt_verify_outgoing": true,
    "leave_on_terminate": true,
    "acl": {
        "enabled": true,
        "default_policy": "deny",
        "enable_token_persistence": true,
        "tokens": {
            "agent": "redacted"
        }
    },
    "tls": {
        "defaults": {
            "verify_incoming": true,
            "verify_outgoing": true,
            "ca_file": "/usr/local/etc/ssl/consul-agent-ca.pem"
        },
        "internal_rpc": {
            "verify_server_hostname": true
        }
    },
    "auto_encrypt": {
        "tls": true
    },
    "limits": {
        "rpc_rate": 10,
        "rpc_max_burst": 100,
        "rpc_client_timeout": "300s",
        "rpc_max_conns_per_client": 50
    },
    "cache": {
        "entry_fetch_max_burst": 3,
        "entry_fetch_rate": 0.333
    },
    "disable_update_check": true,
    "check_update_interval": "60m",
    "enable_script_checks": true,
    "gossip_lan": {
       "probe_interval": "6s",
       "probe_timeout": "3s",
       "retransmit_mult": 3,
       "suspicion_mult": 5,
       "gossip_interval": "1s",
       "gossip_nodes": 3
   }
}

Server info
Output from server 'consul info' command here

agent:
        check_monitors = 0
        check_ttls = 0
        checks = 4
        services = 4
build:
        prerelease = 
        revision = 33e5727a
        version = 1.20.2
        version_metadata = 
consul:
        acl = enabled
        bootstrap = false
        known_datacenters = 2
        leader = true
        leader_addr = 10.100.10.13:8300
        server = true
raft:
        applied_index = 12409662
        commit_index = 12409662
        fsm_pending = 0
        last_contact = 0
        last_log_index = 12409662
        last_log_term = 1371
        last_snapshot_index = 12398574
        last_snapshot_term = 1371
        latest_configuration = [{Suffrage:Voter ID:f48b1b28-0af1-876b-f949-9ba9ccedde51 Address:10.100.10.13:8300} {Suffrage:Voter ID:1fac6648-f334-90c0-9657-368c9626fe00 Address:10.100.10.11:8300} {Suffrage:Voter ID:d368c927-8d7a-fb00-3571-f53b693c9394 Address:10.100.10.12:8300}]
        latest_configuration_index = 0
        num_peers = 2
        protocol_version = 3
        protocol_version_max = 3
        protocol_version_min = 0
        snapshot_version_max = 1
        snapshot_version_min = 0
        state = Leader
        term = 1371
runtime:
        arch = amd64
        cpu_count = 2
        goroutines = 1091
        max_procs = 2
        os = linux
        version = go1.22.7
serf_lan:
        coordinate_resets = 0
        encrypted = true
        event_queue = 0
        event_time = 351
        failed = 1
        health_score = 0
        intent_queue = 0
        left = 0
        member_time = 58820
        members = 80
        query_queue = 0
        query_time = 4
serf_wan:
        coordinate_resets = 0
        encrypted = true
        event_queue = 0
        event_time = 1
        failed = 0
        health_score = 0
        intent_queue = 0
        left = 0
        member_time = 1913
        members = 6
        query_queue = 0
        query_time = 1

Server agent HCL config

{
    "bootstrap_expect": 3,
    "server": true,
    "advertise_addr": "10.100.10.13",
    "advertise_addr_wan": "10.44.10.3",
    "client_addr": "127.0.0.1",
    "datacenter": "hel1",
    "primary_datacenter": "fsn1",
    "data_dir": "/var/local/consul",
    "domain": "consul",
    "recursors": ["1.1.1.1", "8.8.8.8"],
    "enable_script_checks": true,
    "dns_config": {
        "enable_truncate": true,
        "only_passing": true
    },
    "performance": {
        "raft_multiplier": 1
    },
    "ports": {
        "dns": 53
    },
    "enable_syslog": true,
    "encrypt": "secret",
    "encrypt_verify_incoming": true,
    "encrypt_verify_outgoing": true,
    "leave_on_terminate": true,
    "log_level": "info",
    "rejoin_after_leave": true,
    "retry_join": ["10.44.10.1", "10.44.10.2", "10.44.10.3"],
    "retry_join_wan": ["10.44.0.2", "10.44.0.3", "10.44.0.4"],
    "acl": {
        "enabled": true,
        "default_policy": "deny",
        "down_policy": "extend-cache",
        "enable_token_persistence": true,
        "enable_token_replication": true
    },
    "cache": {
        "entry_fetch_max_burst": 3,
        "entry_fetch_rate": 0.333
    },
    "disable_update_check": true,
    "check_update_interval": "10m",
    "gossip_lan": {
       "probe_interval": "6s",
       "probe_timeout": "3s",
       "retransmit_mult": 3,
       "suspicion_mult": 5,
       "gossip_interval": "1s",
       "gossip_nodes": 3
   },
    "ui_config": {
        "enabled": true
    },
    "connect": {
        "enabled": true
    }
}

Operating system and Environment details

Running on Ubuntu 24.04 LTS

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions