diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d59d97c043..ac7848cf97b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ * [ENHANCEMENT] Add `cortex_ruler_config_last_reload_successful` and `cortex_ruler_config_last_reload_successful_seconds` to check status of users rule manager. #3056 * [ENHANCEMENT] Memcached dial() calls now have an optional circuit-breaker to avoid hammering a broken cache #3051 * [ENHANCEMENT] Add TLS support to etcd client. #3102 +* [ENHANCEMENT] When a tenant accesses the Alertmanager UI or its API, if we have a valid `-alertmanager.configs.fallback` we'll use that to start the manager and avoid failing the request. #3073 * [BUGFIX] Query-frontend: Fixed rounding for incoming query timestamps, to be 100% Prometheus compatible. #2990 * [BUGFIX] Querier: Merge results from chunks and blocks ingesters when using streaming of results. #3013 * [BUGFIX] Querier: query /series from ingesters regardless the `-querier.query-ingesters-within` setting. #3035 diff --git a/pkg/alertmanager/multitenant.go b/pkg/alertmanager/multitenant.go index 7751f97c923..79b86c46ece 100644 --- a/pkg/alertmanager/multitenant.go +++ b/pkg/alertmanager/multitenant.go @@ -463,12 +463,47 @@ func (am *MultitenantAlertmanager) ServeHTTP(w http.ResponseWriter, req *http.Re userAM, ok := am.alertmanagers[userID] am.alertmanagersMtx.Unlock() - if !ok || !userAM.IsActive() { - http.Error(w, "the Alertmanager is not configured", http.StatusNotFound) + if ok { + if !userAM.IsActive() { + http.Error(w, "the Alertmanager is not configured", http.StatusNotFound) + return + } + + userAM.mux.ServeHTTP(w, req) + return + } + + if am.fallbackConfig != "" { + userAM, err = am.alertmanagerFromFallbackConfig(userID) + if err != nil { + http.Error(w, "Failed to initialize the Alertmanager", http.StatusInternalServerError) + return + } + + userAM.mux.ServeHTTP(w, req) return } - userAM.mux.ServeHTTP(w, req) + http.Error(w, "the Alertmanager is not configured", http.StatusNotFound) 
+} + +func (am *MultitenantAlertmanager) alertmanagerFromFallbackConfig(userID string) (*Alertmanager, error) { + // Upload an empty config so that the Alertmanager is not de-activated in the next poll + cfgDesc := alerts.ToProto("", nil, userID) + err := am.store.SetAlertConfig(context.Background(), cfgDesc) + if err != nil { + return nil, err + } + + // Calling setConfig with an empty configuration will use the fallback config. + err = am.setConfig(cfgDesc) + if err != nil { + return nil, err + } + + am.alertmanagersMtx.Lock() + defer am.alertmanagersMtx.Unlock() + return am.alertmanagers[userID], nil } // GetStatusHandler returns the status handler for this multi-tenant diff --git a/pkg/alertmanager/multitenant_test.go b/pkg/alertmanager/multitenant_test.go index b3e18307eac..c8acffd218b 100644 --- a/pkg/alertmanager/multitenant_test.go +++ b/pkg/alertmanager/multitenant_test.go @@ -5,6 +5,7 @@ import ( "context" "fmt" "io/ioutil" + "net/http" "net/http/httptest" "os" "testing" @@ -48,7 +49,8 @@ func (m *mockAlertStore) GetAlertConfig(ctx context.Context, user string) (alert } func (m *mockAlertStore) SetAlertConfig(ctx context.Context, cfg alerts.AlertConfigDesc) error { - return fmt.Errorf("not implemented") + m.configs[cfg.User] = cfg + return nil } func (m *mockAlertStore) DeleteAlertConfig(ctx context.Context, user string) error { @@ -241,3 +243,68 @@ func TestAlertmanager_ServeHTTP(t *testing.T) { body, _ = ioutil.ReadAll(resp.Body) require.Equal(t, "the Alertmanager is not configured\n", string(body)) } + +func TestAlertmanager_ServeHTTPWithFallbackConfig(t *testing.T) { + mockStore := &mockAlertStore{ + configs: map[string]alerts.AlertConfigDesc{}, + } + + externalURL := flagext.URLValue{} + err := externalURL.Set("http://localhost:8080/alertmanager") + require.NoError(t, err) + + tempDir, err := ioutil.TempDir(os.TempDir(), "alertmanager") + require.NoError(t, err) + defer os.RemoveAll(tempDir) + + fallbackCfg := ` +global: + smtp_smarthost: 
'localhost:25' + smtp_from: 'youraddress@example.org' +route: + receiver: example-email +receivers: + - name: example-email + email_configs: + - to: 'youraddress@example.org' +` + + // Create the Multitenant Alertmanager. + am := createMultitenantAlertmanager(&MultitenantAlertmanagerConfig{ + ExternalURL: externalURL, + DataDir: tempDir, + }, nil, nil, mockStore, log.NewNopLogger(), nil) + am.fallbackConfig = fallbackCfg + + // Request when no user configuration is present. + req := httptest.NewRequest("GET", externalURL.String()+"/api/v1/status", nil) + req.Header.Add(user.OrgIDHeaderName, "user1") + w := httptest.NewRecorder() + + am.ServeHTTP(w, req) + + resp := w.Result() + + // It succeeds and the Alertmanager is started + require.Equal(t, http.StatusOK, resp.StatusCode) + require.Len(t, am.alertmanagers, 1) + require.True(t, am.alertmanagers["user1"].IsActive()) + + // Even after a poll it does not pause your Alertmanager + err = am.updateConfigs() + require.NoError(t, err) + + require.True(t, am.alertmanagers["user1"].IsActive()) + require.Len(t, am.alertmanagers, 1) + + // Pause the alertmanager + am.alertmanagers["user1"].Pause() + + // Request when user configuration is paused. + w = httptest.NewRecorder() + am.ServeHTTP(w, req) + + resp = w.Result() + body, _ := ioutil.ReadAll(resp.Body) + require.Equal(t, "the Alertmanager is not configured\n", string(body)) +}