Skip to content

Commit 38cd1bc

Browse files
committed
Review feedback:
- Simplify AM starting logic - Don't upload any config to object store Signed-off-by: gotjosh <[email protected]>
1 parent 512fec8 commit 38cd1bc

File tree

3 files changed

+85
-16
lines changed

3 files changed

+85
-16
lines changed

CHANGELOG.md

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,8 @@
2828
* [ENHANCEMENT] Add "integration" as a label for `cortex_alertmanager_notifications_total` and `cortex_alertmanager_notifications_failed_total` metrics. #3056
2929
* [ENHANCEMENT] Add `cortex_ruler_config_last_reload_successful` and `cortex_ruler_config_last_reload_successful_seconds` to check status of users rule manager. #3056
3030
* [ENHANCEMENT] Memcached dial() calls now have an optional circuit-breaker to avoid hammering a broken cache #3051
31-
<<<<<<< HEAD
3231
* [ENHANCEMENT] Add TLS support to etcd client. #3102
33-
=======
34-
* [ENHANCEMENT] When a tenant accesses the Alertmanager UI or its API, if we have valid `-alertmanager.configs.fallback` we'll use that to start the manager and avoid failing the request. #XXXX
35-
>>>>>>> baf7a0e40... Start an Alertmanager when the first request is received
32+
* [ENHANCEMENT] When a tenant accesses the Alertmanager UI or its API, if we have valid `-alertmanager.configs.fallback` we'll use that to start the manager and avoid failing the request. #3073
3633
* [BUGFIX] Query-frontend: Fixed rounding for incoming query timestamps, to be 100% Prometheus compatible. #2990
3734
* [BUGFIX] Querier: Merge results from chunks and blocks ingesters when using streaming of results. #3013
3835
* [BUGFIX] Querier: query /series from ingesters regardless the `-querier.query-ingesters-within` setting. #3035

pkg/alertmanager/multitenant.go

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -463,31 +463,36 @@ func (am *MultitenantAlertmanager) ServeHTTP(w http.ResponseWriter, req *http.Re
463463
userAM, ok := am.alertmanagers[userID]
464464
am.alertmanagersMtx.Unlock()
465465

466-
if !ok && am.fallbackConfig != "" {
466+
if ok {
467+
if !userAM.IsActive() {
468+
http.Error(w, "the Alertmanager is not configured", http.StatusNotFound)
469+
return
470+
}
471+
472+
userAM.mux.ServeHTTP(w, req)
473+
return
474+
}
475+
476+
if am.fallbackConfig != "" {
467477
userAM, err = am.alertmanagerFromFallbackConfig(userID)
468478
if err != nil {
469479
http.Error(w, "Failed to initialize the Alertmanager", http.StatusInternalServerError)
480+
return
470481
}
471-
}
472482

473-
if !userAM.IsActive() {
474-
http.Error(w, "the Alertmanager is not configured", http.StatusNotFound)
483+
userAM.mux.ServeHTTP(w, req)
475484
return
476485
}
477486

478-
userAM.mux.ServeHTTP(w, req)
487+
http.Error(w, "the Alertmanager is not configured", http.StatusNotFound)
479488
}
480489

481490
func (am *MultitenantAlertmanager) alertmanagerFromFallbackConfig(userID string) (*Alertmanager, error) {
482-
// There's a potential race condition here. If a tenant:
483-
// a) uploads a config and b) receives an alert or access the UI
484-
// before we're able the start the new manager we'll replace that config with an empty
485-
// config.
491+
// Upload an empty config so that the Alertmanager is no de-activated in the next poll
486492
cfg := &UserConfig{AlertmanagerConfig: ""}
487493
cfgDesc := alerts.ToProto(cfg.AlertmanagerConfig, cfg.TemplateFiles, userID)
488494
err := am.store.SetAlertConfig(context.Background(), cfgDesc)
489495
if err != nil {
490-
level.Error(am.logger).Log("msg", "unable to store fallback configuration", "user", userID, "err", err.Error())
491496
return nil, err
492497
}
493498

@@ -497,7 +502,6 @@ func (am *MultitenantAlertmanager) alertmanagerFromFallbackConfig(userID string)
497502
return nil, err
498503
}
499504

500-
// We need to check again in case they've had a new alertmanager started by the time we're ready
501505
am.alertmanagersMtx.Lock()
502506
defer am.alertmanagersMtx.Unlock()
503507
return am.alertmanagers[userID], nil

pkg/alertmanager/multitenant_test.go

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"context"
66
"fmt"
77
"io/ioutil"
8+
"net/http"
89
"net/http/httptest"
910
"os"
1011
"testing"
@@ -48,7 +49,8 @@ func (m *mockAlertStore) GetAlertConfig(ctx context.Context, user string) (alert
4849
}
4950

5051
func (m *mockAlertStore) SetAlertConfig(ctx context.Context, cfg alerts.AlertConfigDesc) error {
51-
return fmt.Errorf("not implemented")
52+
m.configs[cfg.User] = cfg
53+
return nil
5254
}
5355

5456
func (m *mockAlertStore) DeleteAlertConfig(ctx context.Context, user string) error {
@@ -241,3 +243,69 @@ func TestAlertmanager_ServeHTTP(t *testing.T) {
241243
body, _ = ioutil.ReadAll(resp.Body)
242244
require.Equal(t, "the Alertmanager is not configured\n", string(body))
243245
}
246+
247+
func TestAlertmanager_ServeHTTPWithFallbackConfig(t *testing.T) {
248+
mockStore := &mockAlertStore{
249+
configs: map[string]alerts.AlertConfigDesc{},
250+
}
251+
252+
externalURL := flagext.URLValue{}
253+
err := externalURL.Set("http://localhost:8080/alertmanager")
254+
require.NoError(t, err)
255+
256+
tempDir, err := ioutil.TempDir(os.TempDir(), "alertmanager")
257+
require.NoError(t, err)
258+
defer os.RemoveAll(tempDir)
259+
260+
fallbackCfg := `
261+
global:
262+
smtp_smarthost: 'localhost:25'
263+
smtp_from: '[email protected]'
264+
route:
265+
receiver: example-email
266+
receivers:
267+
- name: example-email
268+
email_configs:
269+
270+
`
271+
272+
// Create the Multitenant Alertmanager.
273+
reg := prometheus.NewPedanticRegistry()
274+
am := createMultitenantAlertmanager(&MultitenantAlertmanagerConfig{
275+
ExternalURL: externalURL,
276+
DataDir: tempDir,
277+
}, nil, nil, mockStore, log.NewNopLogger(), reg)
278+
am.fallbackConfig = fallbackCfg
279+
280+
// Request when no user configuration is present.
281+
req := httptest.NewRequest("GET", externalURL.String()+"/api/v1/status", nil)
282+
req.Header.Add(user.OrgIDHeaderName, "user1")
283+
w := httptest.NewRecorder()
284+
285+
am.ServeHTTP(w, req)
286+
287+
resp := w.Result()
288+
289+
// It succeeds and the Alertmanager is started
290+
require.Equal(t, http.StatusOK, resp.StatusCode)
291+
require.Len(t, am.alertmanagers, 1)
292+
require.True(t, am.alertmanagers["user1"].IsActive())
293+
294+
// Even after a poll it does not pause your Alertmanager
295+
err = am.updateConfigs()
296+
require.NoError(t, err)
297+
298+
require.True(t, am.alertmanagers["user1"].IsActive())
299+
require.Len(t, am.alertmanagers, 1)
300+
301+
// Pause the alertmanager
302+
am.alertmanagers["user1"].Pause()
303+
304+
// Request when user configuration is paused.
305+
w = httptest.NewRecorder()
306+
am.ServeHTTP(w, req)
307+
308+
resp = w.Result()
309+
body, _ := ioutil.ReadAll(resp.Body)
310+
require.Equal(t, "the Alertmanager is not configured\n", string(body))
311+
}

0 commit comments

Comments
 (0)