@@ -35,7 +35,7 @@ type DefaultMultiTenantManager struct {
35
35
36
36
// Structs for holding per-user Prometheus rules Managers
37
37
// and a corresponding metrics struct
38
- userManagerMtx sync.Mutex
38
+ userManagerMtx sync.RWMutex
39
39
userManagers map [string ]RulesManager
40
40
userManagerMetrics * ManagerMetrics
41
41
@@ -50,6 +50,10 @@ type DefaultMultiTenantManager struct {
50
50
configUpdatesTotal * prometheus.CounterVec
51
51
registry prometheus.Registerer
52
52
logger log.Logger
53
+
54
+ ruleCache map [string ][]* promRules.Group
55
+ ruleCacheMtx sync.RWMutex
56
+ syncRuleMtx sync.Mutex
53
57
}
54
58
55
59
func NewDefaultMultiTenantManager (cfg Config , managerFactory ManagerFactory , evalMetrics * RuleEvalMetrics , reg prometheus.Registerer , logger log.Logger ) (* DefaultMultiTenantManager , error ) {
@@ -85,6 +89,7 @@ func NewDefaultMultiTenantManager(cfg Config, managerFactory ManagerFactory, eva
85
89
mapper : newMapper (cfg .RulePath , logger ),
86
90
userManagers : map [string ]RulesManager {},
87
91
userManagerMetrics : userManagerMetrics ,
92
+ ruleCache : map [string ][]* promRules.Group {},
88
93
managersTotal : promauto .With (reg ).NewGauge (prometheus.GaugeOpts {
89
94
Namespace : "cortex" ,
90
95
Name : "ruler_managers_total" ,
@@ -111,15 +116,17 @@ func NewDefaultMultiTenantManager(cfg Config, managerFactory ManagerFactory, eva
111
116
}
112
117
113
118
func (r * DefaultMultiTenantManager ) SyncRuleGroups (ctx context.Context , ruleGroups map [string ]rulespb.RuleGroupList ) {
114
- // A lock is taken to ensure if this function is called concurrently, then each call
115
- // returns after the call map files and check for updates
116
- r .userManagerMtx .Lock ()
117
- defer r .userManagerMtx .Unlock ()
119
+ // this is a safety lock to ensure this method is executed sequentially
120
+ r .syncRuleMtx .Lock ()
121
+ defer r .syncRuleMtx .Unlock ()
118
122
119
123
for userID , ruleGroup := range ruleGroups {
120
124
r .syncRulesToManager (ctx , userID , ruleGroup )
121
125
}
122
126
127
+ r .userManagerMtx .Lock ()
128
+ defer r .userManagerMtx .Unlock ()
129
+
123
130
// Check for deleted users and remove them
124
131
for userID , mngr := range r .userManagers {
125
132
if _ , exists := ruleGroups [userID ]; ! exists {
@@ -142,6 +149,18 @@ func (r *DefaultMultiTenantManager) SyncRuleGroups(ctx context.Context, ruleGrou
142
149
r .managersTotal .Set (float64 (len (r .userManagers )))
143
150
}
144
151
152
+ func (r * DefaultMultiTenantManager ) updateRuleCache (user string , rules []* promRules.Group ) {
153
+ r .ruleCacheMtx .Lock ()
154
+ defer r .ruleCacheMtx .Unlock ()
155
+ r .ruleCache [user ] = rules
156
+ }
157
+
158
+ func (r * DefaultMultiTenantManager ) deleteRuleCache (user string ) {
159
+ r .ruleCacheMtx .Lock ()
160
+ defer r .ruleCacheMtx .Unlock ()
161
+ delete (r .ruleCache , user )
162
+ }
163
+
145
164
// syncRulesToManager maps the rule files to disk, detects any changes and will create/update the
146
165
// user's Prometheus Rules Manager.
147
166
func (r * DefaultMultiTenantManager ) syncRulesToManager (ctx context.Context , user string , groups rulespb.RuleGroupList ) {
@@ -154,25 +173,20 @@ func (r *DefaultMultiTenantManager) syncRulesToManager(ctx context.Context, user
154
173
return
155
174
}
156
175
157
- manager , exists := r .userManagers [user ]
158
- if ! exists || update {
176
+ manager , existing := r .getRulesManager (user , ctx )
177
+
178
+ if manager == nil {
179
+ return
180
+ }
181
+
182
+ if ! existing || update {
159
183
level .Debug (r .logger ).Log ("msg" , "updating rules" , "user" , user )
160
184
r .configUpdatesTotal .WithLabelValues (user ).Inc ()
161
- if ! exists {
162
- level .Debug (r .logger ).Log ("msg" , "creating rule manager for user" , "user" , user )
163
- manager , err = r .newManager (ctx , user )
164
- if err != nil {
165
- r .lastReloadSuccessful .WithLabelValues (user ).Set (0 )
166
- level .Error (r .logger ).Log ("msg" , "unable to create rule manager" , "user" , user , "err" , err )
167
- return
168
- }
169
- // manager.Run() starts running the manager and blocks until Stop() is called.
170
- // Hence run it as another goroutine.
171
- go manager .Run ()
172
- r .userManagers [user ] = manager
185
+ if update && existing {
186
+ r .updateRuleCache (user , manager .RuleGroups ())
173
187
}
174
-
175
188
err = manager .Update (r .cfg .EvaluationInterval , files , r .cfg .ExternalLabels , r .cfg .ExternalURL .String (), ruleGroupIterationFunc )
189
+ r .deleteRuleCache (user )
176
190
if err != nil {
177
191
r .lastReloadSuccessful .WithLabelValues (user ).Set (0 )
178
192
level .Error (r .logger ).Log ("msg" , "unable to update rule manager" , "user" , user , "err" , err )
@@ -184,6 +198,29 @@ func (r *DefaultMultiTenantManager) syncRulesToManager(ctx context.Context, user
184
198
}
185
199
}
186
200
201
+ func (r * DefaultMultiTenantManager ) getRulesManager (user string , ctx context.Context ) (RulesManager , bool ) {
202
+ r .userManagerMtx .RLock ()
203
+ manager , exists := r .userManagers [user ]
204
+ r .userManagerMtx .RUnlock ()
205
+ if exists {
206
+ return manager , true
207
+ }
208
+ r .userManagerMtx .Lock ()
209
+ defer r .userManagerMtx .Unlock ()
210
+
211
+ manager , err := r .newManager (ctx , user )
212
+ if err != nil {
213
+ r .lastReloadSuccessful .WithLabelValues (user ).Set (0 )
214
+ level .Error (r .logger ).Log ("msg" , "unable to create rule manager" , "user" , user , "err" , err )
215
+ return nil , false
216
+ }
217
+ // manager.Run() starts running the manager and blocks until Stop() is called.
218
+ // Hence run it as another goroutine.
219
+ go manager .Run ()
220
+ r .userManagers [user ] = manager
221
+ return manager , false
222
+ }
223
+
187
224
func ruleGroupIterationFunc (ctx context.Context , g * promRules.Group , evalTimestamp time.Time ) {
188
225
logMessage := []interface {}{
189
226
"msg" , "evaluating rule group" ,
@@ -269,13 +306,25 @@ func (r *DefaultMultiTenantManager) getOrCreateNotifier(userID string, userManag
269
306
return n .notifier , nil
270
307
}
271
308
309
+ func (r * DefaultMultiTenantManager ) getCachedRules (userID string ) ([]* promRules.Group , bool ) {
310
+ r .ruleCacheMtx .RLock ()
311
+ defer r .ruleCacheMtx .RUnlock ()
312
+ groups , exists := r .ruleCache [userID ]
313
+ return groups , exists
314
+ }
315
+
272
316
func (r * DefaultMultiTenantManager ) GetRules (userID string ) []* promRules.Group {
273
317
var groups []* promRules.Group
274
- r .userManagerMtx .Lock ()
275
- if mngr , exists := r .userManagers [userID ]; exists {
318
+ groups , cached := r .getCachedRules (userID )
319
+ if cached {
320
+ return groups
321
+ }
322
+ r .userManagerMtx .RLock ()
323
+ mngr , exists := r .userManagers [userID ]
324
+ r .userManagerMtx .RUnlock ()
325
+ if exists {
276
326
groups = mngr .RuleGroups ()
277
327
}
278
- r .userManagerMtx .Unlock ()
279
328
return groups
280
329
}
281
330
0 commit comments