@@ -1424,3 +1424,106 @@ tests:
1424
1424
runbook_url : " https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch"
1425
1425
summary : " StatefulSet has not matched the expected number of replicas."
1426
1426
1427
+ - name : KubeCPUOvercommit alert (single-node)
1428
+ interval : 1m
1429
+ input_series :
1430
+ - series : ' namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}'
1431
+ values : ' 1x10'
1432
+ - series : ' namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}'
1433
+ values : ' 1x10'
1434
+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="cpu", job="kube-state-metrics"}'
1435
+ values : ' 1.9x10' # Allocatable CPU as observed on a node with 2 vCPUs
1436
+ - series : ' kube_node_info{cluster="kubernetes", node="n1", job="kube-state-metrics"}'
1437
+ values : ' 1x10'
1438
+ alert_rule_test :
1439
+ - eval_time : 9m
1440
+ alertname : KubeCPUOvercommit
1441
+ - eval_time : 10m
1442
+ alertname : KubeCPUOvercommit
1443
+ exp_alerts :
1444
+ - exp_labels :
1445
+ severity : warning
1446
+ exp_annotations :
1447
+ description : Cluster has overcommitted CPU resource requests for Pods by 0.385 CPU shares and cannot tolerate node failure.
1448
+ runbook_url : https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
1449
+ summary : Cluster has overcommitted CPU resource requests.
1450
+
1451
+ - name : KubeCPUOvercommit alert (multi-node)
1452
+ interval : 1m
1453
+ input_series :
1454
+ - series : ' namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}'
1455
+ values : ' 2x10'
1456
+ - series : ' namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}'
1457
+ values : ' 2x10'
1458
+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="cpu", job="kube-state-metrics"}'
1459
+ values : ' 1.9x10' # Allocatable CPU as observed on a node with 2 vCPUs
1460
+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n2", resource="cpu", job="kube-state-metrics"}'
1461
+ values : ' 1.9x10'
1462
+ - series : ' kube_node_info{cluster="kubernetes", node="n1", job="kube-state-metrics"}'
1463
+ values : ' 1x10'
1464
+ - series : ' kube_node_info{cluster="kubernetes", node="n2", job="kube-state-metrics"}'
1465
+ values : ' 1x10'
1466
+ alert_rule_test :
1467
+ - eval_time : 9m
1468
+ alertname : KubeCPUOvercommit
1469
+ - eval_time : 10m
1470
+ alertname : KubeCPUOvercommit
1471
+ exp_alerts :
1472
+ - exp_labels :
1473
+ severity : warning
1474
+ exp_annotations :
1475
+ description : Cluster has overcommitted CPU resource requests for Pods by 2.1 CPU shares and cannot tolerate node failure.
1476
+ runbook_url : https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
1477
+ summary : Cluster has overcommitted CPU resource requests.
1478
+
1479
+ - name : KubeMemoryOvercommit alert (single-node)
1480
+ interval : 1m
1481
+ input_series :
1482
+ - series : ' namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}'
1483
+ values : ' 1000000000x10' # 1 GB
1484
+ - series : ' namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}'
1485
+ values : ' 1000000000x10'
1486
+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="memory", job="kube-state-metrics"}'
1487
+ values : ' 1000000000x10'
1488
+ - series : ' kube_node_info{cluster="kubernetes", node="n1", job="kube-state-metrics"}'
1489
+ values : ' 1x10'
1490
+ alert_rule_test :
1491
+ - eval_time : 9m
1492
+ alertname : KubeMemoryOvercommit
1493
+ - eval_time : 10m
1494
+ alertname : KubeMemoryOvercommit
1495
+ exp_alerts :
1496
+ - exp_labels :
1497
+ severity : warning
1498
+ exp_annotations :
1499
+ description : Cluster has overcommitted memory resource requests for Pods by 1.15G bytes and cannot tolerate node failure.
1500
+ runbook_url : https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
1501
+ summary : Cluster has overcommitted memory resource requests.
1502
+
1503
+ - name : KubeMemoryOvercommit alert (multi-node)
1504
+ interval : 1m
1505
+ input_series :
1506
+ - series : ' namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}'
1507
+ values : ' 2000000000x10' # 2 GB
1508
+ - series : ' namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}'
1509
+ values : ' 2000000000x10'
1510
+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="memory", job="kube-state-metrics"}'
1511
+ values : ' 1000000000x10'
1512
+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n2", resource="memory", job="kube-state-metrics"}'
1513
+ values : ' 1000000000x10'
1514
+ - series : ' kube_node_info{cluster="kubernetes", node="n1", job="kube-state-metrics"}'
1515
+ values : ' 1x10'
1516
+ - series : ' kube_node_info{cluster="kubernetes", node="n2", job="kube-state-metrics"}'
1517
+ values : ' 1x10'
1518
+ alert_rule_test :
1519
+ - eval_time : 9m
1520
+ alertname : KubeMemoryOvercommit
1521
+ - eval_time : 10m
1522
+ alertname : KubeMemoryOvercommit
1523
+ exp_alerts :
1524
+ - exp_labels :
1525
+ severity : warning
1526
+ exp_annotations :
1527
+ description : Cluster has overcommitted memory resource requests for Pods by 3G bytes and cannot tolerate node failure.
1528
+ runbook_url : https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
1529
+ summary : Cluster has overcommitted memory resource requests.
0 commit comments