|
123 | 123 | "steppedLine": false, |
124 | 124 | "targets": [ |
125 | 125 | { |
126 | | - "expr": "dcgm_gpu_temp", |
| 126 | + "expr": "DCGM_FI_DEV_GPU_TEMP", |
127 | 127 | "format": "time_series", |
128 | 128 | "instant": false, |
129 | 129 | "interval": "", |
|
227 | 227 | "pluginVersion": "7.1.2", |
228 | 228 | "targets": [ |
229 | 229 | { |
230 | | - "expr": "avg(dcgm_gpu_temp)", |
| 230 | + "expr": "avg(DCGM_FI_DEV_GPU_TEMP)", |
231 | 231 | "interval": "", |
232 | 232 | "legendFormat": "", |
233 | 233 | "refId": "A" |
|
286 | 286 | "steppedLine": false, |
287 | 287 | "targets": [ |
288 | 288 | { |
289 | | - "expr": "dcgm_power_usage", |
| 289 | + "expr": "DCGM_FI_DEV_POWER_USAGE", |
290 | 290 | "interval": "", |
291 | 291 | "legendFormat": "GPU {{gpu}}", |
292 | 292 | "refId": "A" |
|
408 | 408 | "pluginVersion": "7.1.2", |
409 | 409 | "targets": [ |
410 | 410 | { |
411 | | - "expr": "sum(dcgm_power_usage)", |
| 411 | + "expr": "sum(DCGM_FI_DEV_POWER_USAGE)", |
412 | 412 | "instant": true, |
413 | 413 | "interval": "", |
414 | 414 | "legendFormat": "", |
|
471 | 471 | "steppedLine": false, |
472 | 472 | "targets": [ |
473 | 473 | { |
474 | | - "expr": "dcgm_sm_clock", |
| 474 | + "expr": "DCGM_FI_DEV_SM_CLOCK", |
475 | 475 | "format": "time_series", |
476 | 476 | "instant": false, |
477 | 477 | "interval": "", |
|
523 | 523 | "alignLevel": null |
524 | 524 | } |
525 | 525 | }, |
| 526 | + { |
| 527 | + "aliasColors": {}, |
| 528 | + "bars": false, |
| 529 | + "dashLength": 10, |
| 530 | + "dashes": false, |
| 531 | + "datasource": "${DS_PROMETHEUS}", |
| 532 | + "fill": 1, |
| 533 | + "fillGradient": 0, |
| 534 | + "gridPos": { |
| 535 | + "h": 8, |
| 536 | + "w": 12, |
| 537 | + "x": 12, |
| 538 | + "y": 16 |
| 539 | + }, |
| 540 | + "hiddenSeries": false, |
| 541 | + "id": 4, |
| 542 | + "legend": { |
| 543 | + "alignAsTable": true, |
| 544 | + "avg": true, |
| 545 | + "current": true, |
| 546 | + "max": true, |
| 547 | + "min": false, |
| 548 | + "rightSide": true, |
| 549 | + "show": true, |
| 550 | + "total": false, |
| 551 | + "values": true |
| 552 | + }, |
| 553 | + "lines": true, |
| 554 | + "linewidth": 2, |
| 555 | + "nullPointMode": "null", |
| 556 | + "options": { |
| 557 | + "dataLinks": [] |
| 558 | + }, |
| 559 | + "percentage": false, |
| 560 | + "pointradius": 2, |
| 561 | + "points": false, |
| 562 | + "renderer": "flot", |
| 563 | + "seriesOverrides": [], |
| 564 | + "spaceLength": 10, |
| 565 | + "stack": false, |
| 566 | + "steppedLine": false, |
| 567 | + "targets": [ |
| 568 | + { |
| 569 | + "expr": "DCGM_FI_DEV_MEM_CLOCK", |
| 570 | + "interval": "", |
| 571 | + "legendFormat": "GPU {{gpu}}", |
| 572 | + "refId": "A" |
| 573 | + } |
| 574 | + ], |
| 575 | + "thresholds": [], |
| 576 | + "timeFrom": null, |
| 577 | + "timeRegions": [], |
| 578 | + "timeShift": null, |
| 579 | + "interval": "3", |
| 580 | + "title": "GPU Memory Clocks", |
| 581 | + "tooltip": { |
| 582 | + "shared": true, |
| 583 | + "sort": 0, |
| 584 | + "value_type": "individual" |
| 585 | + }, |
| 586 | + "type": "graph", |
| 587 | + "xaxis": { |
| 588 | + "buckets": null, |
| 589 | + "mode": "time", |
| 590 | + "name": null, |
| 591 | + "show": true, |
| 592 | + "values": [] |
| 593 | + }, |
| 594 | + "yaxes": [ |
| 595 | + { |
| 596 | + "format": "hertz", |
| 597 | + "label": null, |
| 598 | + "logBase": 1, |
| 599 | + "max": "100", |
| 600 | + "min": "0", |
| 601 | + "show": true |
| 602 | + }, |
| 603 | + { |
| 604 | + "format": "short", |
| 605 | + "label": null, |
| 606 | + "logBase": 1, |
| 607 | + "max": null, |
| 608 | + "min": null, |
| 609 | + "show": true |
| 610 | + } |
| 611 | + ], |
| 612 | + "yaxis": { |
| 613 | + "align": false, |
| 614 | + "alignLevel": null |
| 615 | + } |
| 616 | + }, |
526 | 617 | { |
527 | 618 | "aliasColors": {}, |
528 | 619 | "bars": false, |
|
570 | 661 | "steppedLine": false, |
571 | 662 | "targets": [ |
572 | 663 | { |
573 | | - "expr": "dcgm_gpu_utilization", |
| 664 | + "expr": "DCGM_FI_DEV_GPU_UTIL", |
574 | 665 | "interval": "", |
575 | 666 | "legendFormat": "GPU {{gpu}}", |
576 | 667 | "refId": "A" |
|
618 | 709 | "alignLevel": null |
619 | 710 | } |
620 | 711 | }, |
| 712 | + { |
| 713 | + "aliasColors": {}, |
| 714 | + "bars": false, |
| 715 | + "dashLength": 10, |
| 716 | + "dashes": false, |
| 717 | + "datasource": "${DS_PROMETHEUS}", |
| 718 | + "fill": 1, |
| 719 | + "fillGradient": 0, |
| 720 | + "gridPos": { |
| 721 | + "h": 8, |
| 722 | + "w": 12, |
| 723 | + "x": 12, |
| 724 | + "y": 24 |
| 725 | + }, |
| 726 | + "hiddenSeries": false, |
| 727 | + "id": 8, |
| 728 | + "legend": { |
| 729 | + "alignAsTable": true, |
| 730 | + "avg": true, |
| 731 | + "current": true, |
| 732 | + "max": true, |
| 733 | + "min": false, |
| 734 | + "rightSide": true, |
| 735 | + "show": true, |
| 736 | + "total": false, |
| 737 | + "values": true |
| 738 | + }, |
| 739 | + "lines": true, |
| 740 | + "linewidth": 2, |
| 741 | + "nullPointMode": "null", |
| 742 | + "options": { |
| 743 | + "dataLinks": [] |
| 744 | + }, |
| 745 | + "percentage": false, |
| 746 | + "pointradius": 2, |
| 747 | + "points": false, |
| 748 | + "renderer": "flot", |
| 749 | + "seriesOverrides": [], |
| 750 | + "spaceLength": 10, |
| 751 | + "stack": false, |
| 752 | + "steppedLine": false, |
| 753 | + "targets": [ |
| 754 | + { |
| 755 | + "expr": "DCGM_FI_DEV_MEM_COPY_UTIL", |
| 756 | + "interval": "", |
| 757 | + "legendFormat": "GPU {{gpu}}", |
| 758 | + "refId": "A" |
| 759 | + } |
| 760 | + ], |
| 761 | + "thresholds": [], |
| 762 | + "timeFrom": null, |
| 763 | + "timeRegions": [], |
| 764 | + "timeShift": null, |
| 765 | + "interval": 3, |
| 766 | + "title": "GPU Memory Copy Utilization", |
| 767 | + "tooltip": { |
| 768 | + "shared": true, |
| 769 | + "sort": 0, |
| 770 | + "value_type": "cumulative" |
| 771 | + }, |
| 772 | + "type": "graph", |
| 773 | + "xaxis": { |
| 774 | + "buckets": null, |
| 775 | + "mode": "time", |
| 776 | + "name": null, |
| 777 | + "show": true, |
| 778 | + "values": [] |
| 779 | + }, |
| 780 | + "yaxes": [ |
| 781 | + { |
| 782 | + "format": "percent", |
| 783 | + "label": null, |
| 784 | + "logBase": 1, |
| 785 | + "max": "100", |
| 786 | + "min": "0", |
| 787 | + "show": true |
| 788 | + }, |
| 789 | + { |
| 790 | + "format": "short", |
| 791 | + "label": null, |
| 792 | + "logBase": 1, |
| 793 | + "max": null, |
| 794 | + "min": null, |
| 795 | + "show": true |
| 796 | + } |
| 797 | + ], |
| 798 | + "yaxis": { |
| 799 | + "align": false, |
| 800 | + "alignLevel": null |
| 801 | + } |
| 802 | + }, |
621 | 803 | { |
622 | 804 | "aliasColors": {}, |
623 | 805 | "bars": false, |
|
664 | 846 | "steppedLine": false, |
665 | 847 | "targets": [ |
666 | 848 | { |
667 | | - "expr": "dcgm_fb_used", |
| 849 | + "expr": "DCGM_FI_DEV_FB_USED", |
668 | 850 | "interval": "", |
669 | 851 | "legendFormat": "GPU {{gpu}}", |
670 | 852 | "refId": "A" |
|
759 | 941 | "steppedLine": false, |
760 | 942 | "targets": [ |
761 | 943 | { |
762 | | - "expr": "dcgm_fb_free", |
| 944 | + "expr": "DCGM_FI_DEV_FB_FREE", |
763 | 945 | "interval": "", |
764 | 946 | "legendFormat": "GPU {{gpu}}", |
765 | 947 | "refId": "A" |
|
0 commit comments