Update CUDA graph on scale change plus clear nodes/params (ggml-org#9550)

agray3 · Nexesenex · commit 72ef3a7e0af9 · 2024-10-24T01:37:19.000+02:00
* Avoid using the saved CUDA graph if the scale changes, and reset nodes/params on update.
  Fixes ggml-org#9451.
* Clear before resize.
diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu
@@ -2480,6 +2480,7 @@ static void set_ggml_graph_node_properties(ggml_tensor * node, ggml_graph_node_p
     for (int i = 0; i < GGML_MAX_SRC; i++) {
         graph_node_properties->src_address[i] = node->src[i] ? node->src[i]->data : nullptr;
     }
+    memcpy(graph_node_properties->op_params, node->op_params, GGML_MAX_OP_PARAMS);
 }
 
 static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_graph_node_properties * graph_node_properties) {
@@ -2511,6 +2512,12 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra
             return false;
         }
     }
+
+    if (node->op == GGML_OP_SCALE &&
+        memcmp(graph_node_properties->op_params, node->op_params, GGML_MAX_OP_PARAMS) != 0) {
+        return false;
+    }
+
     return true;
 }
 
@@ -2721,7 +2728,9 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
             // First call with null argument gets number of nodes in graph
             CUDA_CHECK(cudaGraphGetNodes(cuda_ctx->cuda_graph->graph, nullptr, &cuda_ctx->cuda_graph->num_nodes));
             // Subsequent call with non-null argument gets nodes
+            cuda_ctx->cuda_graph->nodes.clear();
             cuda_ctx->cuda_graph->nodes.resize(cuda_ctx->cuda_graph->num_nodes);
+            cuda_ctx->cuda_graph->params.clear();
             cuda_ctx->cuda_graph->params.resize(cuda_ctx->cuda_graph->num_nodes);
             if (cuda_ctx->cuda_graph->num_nodes > 0) {
                 CUDA_CHECK(cudaGraphGetNodes(cuda_ctx->cuda_graph->graph, cuda_ctx->cuda_graph->nodes.data(), &cuda_ctx->cuda_graph->num_nodes));
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
@@ -666,6 +666,7 @@ struct ggml_graph_node_properties {
     int64_t ne[GGML_MAX_DIMS];
     size_t nb[GGML_MAX_DIMS];
     void * src_address[GGML_MAX_SRC];
+    int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
 };
 
 struct ggml_cuda_graph {

Original file line number	Diff line number	Diff line change
`@@ -2480,6 +2480,7 @@ static void set_ggml_graph_node_properties(ggml_tensor * node, ggml_graph_node_p`
`2480`	`2480`	`for (int i = 0; i < GGML_MAX_SRC; i++) {`
`2481`	`2481`	`graph_node_properties->src_address[i] = node->src[i] ? node->src[i]->data : nullptr;`
`2482`	`2482`	`}`
	`2483`	`+ memcpy(graph_node_properties->op_params, node->op_params, GGML_MAX_OP_PARAMS);`
`2483`	`2484`	`}`
`2484`	`2485`
`2485`	`2486`	`static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_graph_node_properties * graph_node_properties) {`
`@@ -2511,6 +2512,12 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra`
`2511`	`2512`	`return false;`
`2512`	`2513`	`}`
`2513`	`2514`	`}`
	`2515`	`+`
	`2516`	`+ if (node->op == GGML_OP_SCALE &&`
	`2517`	`+ memcmp(graph_node_properties->op_params, node->op_params, GGML_MAX_OP_PARAMS) != 0) {`
	`2518`	`+ return false;`
	`2519`	`+ }`
	`2520`	`+`
`2514`	`2521`	`return true;`
`2515`	`2522`	`}`
`2516`	`2523`
`@@ -2721,7 +2728,9 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t`
`2721`	`2728`	`// First call with null argument gets number of nodes in graph`
`2722`	`2729`	`CUDA_CHECK(cudaGraphGetNodes(cuda_ctx->cuda_graph->graph, nullptr, &cuda_ctx->cuda_graph->num_nodes));`
`2723`	`2730`	`// Subsequent call with non-null argument gets nodes`
	`2731`	`+ cuda_ctx->cuda_graph->nodes.clear();`
`2724`	`2732`	`cuda_ctx->cuda_graph->nodes.resize(cuda_ctx->cuda_graph->num_nodes);`
	`2733`	`+ cuda_ctx->cuda_graph->params.clear();`
`2725`	`2734`	`cuda_ctx->cuda_graph->params.resize(cuda_ctx->cuda_graph->num_nodes);`
`2726`	`2735`	`if (cuda_ctx->cuda_graph->num_nodes > 0) {`
`2727`	`2736`	`CUDA_CHECK(cudaGraphGetNodes(cuda_ctx->cuda_graph->graph, cuda_ctx->cuda_graph->nodes.data(), &cuda_ctx->cuda_graph->num_nodes));`