diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index 45988c2e9b0d..c493a764f56d 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -110,10 +110,14 @@ def compile(self, compiled_graph = self.load(graph, example_inputs, graph_index, runtime_shape) if compiled_graph is not None: - if graph_index == 0: - # adds some info logging for the first graph - logger.info("Directly load the compiled graph for shape %s " - "from the cache", str(runtime_shape)) # noqa + if graph_index == num_graphs - 1: + # after loading the last graph for this shape, record the time. + # there can be multiple graphs due to piecewise compilation. + now = time.time() + elapsed = now - compilation_start_time + logger.info( + "Directly load the compiled graph(s) for shape %s " + "from the cache, took %.3f s", str(runtime_shape), elapsed) return compiled_graph # no compiler cached the graph, or the cache is disabled,