We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent d5eb8f4 commit 0e7f6e0Copy full SHA for 0e7f6e0
vllm/v1/worker/gpu_model_runner.py
@@ -237,6 +237,13 @@ def __init__(
237
dtype=torch.int64,
238
device="cpu",
239
pin_memory=self.pin_memory)
240
+
241
+ # NOTE: `mrope_positions_np` shares the same
242
+ # underlying data with `mrope_positions_cpu`.
243
+ #
244
+ # `mrope_positions_np` is created for the
245
+ # numba accelerated func `mrope_assign_next_input_positions`
246
+ # which can only operate on numpy arrays.
247
self.mrope_positions_np = self.mrope_positions_cpu.numpy()
248
249
# Only relevant for models using ALiBi (e.g., MPT)
0 commit comments