We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
freqs_cis
1 parent d548245 commit 25494f9 Copy full SHA for 25494f9
train.py
@@ -191,6 +191,10 @@
191
192
# wrap model into DDP container
193
if ddp:
194
+ # Ignore the `freqs_cis` buffer so that DDP does not broadcast it at
195
+ # construction time since NCCL does not support `ComplexFloat`
196
+ prefix = "_orig_mod." if compile else ""
197
+ model._ddp_params_and_buffers_to_ignore = {prefix + "freqs_cis"}
198
model = DDP(model, device_ids=[ddp_local_rank])
199
200
# helps estimate an arbitrarily accurate loss over either split using many batches
0 commit comments