File tree Expand file tree Collapse file tree 6 files changed +16
-11
lines changed
experiments/transformers_backend Expand file tree Collapse file tree 6 files changed +16
-11
lines changed Original file line number Diff line number Diff line change @@ -131,12 +131,6 @@ class Model:
131131 """
132132
133133
134- @dataclass
135- class HFTransformers :
136- model : str = ""
137- """HuggingFace model ID (e.g., 'Qwen/Qwen3-4B-Instruct-2507')"""
138-
139-
140134@dataclass
141135class Optimizer :
142136 name : str = "AdamW"
@@ -903,7 +897,6 @@ class JobConfig:
903897 profiling : Profiling = field (default_factory = Profiling )
904898 metrics : Metrics = field (default_factory = Metrics )
905899 model : Model = field (default_factory = Model )
906- hf_transformers : HFTransformers = field (default_factory = HFTransformers )
907900 optimizer : Optimizer = field (default_factory = Optimizer )
908901 lr_scheduler : LRScheduler = field (default_factory = LRScheduler )
909902 training : Training = field (default_factory = Training )
Original file line number Diff line number Diff line change @@ -17,7 +17,8 @@ hf_assets_path = "./tests/assets/tokenizer"
1717+ model = "Qwen/Qwen3-4B-Instruct-2507"
1818...
1919```
20- - Train: ` LOG_RANK=7 CONFIG_FILE=<YOUR_PATHQ/torchtitan/experiments/transformers_backend/configs/qwen3_fsdp2_tp2_pp2.toml ./run_train.sh `
20+ - Train: ` LOG_RANK=7 CONFIG_FILE=<YOUR_PATH>/torchtitan/experiments/transformers_backend/configs/qwen3_fsdp2_tp2_pp2.toml ./run_train.sh --job.custom_config_module=torchtitan.experiments.transformers_backend.job_config --compile.enable `
21+ - Make sure you have created the tokenizer assets beforehand (see `hf_assets_path` above).
2122<img width =" 1334 " height =" 453 " alt =" image " src =" https://github.com/user-attachments/assets/da459448-027b-4af9-8176-6a3e433a272c " />
2223
2324## Supported Features
Original file line number Diff line number Diff line change 2424 RowwiseParallel ,
2525 SequenceParallel ,
2626)
27- from torchtitan .config import JobConfig , TORCH_DTYPE_MAP
27+ from torchtitan .experiments .transformers_backend .job_config import JobConfig
28+ from torchtitan .config import TORCH_DTYPE_MAP
2829from torchtitan .config .job_config import ActivationCheckpoint as ACConfig
2930from torchtitan .distributed import NoParallel , ParallelDims
3031
Original file line number Diff line number Diff line change 1919)
2020
2121from torchtitan .components .loss import LossFunction
22- from torchtitan .config import JobConfig
22+ from torchtitan .experiments . transformers_backend . job_config import JobConfig
2323from torchtitan .distributed import ParallelDims
2424from torchtitan .distributed .pipeline_parallel import build_pipeline_schedule
2525from torchtitan .protocols .train_spec import BaseModelArgs , ParallelizeFunction
Original file line number Diff line number Diff line change 1+ from dataclasses import dataclass , field
2+
3+ @dataclass
4+ class HFTransformers :
5+ model : str = ""
6+ """HuggingFace model ID (e.g., 'Qwen/Qwen3-4B-Instruct-2507')"""
7+
8+ @dataclass
9+ class JobConfig :
10+ hf_transformers : HFTransformers = field (default_factory = HFTransformers )
Original file line number Diff line number Diff line change 77from dataclasses import dataclass
88
99from torch import nn
10- from torchtitan .config import JobConfig
10+ from torchtitan .experiments . transformers_backend . job_config import JobConfig
1111from torchtitan .models .utils import get_dense_model_nparams_and_flops
1212from torchtitan .protocols import BaseModelArgs
1313from transformers import AutoConfig
You can’t perform that action at this time.
0 commit comments