Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 27 additions & 23 deletions invokeai/backend/training/textual_inversion_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import set_seed
from accelerate.utils import set_seed, ProjectConfiguration
from diffusers import (
AutoencoderKL,
DDPMScheduler,
Expand All @@ -35,7 +35,6 @@
from diffusers.utils import check_min_version
from diffusers.utils.import_utils import is_xformers_available
from huggingface_hub import HfFolder, Repository, whoami
from omegaconf import OmegaConf

# TODO: remove and import from diffusers.utils when the new version of diffusers is released
from packaging import version
Expand All @@ -47,6 +46,8 @@

# invokeai stuff
from invokeai.app.services.config import InvokeAIAppConfig,PagingArgumentParser
from invokeai.app.services.model_manager_service import ModelManagerService
from invokeai.backend.model_management.models import SubModelType

if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"):
PIL_INTERPOLATION = {
Expand Down Expand Up @@ -132,7 +133,7 @@ def parse_args():
model_group.add_argument(
"--model",
type=str,
default="stable-diffusion-1.5",
default="sd-1/main/stable-diffusion-v1-5",
help="Name of the diffusers model to train against, as defined in configs/models.yaml.",
)
model_group.add_argument(
Expand Down Expand Up @@ -565,7 +566,6 @@ def do_textual_inversion_training(
checkpointing_steps: int = 500,
resume_from_checkpoint: Path = None,
enable_xformers_memory_efficient_attention: bool = False,
root_dir: Path = None,
hub_model_id: str = None,
**kwargs,
):
Expand All @@ -584,13 +584,17 @@ def do_textual_inversion_training(

logging_dir = output_dir / logging_dir

accelerator_config = ProjectConfiguration()
accelerator_config.logging_dir = logging_dir
accelerator = Accelerator(
gradient_accumulation_steps=gradient_accumulation_steps,
mixed_precision=mixed_precision,
log_with=report_to,
logging_dir=logging_dir,
project_config=accelerator_config,
)

model_manager = ModelManagerService(config,logger)

# Make one log on every process with the configuration for debugging.
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
Expand Down Expand Up @@ -628,46 +632,46 @@ def do_textual_inversion_training(
elif output_dir is not None:
os.makedirs(output_dir, exist_ok=True)

models_conf = OmegaConf.load(config.model_conf_path)
model_conf = models_conf.get(model, None)
assert model_conf is not None, f"Unknown model: {model}"
known_models = model_manager.model_names()
model_name = model.split('/')[-1]
model_meta = next((mm for mm in known_models if mm[0].endswith(model_name)), None)
assert model_meta is not None, f"Unknown model: {model}"
model_info = model_manager.model_info(*model_meta)
assert (
model_conf.get("format", "diffusers") == "diffusers"
model_info['model_format'] == "diffusers"
), "This script only works with models of type 'diffusers'"
pretrained_model_name_or_path = model_conf.get("repo_id", None) or Path(
model_conf.get("path")
)
assert (
pretrained_model_name_or_path
), f"models.yaml error: neither 'repo_id' nor 'path' is defined for {model}"
pipeline_args = dict(cache_dir=config.cache_dir)
tokenizer_info = model_manager.get_model(*model_meta, submodel=SubModelType.Tokenizer)
noise_scheduler_info = model_manager.get_model(*model_meta, submodel=SubModelType.Scheduler)
text_encoder_info = model_manager.get_model(*model_meta, submodel=SubModelType.TextEncoder)
vae_info = model_manager.get_model(*model_meta, submodel=SubModelType.Vae)
unet_info = model_manager.get_model(*model_meta, submodel=SubModelType.UNet)

# Load tokenizer
pipeline_args = dict(local_files_only=True)
if tokenizer_name:
tokenizer = CLIPTokenizer.from_pretrained(tokenizer_name, **pipeline_args)
else:
tokenizer = CLIPTokenizer.from_pretrained(
pretrained_model_name_or_path, subfolder="tokenizer", **pipeline_args
tokenizer_info.location, subfolder='tokenizer', **pipeline_args
)

# Load scheduler and models
noise_scheduler = DDPMScheduler.from_pretrained(
pretrained_model_name_or_path, subfolder="scheduler", **pipeline_args
noise_scheduler_info.location, subfolder="scheduler", **pipeline_args
)
text_encoder = CLIPTextModel.from_pretrained(
pretrained_model_name_or_path,
text_encoder_info.location,
subfolder="text_encoder",
revision=revision,
**pipeline_args,
)
vae = AutoencoderKL.from_pretrained(
pretrained_model_name_or_path,
vae_info.location,
subfolder="vae",
revision=revision,
**pipeline_args,
)
unet = UNet2DConditionModel.from_pretrained(
pretrained_model_name_or_path,
unet_info.location,
subfolder="unet",
revision=revision,
**pipeline_args,
Expand Down Expand Up @@ -989,7 +993,7 @@ def do_textual_inversion_training(
save_full_model = not only_save_embeds
if save_full_model:
pipeline = StableDiffusionPipeline.from_pretrained(
pretrained_model_name_or_path,
unet_info.location,
text_encoder=accelerator.unwrap_model(text_encoder),
vae=vae,
unet=unet,
Expand Down