From 1351f0da71a359bd651871f3c92df6d1d04ac7b9 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 12 Apr 2023 12:36:33 +0200 Subject: [PATCH 1/2] Finish docs textual inversion --- docs/source/en/training/text_inversion.mdx | 45 ++++++++++++++++++++-- src/diffusers/loaders.py | 38 +++++++++++++++++- 2 files changed, 78 insertions(+), 5 deletions(-) diff --git a/docs/source/en/training/text_inversion.mdx b/docs/source/en/training/text_inversion.mdx index 68c613849301..5fc95c1c0e38 100644 --- a/docs/source/en/training/text_inversion.mdx +++ b/docs/source/en/training/text_inversion.mdx @@ -157,24 +157,61 @@ If you're interested in following along with your model training progress, you c ## Inference -Once you have trained a model, you can use it for inference with the [`StableDiffusionPipeline`]. Make sure you include the `placeholder_token` in your prompt, in this case, it is `<cat-toy>`. +Once you have trained a model, you can use it for inference with the [`StableDiffusionPipeline`]. + +The textual inversion script will by default only save the textual inversion embedding vector(s) that have +been added to the text encoder embedding matrix and consequently been trained. + + +💡 The community has created a large library of different textual inversion embedding vectors, called [sd-concepts-library](https://huggingface.co/sd-concepts-library). +Instead of training textual inversion embeddings from scratch you can also see whether a fitting textual inversion embedding has already been added to the library. + + + +To load the textual inversion embeddings you first need to load the base model that was used when training +your textual inversion embedding vectors. 
Here we assume that [`runwayml/stable-diffusion-v1-5`](https://huggingface.co/runwayml/stable-diffusion-v1-5) +was used as a base model so we load it first: ```python from diffusers import StableDiffusionPipeline +import torch -model_id = "path-to-your-trained-model" +model_id = "runwayml/stable-diffusion-v1-5" pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda") +``` -prompt = "A <cat-toy> backpack" +Next, we need to load the textual inversion embedding vector which can be done via the [`TextualInversionLoaderMixin.load_textual_inversion`] +function. Here we'll load the embeddings of the "<cat-toy>" example from before. +```python +pipe.load_textual_inversion("sd-concepts-library/cat-toy") +``` -image = pipe(prompt, num_inference_steps=50, guidance_scale=7.5).images[0] +Now we can run the pipeline making sure that the placeholder_token `<cat-toy>` is used in our prompt. +```python +prompt = "A <cat-toy> backpack" + +image = pipe(prompt, num_inference_steps=50).images[0] image.save("cat-backpack.png") ``` + +The function [`TextualInversionLoaderMixin.load_textual_inversion`] can not only +load textual embedding vectors saved in Diffusers' format, but also embedding vectors +saved in [Automatic1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) format. +To do so, you can first download the an embedding vector from [civitAI](https://civitai.com/models/3036?modelVersionId=8387) +and then load it locally: +```python +pipe.load_textual_inversion("./charturner.py") +``` +Currently there is no `load_textual_inversion` function for Flax so one has to make sure the textual inversion +embedding vector is saved as part of the model after training. 
+ +The model can then be run just like any other Flax model: + ```python import jax import numpy as np diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 31939ca4b481..8d07e10c78a0 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -368,7 +368,7 @@ def load_textual_inversion( ): r""" Load textual inversion embeddings into the text encoder of stable diffusion pipelines. Both `diffusers` and - `Automatic1111` formats are supported. + `Automatic1111` formats are supported (see example below). @@ -427,6 +427,42 @@ def load_textual_inversion( models](https://huggingface.co/docs/hub/models-gated#gated-models). + + Example: + + To load a textual inversion embedding vector in `diffusers` format: + ```py + from diffusers import StableDiffusionPipeline + import torch + + model_id = "runwayml/stable-diffusion-v1-5" + pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda") + + pipe.load_textual_inversion("sd-concepts-library/cat-toy") + + prompt = "A backpack" + + image = pipe(prompt, num_inference_steps=50).images[0] + image.save("cat-backpack.png") + ``` + + To load a textual inversion embedding vector in Automatic1111 format, make sure to first download the vector, + e.g. 
from [civitAI](https://civitai.com/models/3036?modelVersionId=9857) and then load the vector loaclly: + + ```py + from diffusers import StableDiffusionPipeline + import torch + + model_id = "runwayml/stable-diffusion-v1-5" + pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda") + + pipe.load_textual_inversion("./charturner.pt") + + prompt = "A backpack" + + image = pipe(prompt, num_inference_steps=50).images[0] + image.save("cat-backpack.png") + ``` """ if not hasattr(self, "tokenizer") or not isinstance(self.tokenizer, PreTrainedTokenizer): raise ValueError( From 99d9d8a1dd539e797fd8cedf0ca7c15fddf8304b Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 12 Apr 2023 13:11:18 +0100 Subject: [PATCH 2/2] Apply suggestions from code review Co-authored-by: Sayak Paul Co-authored-by: Pedro Cuenca --- docs/source/en/training/text_inversion.mdx | 6 +++--- src/diffusers/loaders.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/source/en/training/text_inversion.mdx b/docs/source/en/training/text_inversion.mdx index 5fc95c1c0e38..6e6971d7f119 100644 --- a/docs/source/en/training/text_inversion.mdx +++ b/docs/source/en/training/text_inversion.mdx @@ -188,7 +188,7 @@ function. Here we'll load the embeddings of the "<cat-toy>" example from before. pipe.load_textual_inversion("sd-concepts-library/cat-toy") ``` -Now we can run the pipeline making sure that the placeholder_token `<cat-toy>` is used in our prompt. +Now we can run the pipeline making sure that the placeholder token `<cat-toy>` is used in our prompt. ```python prompt = "A <cat-toy> backpack" @@ -200,10 +200,10 @@ image.save("cat-backpack.png") The function [`TextualInversionLoaderMixin.load_textual_inversion`] can not only load textual embedding vectors saved in Diffusers' format, but also embedding vectors saved in [Automatic1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) format. 
-To do so, you can first download the an embedding vector from [civitAI](https://civitai.com/models/3036?modelVersionId=8387) +To do so, you can first download an embedding vector from [civitAI](https://civitai.com/models/3036?modelVersionId=8387) and then load it locally: ```python -pipe.load_textual_inversion("./charturner.py") +pipe.load_textual_inversion("./charturnerv2.pt") ``` diff --git a/src/diffusers/loaders.py b/src/diffusers/loaders.py index 8d07e10c78a0..e814981a85c9 100644 --- a/src/diffusers/loaders.py +++ b/src/diffusers/loaders.py @@ -447,7 +447,7 @@ def load_textual_inversion( ``` To load a textual inversion embedding vector in Automatic1111 format, make sure to first download the vector, - e.g. from [civitAI](https://civitai.com/models/3036?modelVersionId=9857) and then load the vector loaclly: + e.g. from [civitAI](https://civitai.com/models/3036?modelVersionId=9857) and then load the vector locally: ```py from diffusers import StableDiffusionPipeline @@ -456,12 +456,12 @@ def load_textual_inversion( model_id = "runwayml/stable-diffusion-v1-5" pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda") - pipe.load_textual_inversion("./charturner.pt") + pipe.load_textual_inversion("./charturnerv2.pt") - prompt = "A backpack" + prompt = "charturnerv2, multiple views of the same character in the same outfit, a character turnaround of a woman wearing a black jacket and red shirt, best quality, intricate details." image = pipe(prompt, num_inference_steps=50).images[0] - image.save("cat-backpack.png") + image.save("character.png") ``` """ if not hasattr(self, "tokenizer") or not isinstance(self.tokenizer, PreTrainedTokenizer):