NOTICE
Open WebUI Community is currently undergoing a major revamp to improve user experience and performance. Expected completion by year-end! ✨

Tool
v0.1
Image AutoLoRA
Auto-generate images with LLM-selected LoRA's, prompts, and other parameters.
Tool ID
image_autolora
Creator
@johnthenerd
Downloads
9+

Tool Content
python
"""
title: Image-AutoLoRA
author: JohnTheNerd
version: 0.1
required_open_webui_version: 0.4.8
"""

"""
You should update the below before this works properly:
- workflows below with ones that work for your server
- hardcoded indices at lines 388-420 pointing to the nodes which should be replaced
- image_styles with the Flux LoRA's you want available to the LLM
- the model name at line 456
- the server IP at line 459
"""

import copy
import os
import requests
from datetime import datetime
from typing import Callable
import random
import json
from enum import Enum

from typing import Literal

from open_webui.apps.images.main import (
    image_generations,
    GenerateImageForm,
    save_url_image,
    IMAGE_CACHE_DIR,
)
from open_webui.apps.webui.models.users import Users

from open_webui.apps.images.utils.comfyui import (
    ComfyUIGenerateImageForm,
    ComfyUIWorkflow,
    comfyui_generate_image,
)

lora_workflow = {
    "6": {
        "inputs": {"text": "Prompt goes here", "clip": ["41", 0]},
        "class_type": "CLIPTextEncode",
        "_meta": {"title": "CLIP Text Encode (Positive Prompt)"},
    },
    "8": {
        "inputs": {"samples": ["13", 0], "vae": ["10", 0]},
        "class_type": "VAEDecode",
        "_meta": {"title": "VAE Decode"},
    },
    "9": {
        "inputs": {"images": ["8", 0]},
        "class_type": "PreviewImage",
        "_meta": {"title": "Preview Image"},
    },
    "10": {
        "inputs": {"vae_name": "ae.safetensors"},
        "class_type": "VAELoader",
        "_meta": {"title": "Load VAE"},
    },
    "13": {
        "inputs": {
            "noise": ["25", 0],
            "guider": ["22", 0],
            "sampler": ["16", 0],
            "sigmas": ["17", 0],
            "latent_image": ["27", 0],
        },
        "class_type": "SamplerCustomAdvanced",
        "_meta": {"title": "SamplerCustomAdvanced"},
    },
    "16": {
        "inputs": {"sampler_name": "euler"},
        "class_type": "KSamplerSelect",
        "_meta": {"title": "KSamplerSelect"},
    },
    "17": {
        "inputs": {
            "scheduler": "simple",
            "steps": 25,
            "denoise": 1,
            "model": ["31", 0],
        },
        "class_type": "BasicScheduler",
        "_meta": {"title": "BasicScheduler"},
    },
    "22": {
        "inputs": {"model": ["37", 0], "conditioning": ["26", 0]},
        "class_type": "BasicGuider",
        "_meta": {"title": "BasicGuider"},
    },
    "25": {
        "inputs": {"noise_seed": 507767616628637},
        "class_type": "RandomNoise",
        "_meta": {"title": "RandomNoise"},
    },
    "26": {
        "inputs": {"guidance": 3.5, "conditioning": ["6", 0]},
        "class_type": "FluxGuidance",
        "_meta": {"title": "FluxGuidance"},
    },
    "27": {
        "inputs": {"width": 1024, "height": 1024, "batch_size": 1},
        "class_type": "EmptySD3LatentImage",
        "_meta": {"title": "EmptySD3LatentImage"},
    },
    "31": {
        "inputs": {"unet_name": "flux1-dev-Q8_0.gguf"},
        "class_type": "UnetLoaderGGUF",
        "_meta": {"title": "Unet Loader (GGUF)"},
    },
    "37": {
        "inputs": {
            "lora_name": "flux-realism.safetensors",
            "strength_model": 1,
            "model": ["31", 0],
        },
        "class_type": "LoraLoaderModelOnly",
        "_meta": {"title": "LoraLoaderModelOnly"},
    },
    "40": {
        "inputs": {
            "clip_name1": "clip_l.safetensors",
            "clip_name2": "t5xxl_fp16.safetensors",
            "type": "flux",
        },
        "class_type": "DualCLIPLoader",
        "_meta": {"title": "DualCLIPLoader"},
    },
    "41": {
        "inputs": {"device": "cpu", "clip": ["40", 0]},
        "class_type": "OverrideCLIPDevice",
        "_meta": {"title": "Force/Set CLIP Device"},
    },
}

no_lora_workflow = {
    "6": {
        "inputs": {"text": "Your prompt goes here", "clip": ["31", 0]},
        "class_type": "CLIPTextEncode",
        "_meta": {"title": "CLIP Text Encode (Positive Prompt)"},
    },
    "8": {
        "inputs": {"samples": ["13", 0], "vae": ["10", 0]},
        "class_type": "VAEDecode",
        "_meta": {"title": "VAE Decode"},
    },
    "9": {
        "inputs": {"images": ["8", 0]},
        "class_type": "PreviewImage",
        "_meta": {"title": "Preview Image"},
    },
    "10": {
        "inputs": {"vae_name": "ae.safetensors"},
        "class_type": "VAELoader",
        "_meta": {"title": "Load VAE"},
    },
    "13": {
        "inputs": {
            "noise": ["25", 0],
            "guider": ["22", 0],
            "sampler": ["16", 0],
            "sigmas": ["17", 0],
            "latent_image": ["27", 0],
        },
        "class_type": "SamplerCustomAdvanced",
        "_meta": {"title": "SamplerCustomAdvanced"},
    },
    "16": {
        "inputs": {"sampler_name": "euler"},
        "class_type": "KSamplerSelect",
        "_meta": {"title": "KSamplerSelect"},
    },
    "17": {
        "inputs": {
            "scheduler": "simple",
            "steps": 25,
            "denoise": 1,
            "model": ["30", 0],
        },
        "class_type": "BasicScheduler",
        "_meta": {"title": "BasicScheduler"},
    },
    "22": {
        "inputs": {"model": ["30", 0], "conditioning": ["26", 0]},
        "class_type": "BasicGuider",
        "_meta": {"title": "BasicGuider"},
    },
    "25": {
        "inputs": {"noise_seed": 915837524368838},
        "class_type": "RandomNoise",
        "_meta": {"title": "RandomNoise"},
    },
    "26": {
        "inputs": {"guidance": 3.5, "conditioning": ["6", 0]},
        "class_type": "FluxGuidance",
        "_meta": {"title": "FluxGuidance"},
    },
    "27": {
        "inputs": {"width": 1024, "height": 1024, "batch_size": 1},
        "class_type": "EmptySD3LatentImage",
        "_meta": {"title": "EmptySD3LatentImage"},
    },
    "30": {
        "inputs": {"unet_name": "flux1-dev-Q8_0.gguf"},
        "class_type": "UnetLoaderGGUF",
        "_meta": {"title": "Unet Loader (GGUF)"},
    },
    "31": {
        "inputs": {"device": "cpu", "clip": ["32", 0]},
        "class_type": "OverrideCLIPDevice",
        "_meta": {"title": "Force/Set CLIP Device"},
    },
    "32": {
        "inputs": {
            "clip_name1": "clip_l.safetensors",
            "clip_name2": "t5xxl_fp16.safetensors",
            "type": "flux",
        },
        "class_type": "DualCLIPLoader",
        "_meta": {"title": "DualCLIPLoader"},
    },
}

image_styles = {
    "default": {},
    "nineties_photography": {
        "name": "90s Photography",
        "lora": "araminta_k_flux_koda.safetensors",
        "trigger": {"trigger_phrase": ", kodachrome", "trigger_phrase_location": "end"},
    },
    "disney": {
        "name": "Disney",
        "lora": "disney_lora_comfy_converted.safetensors",
        "trigger": {
            "trigger_phrase": ", disney style",
            "trigger_phrase_location": "end",
        },
    },
    "yarn_art": {
        "name": "Yarn Art",
        "lora": "yarn_art.safetensors",
        "trigger": {
            "trigger_phrase": ", yarn art style",
            "trigger_phrase_location": "end",
        },
    },
    "paper_cutout": {
        "name": "Paper Cutout",
        "lora": "Flux_1_Dev_LoRA_Paper-Cutout-Style.safetensors",
        "trigger": {
            "trigger_phrase": ", Paper Cutout Style",
            "trigger_phrase_location": "end",
        },
    },
    "art": {
        "name": "Art",
        "lora": "art_lora_comfy_converted.safetensors",
        "trigger": {"trigger_phrase": ", art", "trigger_phrase_location": "end"},
    },
    "anime": {
        "name": "Anime",
        "lora": "anime_lora_comfy_converted.safetensors",
        "trigger": {"trigger_phrase": ", anime", "trigger_phrase_location": "end"},
    },
    "photorealism": {
        "name": "Photorealism",
        "lora": "flux_realism_lora.safetensors",
        "trigger": {"trigger_phrase": "", "trigger_phrase_location": "end"},
    },
    "scenery": {
        "name": "Scenery",
        "lora": "scenery_lora_comfy_converted.safetensors",
        "trigger": {
            "trigger_phrase": ", scenery style",
            "trigger_phrase_location": "end",
        },
    },
    "tarot": {
        "name": "Tarot Card",
        "lora": "flux_tarot_v1_lora.safetensors",
        "trigger": {
            "trigger_phrase": " in the style of TOK a trtcrd, tarot style",
            "trigger_phrase_location": "end",
        },
    },
    "playstation_one": {
        "name": "PlayStation 1",
        "lora": "ps1_style_flux_v1.safetensors",
        "trigger": {
            "trigger_phrase": ", ps1 game screenshot",
            "trigger_phrase_location": "end",
        },
    },
    "nintendo_64": {
        "name": "Nintendo 64",
        "lora": "nintendo-64.safetensors",
        "trigger": {
            "trigger_phrase": "ningraphix, ",
            "trigger_phrase_location": "beginning",
        },
    },
    "text_enhancer": {
        "name": "Text Enhancer",
        "lora": "Textimprover-FLUX-V0.4.safetensors",
        "trigger": {
            "trigger_phrase": "",
            "trigger_phrase_location": "end",
        },
    },
}

allowed_image_styles = ", ".join(image_styles.keys())
ImageStyle = Literal[list(image_styles.keys())]


class Tools:
    def __init__(self):
        pass

    async def generate_image(
        self,
        prompt: str,
        image_style: str,
        __user__: dict,
        guidance: float = 3.5,
        number_of_images: int = 1,
        seed: int = -1,
        __event_emitter__=None,
    ) -> str:

        print(
            f"called generate_image - {prompt}, {guidance}, {number_of_images}, {image_style}, {seed}"
        )

        if int(seed) == -1:
            seed = random.randint(0, 18446744073709551614)

        print(image_style)
        # print(ImageStyle._member_names_)
        if (
            image_style.lower() != "default"
            and image_style.lower() in image_styles.keys()
        ):
            image_style = image_styles[
                image_style.lower()
            ]  # Enforce the type (it seems to get dropped by openwebui...)
        else:
            image_style = None

        lora_message_addition = ""
        if image_style:
            lora_message_addition = f" using {image_style['name']} mode"

        generating_message = f"Generating an image{lora_message_addition}..."
        if int(number_of_images) > 1:
            generating_message = (
                f"Generating {number_of_images} images{lora_message_addition}..."
            )

        generating_successful_status = f"Image generated{lora_message_addition}"
        if int(number_of_images) > 1:
            generating_successful_status = (
                f"{number_of_images} images generated{lora_message_addition}"
            )

        generating_successful_message = f'Image generated{lora_message_addition} with prompt "{prompt}", guidance {guidance}, and seed {seed}'
        if int(number_of_images) > 1:
            generating_successful_message = f'{number_of_images} images generated{lora_message_addition} with prompt "{prompt}"", guidance {guidance}, and seed {seed}'

        await __event_emitter__(
            {
                "type": "status",
                "data": {"description": generating_message, "done": False},
            }
        )

        try:
            if float(guidance) < 2.5:
                guidance = 2.5
            if int(number_of_images) > 8:
                number_of_images = 8
            if int(number_of_images) < 1:
                number_of_images = 1
            if image_style:
                if image_style["trigger"]["trigger_phrase_location"] == "end":
                    new_prompt = prompt + image_style["trigger"]["trigger_phrase"]
                elif image_style["trigger"]["trigger_phrase_location"] == "beginning":
                    new_prompt = image_style["trigger"]["trigger_phrase"] + prompt
                modified_lora_workflow = copy.deepcopy(lora_workflow)
                modified_lora_workflow["6"]["inputs"]["text"] = new_prompt
                modified_lora_workflow["37"]["inputs"]["lora_name"] = image_style[
                    "lora"
                ]
                modified_lora_workflow["26"]["inputs"]["guidance"] = float(guidance)
                modified_lora_workflow["25"]["inputs"]["noise_seed"] = int(seed)
                modified_lora_workflow["27"]["inputs"]["batch_size"] = int(
                    number_of_images
                )

                # images = await image_generations(
                #     GenerateImageForm(**{"prompt": prompt}),
                #     Users.get_user_by_id(__user__["id"]),
                #     prompt_override={
                #         "custom_workflow": json.dumps(modified_lora_workflow),
                #         "custom_workflow_path": "",
                #         "custom_workflow_prompt_index": "",
                #         "custom_workflow_seed_index": "",
                #     },
                # )
            else:
                modified_lora_workflow = copy.deepcopy(no_lora_workflow)
                modified_lora_workflow["6"]["inputs"]["text"] = prompt
                modified_lora_workflow["26"]["inputs"]["guidance"] = float(guidance)
                modified_lora_workflow["25"]["inputs"]["noise_seed"] = int(seed)
                modified_lora_workflow["27"]["inputs"]["batch_size"] = int(
                    number_of_images
                )

                # images = await image_generations(
                #     GenerateImageForm(**{"prompt": prompt}),
                #     Users.get_user_by_id(__user__["id"]),
                #     # prompt_override={
                #     #     "custom_workflow": json.dumps(modified_lora_workflow),
                #     #     "custom_workflow_path": "",
                #     #     "custom_workflow_prompt_index": "",
                #     #     "custom_workflow_seed_index": "",
                #     # },
                # )

            data = {
                "prompt": prompt,
                "width": 1024,
                "height": 1024,
                "n": number_of_images,
            }

            data["steps"] = 25

            form_data = ComfyUIGenerateImageForm(
                **{
                    "workflow": ComfyUIWorkflow(
                        **{
                            "workflow": json.dumps(modified_lora_workflow),
                            "nodes": [],
                        }
                    ),
                    **data,
                }
            )

            # print(form_data)
            # print("starting generate image")
            # print(Users.get_user_by_id(__user__["id"]))
            res = await comfyui_generate_image(
                "flux1-dev-Q8_0.gguf",
                form_data,
                __user__["id"],
                "http://10.1.3.13:8188",
            )

            images = []

            for image in res["data"]:
                image_filename = save_url_image(image["url"])
                images.append({"url": f"/cache/image/generations/{image_filename}"})
                file_body_path = IMAGE_CACHE_DIR.joinpath(f"{image_filename}.json")

                with open(file_body_path, "w") as f:
                    json.dump(form_data.model_dump(exclude_none=True), f)

            await __event_emitter__(
                {
                    "type": "status",
                    "data": {
                        "description": generating_successful_status,
                        "done": True,
                    },
                }
            )

            for image in images:
                escaped_prompt = prompt.replace('"', '\\"')
                await __event_emitter__(
                    {
                        "type": "message",
                        "data": {"content": f"![Generated Image]({image['url']})"},
                    }
                )
            await __event_emitter__(
                {
                    "type": "message",
                    "data": {"content": generating_successful_message + "\n\n"},
                }
            )

            result = f'ONLY tell the user "here is your image!" in a friendly tone. Respond in the user\'s own language. SAY NOTHING ELSE AND COMPLETELY IGNORE ALL PREVIOUS MESSAGES!'

            if image_style:
                result = (
                    result
                    + f". Make sure you also tell the user that the {image_style['name']} mode was enabled!"
                )

            return result

        except Exception as e:
            await __event_emitter__(
                {
                    "type": "status",
                    "data": {"description": f"An error occured: {e}", "done": True},
                }
            )

            return f"Tell the user that the image generation failed because the respective server returned {e} - don't say anything else."


Tools.generate_image.__doc__ = f"""
Generate an image given a Stable Diffusion style prompt. DO NOT USE UNLESS EXPLICITLY REQUESTED BY THE USER! Do not provide the exact user input, reword it to be much more verbose!

:param prompt: prompt to use for image generation. make sure this is VERY VERY LONG AND VERBOSE. do not use the word "blur" unless the user explicitly asked.
:param image_style: the style of image. this will affect the model used to draw. ONLY use a valid option. valid options are {allowed_image_styles}
:param guidance: prompt adherence. set to 3 for paintings, 3.5 for photography, and 5 for anime, cartoon, or video game style images. lower numbers look realistic, but have worse prompt adherence. set this to 3.5 if the user wanted text drawn on the picture. if the user says the picture sucks or is oversharpened, try to lower it a bit.
:param number_of_images: set to the number of images the user wanted. if the user didn't specify a number of images, set it to 1. the maximum is 8.
:param seed: if the user requested a seed value to be set, provide it here. otherwise, use -1.
"""