ImageGen Function • Open WebUI Community

""" title: OpenAI Image Generator Pipe author: Professor Patterns description: Generate images using OpenAI's gpt-image-1 model and display them in chat version: 1.0.0 license: MIT """ import json import base64 import traceback import httpx from pydantic import BaseModel, Field from typing import Optional, Dict, Any, List, Callable, Awaitable, AsyncGenerator class Pipe: class Valves(BaseModel): OPENAI_API_KEY: str = Field( default="", description="Your OpenAI API key for authentication" ) OPENAI_API_BASE_URL: str = Field( default="https://api.openai.com/v1", description="Base URL for OpenAI API", ) MODEL: str = Field( default="gpt-image-1", description="The model to use for image generation (gpt-image-1, dall-e-3, dall-e-2)", ) SIZE: str = Field( default="1024x1024", description="Size of the generated image (1024x1024, 1536x1024, 1024x1536 for gpt-image-1)", ) QUALITY: str = Field( default="auto", description="Quality of the image (auto, high, medium, low for gpt-image-1; hd, standard for dall-e-3)", ) OUTPUT_FORMAT: str = Field( default="png", description="Format of the generated image (png, jpeg, webp for gpt-image-1)", ) BACKGROUND: str = Field( default="auto", description="Background transparency (transparent, opaque, auto for gpt-image-1)", ) MODERATION: str = Field( default="auto", description="Content moderation level for gpt-image-1 (auto, low)", ) OUTPUT_COMPRESSION: int = Field( default=100, description="Compression level (0-100%) for gpt-image-1 with webp/jpeg formats", ) def __init__(self): self.valves = self.Valves() self.id = "openai_image_generator" self.name = "OpenAI Image Generator" self.emitter = None def pipes(self) -> List[dict]: return [ { "id": "openai-image-generator", "name": "OpenAI Image Generator", } ] def _get_last_user_message(self, messages): """Extract the last user message from the conversation""" for message in reversed(messages): if message.get("role") == "user": content = message.get("content") if isinstance(content, str): # Limit content to 30000 characters to stay safely under the limit return content[:30000] elif isinstance(content, list): # Handle content that might be a list of parts for part in content: if part.get("type") == "text": # Limit content to 30000 characters to stay safely under the limit return part.get("text", "")[:30000] return "" async def _emit_status( self, description: str, done: bool = False ) -> Awaitable[None]: """Send status updates""" if self.emitter: return await self.emitter( { "type": "status", "data": { "description": description, "done": done, }, } ) return None async def pipe( self, body: dict, __event_emitter__: Optional[Callable[[Dict[str, Any]], Awaitable[None]]] = None, ) -> AsyncGenerator[str, None]: """Generate images using OpenAI API and display them in chat""" self.emitter = __event_emitter__ # Initialize the prompt variable with a default value image_prompt = "Generate an image" try: # Validate API key if not self.valves.OPENAI_API_KEY: yield json.dumps( {"error": "OpenAI API key is not configured"}, ensure_ascii=False ) return # Extract the prompt from the last user message last_user_message = self._get_last_user_message(body.get("messages", [])) if not last_user_message: yield json.dumps({"error": "No user message found"}, ensure_ascii=False) return # Safety check - if prompt looks like it might be too large or contain chat history if len(last_user_message) > 5000 or "```" in last_user_message: # It's likely we're getting too much content - try to extract a cleaner prompt image_prompt = last_user_message.split("\n")[0][ :1000 ] # Just take first line, limited to 1000 chars print( f"Original prompt was very long ({len(last_user_message)} chars). Using shortened prompt: {image_prompt}" ) else: image_prompt = last_user_message # Send status update await self._emit_status( "Sending request to OpenAI API for image generation..." ) # Log what we're about to do print(f"Preparing to generate image with prompt: '{image_prompt}'") # Prepare request parameters headers = { "Authorization": f"Bearer {self.valves.OPENAI_API_KEY}", "Content-Type": "application/json", } payload = { "prompt": image_prompt, "model": self.valves.MODEL, "n": 1, "size": self.valves.SIZE, } # Add model-specific parameters if self.valves.MODEL == "gpt-image-1": # These parameters are only for gpt-image-1 payload["quality"] = self.valves.QUALITY payload["output_format"] = self.valves.OUTPUT_FORMAT payload["background"] = self.valves.BACKGROUND payload["moderation"] = self.valves.MODERATION # Only add output_compression for webp or jpeg formats if self.valves.OUTPUT_FORMAT in ["webp", "jpeg"]: payload["output_compression"] = self.valves.OUTPUT_COMPRESSION # gpt-image-1 always returns base64 encoded images, no need to specify response_format else: # For dall-e models, specify response_format as b64_json payload["response_format"] = "b64_json" # For dall-e-3, add style parameter if needed (not adding by default) if self.valves.MODEL == "dall-e-3": payload["style"] = "vivid" # Default for dall-e-3 # Log the request that's being sent (removing the API key) debug_headers = headers.copy() if "Authorization" in debug_headers: debug_headers["Authorization"] = "Bearer [REDACTED]" print( f"Sending request to OpenAI API: {self.valves.OPENAI_API_BASE_URL}/images/generations" ) print(f"Headers: {debug_headers}") print(f"Payload: {payload}") # Send request to OpenAI API async with httpx.AsyncClient() as client: try: response = await client.post( f"{self.valves.OPENAI_API_BASE_URL}/images/generations", json=payload, headers=headers, timeout=60.0, # Add a longer timeout ) print(f"Response status: {response.status_code}") if response.status_code != 200: error_text = response.text print(f"Error response: {error_text}") yield json.dumps( {"error": f"Error generating image: {error_text}"}, ensure_ascii=False, ) return except httpx.TimeoutException: yield json.dumps( {"error": "Request to OpenAI API timed out. Please try again."}, ensure_ascii=False, ) return except httpx.RequestError as e: yield json.dumps( {"error": f"Request error: {str(e)}"}, ensure_ascii=False, ) return response_data = response.json() # Get the base64 encoded image data if ( self.valves.MODEL == "gpt-image-1" or payload.get("response_format") == "b64_json" ): b64_image = response_data["data"][0]["b64_json"] # Display initial success message yield f"🎨 Image generated successfully with prompt: '{image_prompt}'\n\n" # Display the image directly in chat using markdown image syntax with data URL mime_type = f"image/{self.valves.OUTPUT_FORMAT}" if self.valves.OUTPUT_FORMAT == "png": mime_type = "image/png" elif self.valves.OUTPUT_FORMAT == "jpeg": mime_type = "image/jpeg" elif self.valves.OUTPUT_FORMAT == "webp": mime_type = "image/webp" image_markdown = ( f"![Generated Image](data:{mime_type};base64,{b64_image})\n\n" ) yield image_markdown else: # For URL responses (shouldn't happen with our configuration) image_url = response_data["data"][0]["url"] yield f"🎨 Image generated successfully with prompt: '{image_prompt}'\n\n" yield f"![Generated Image]({image_url})\n\n" yield f"Note: This URL will expire in 60 minutes.\n\n" # Final status update await self._emit_status("Image generation complete", done=True) except Exception as e: error_message = f"An error occurred: {str(e)}" error_details = f"Exception type: {type(e).__name__}" stack_trace = traceback.format_exc() yield json.dumps( { "error": error_message, "details": error_details, "trace": stack_trace, }, ensure_ascii=False, ) print(f"Error in OpenAI Image pipe: {error_message} - {error_details}") print(f"Stack trace: {stack_trace}") await self._emit_status(f"Error: {str(e)}", done=True)