Nano banana Tool • Open WebUI Community

"""
title: Google Gemini Flash Image Generation (Streaming)
requirements: google-genai
"""

import base64
from typing import Optional, Tuple

# Open WebUI imports
from fastapi import Request
from pydantic import BaseModel, Field
from open_webui.routers.images import upload_image
from open_webui.models.users import Users

# Google Gemini (banana-style) imports
from google import genai
from google.genai import types

# MIME type assumed whenever the source does not declare one.
_DEFAULT_IMAGE_MIME_TYPE = "image/png"


def load_b64_image_data(b64_str: str) -> Tuple[bytes, str]:
    """Decode a base64-encoded image into raw bytes plus its MIME type.

    Accepts either a ``data:`` URI (``data:image/png;base64,xxxx``) or a bare
    base64 string.

    Args:
        b64_str: The data URI or bare base64 payload.

    Returns:
        ``(image_bytes, mime_type)``. ``mime_type`` is the media type declared
        before ``;base64`` in a data URI, or ``image/png`` when the input is
        bare base64, the header has no ``;base64`` marker, or the declared
        media type is empty.

    Raises:
        ValueError: If the payload is not valid base64 (``binascii.Error`` is
            a ``ValueError`` subclass).
    """
    mime_type = _DEFAULT_IMAGE_MIME_TYPE
    encoded = b64_str

    # Example: data:image/png;base64,xxxxxx
    if b64_str.startswith("data:") and "," in b64_str:
        header, encoded = b64_str.split(",", 1)
        # The header looks like "data:image/png;base64"; drop the scheme.
        media_info = header[len("data:"):]
        if ";base64" in media_info:
            declared = media_info.split(";base64", 1)[0]
            # "data:;base64,..." declares nothing, so keep the default.
            mime_type = declared or _DEFAULT_IMAGE_MIME_TYPE

    return base64.b64decode(encoded), mime_type


# The helpers below live at module level (not on ``Tools``) so the class keeps
# exactly one public method.


async def _emit(event_emitter, event_type: str, data: dict) -> None:
    """Send one ``{"type", "data"}`` event, or do nothing without an emitter."""
    if event_emitter:
        await event_emitter({"type": event_type, "data": data})


def _chunk_parts(chunk):
    """Return the first candidate's parts, or ``None`` for an empty chunk."""
    # Some chunks may be heartbeats/empty; guard every level before indexing.
    if not chunk or not chunk.candidates:
        return None
    content = chunk.candidates[0].content
    if content is None:
        return None
    return content.parts


class Tools:
    """Container class for Open WebUI tools."""

    class Valves(BaseModel):
        """User-configurable settings for the tool."""

        api_key: str = Field(default="", description="Your Google AI API key here")
        # Following the official banana sample, default to the 2.5 flash
        # image preview model.
        model_name: str = Field(
            default="gemini-2.5-flash-image-preview",
            description="The Google AI model name for image+text generation (streaming)",
        )

    def __init__(self):
        """Initialize the Tool."""
        self.valves = self.Valves()

    async def gemini_generate_image(
        self,
        prompt: str,
        __request__: Request,
        __user__: dict,
        __event_emitter__=None,
    ) -> str:
        """
        Generates image(s) and/or text from Gemini using streaming API.
        Streams TEXT chunks to UI and uploads IMAGE parts to Open WebUI storage.
        Returns a short status message for the LLM.

        :param prompt: Text prompt describing what Gemini should generate.
        :return: A short instruction or error string for the LLM.
        """
        # __request__ is forwarded to upload_image, __user__["id"] selects the
        # uploading user, and __event_emitter__ (optional) receives "status"
        # and "message" events. Failures are reported through the return
        # value and a final status event rather than raised.
        if not self.valves.api_key:
            return (
                "Error: API key is missing. Please configure it in the tool settings."
            )
        if not isinstance(prompt, str):
            return "Error: The prompt must be a string."
        if not prompt.strip():
            # Fail fast instead of spending a network round trip on a blank prompt.
            return "Error: The prompt must not be empty."

        # Start status
        await _emit(
            __event_emitter__,
            "status",
            {
                "description": "Generating (streaming) with Gemini…",
                "done": False,
            },
        )

        try:
            client = genai.Client(api_key=self.valves.api_key)
            contents = [
                types.Content(
                    role="user",
                    parts=[types.Part.from_text(text=prompt)],
                )
            ]
            generate_content_config = types.GenerateContentConfig(
                response_modalities=["IMAGE", "TEXT"]
            )

            # Track results
            image_count = 0
            text_seen = False

            # Use the SDK's async client: iterating the synchronous stream
            # inside this coroutine would block the event loop until Gemini
            # finished generating.
            stream = await client.aio.models.generate_content_stream(
                model=self.valves.model_name,
                contents=contents,
                config=generate_content_config,
            )
            async for chunk in stream:
                parts = _chunk_parts(chunk)
                if parts is None:
                    continue

                # 1) TEXT: the official sample prints chunk.text; here the
                #    visible text is pushed straight to the UI.
                text = getattr(chunk, "text", None)
                if text:
                    text_seen = True
                    if text.strip():
                        await _emit(__event_emitter__, "message", {"content": text})

                # 2) IMAGE: scan parts for inline_data and upload the bytes
                #    to Open WebUI.
                for part in parts:
                    inline = getattr(part, "inline_data", None)
                    if not (inline and inline.data):
                        continue

                    mime_type: str = inline.mime_type or _DEFAULT_IMAGE_MIME_TYPE
                    # Upload the returned bytes as-is: no Pillow re-encode and
                    # no base64 encode/decode round trip.
                    url = upload_image(
                        __request__,
                        metadata={
                            "instances": {"prompt": prompt},
                            "parameters": {
                                "sampleCount": 1,
                                "outputOptions": {"mimeType": mime_type},
                            },
                        },
                        image_data=inline.data,
                        content_type=mime_type,
                        user=Users.get_user_by_id(__user__["id"]),
                    )
                    image_count += 1

                    # Post the image back into the chat as markdown.
                    await _emit(
                        __event_emitter__,
                        "message",
                        {"content": f"![Generated Image]({url})"},
                    )

            # Done status
            await _emit(
                __event_emitter__,
                "status",
                {
                    "description": f"Done. Images: {image_count} | Text: {'yes' if text_seen else 'no'}",
                    "done": True,
                },
            )

            if image_count > 0:
                return "Notify the user that the image has been successfully generated"
            elif text_seen:
                return "Notify the user that only text was generated"
            else:
                return "Notify the user that no output was generated"

        except Exception as err:
            # Top-level boundary of the tool: surface any failure to both the
            # UI and the LLM instead of letting it propagate.
            await _emit(
                __event_emitter__,
                "status",
                {
                    "description": f"An error occurred: {err}",
                    "done": True,
                },
            )
            return f"Tell the user: {err}"