"""
Title: Multitool
Description: A middleware filter that uses an LLM to route each incoming chat turn to one of three tools—web search, Flux-powered image generation, and the built-in code interpreter. Forked and modified from Auto-Tool v2 by mhwhgm.
Version: 1
Author: Peter
Note: The image generation function here takes a URL from a chat model that contains an image. This also works with gpt-image-1.
"""
from __future__ import annotations
import json
import re
import sys
from typing import Any, Awaitable, Callable, Dict, Optional, Tuple
from uuid import uuid4
from pydantic import BaseModel, Field
from open_webui.models.users import Users
from open_webui.utils.chat import generate_chat_completion
from open_webui.utils.middleware import chat_web_search_handler
from open_webui.utils.misc import get_last_user_message
# ─── Styling helpers ──
# ANSI escape sequences used to colorize the debug log lines on stderr.
_CYAN = "\x1b[96m"  # bright cyan — message body
_MAGENTA = "\x1b[95m"  # bright magenta — "[AutoToolSelector]" tag
_RESET = "\x1b[0m"  # reset all terminal attributes
_BOLD = "\x1b[1m"  # bold text
def _debug(msg: str) -> None:
    """Write a colorized ``[AutoToolSelector]`` log line to stderr."""
    tag = f"{_MAGENTA}{_BOLD}[AutoToolSelector]{_RESET}"
    text = f"{_CYAN} {msg}{_RESET}"
    print(tag + text, file=sys.stderr)
# Regex
# First http(s) URL in a string (runs to the next whitespace).
_URL_RE = re.compile(r"https?://\S+")
# First brace-delimited span. Non-greedy with DOTALL, so it captures the
# smallest {...} — fine for the flat JSON objects produced by the router,
# but it would truncate a nested object.
_JSON_RE = re.compile(r"\{.*?\}", re.S)
def _parse_json_fuzzy(text: str) -> Dict[str, str]:
    """Best-effort extraction of a JSON object from an LLM reply.

    Strips a surrounding markdown code fence, narrows to the first ``{...}``
    span if the reply embeds the object in prose, then parses it. Returns
    ``{}`` on any failure so callers can safely call ``.get`` on the result.
    """
    raw = text.strip()
    # Drop a surrounding ``` fence (with or without a language tag).
    if raw.startswith("```") and raw.endswith("```"):
        raw = "\n".join(raw.splitlines()[1:-1])
    # Narrow to the first {...} span when extra prose surrounds the object.
    m = _JSON_RE.search(raw)
    if m:
        raw = m.group(0)
    try:
        obj = json.loads(raw)
    except Exception as e:
        _debug(f"⚠️ JSON parse error → {e}. Raw: {raw[:80]}…")
        return {}
    # BUGFIX: json.loads can return a list/str/number for non-object input
    # (e.g. when no {...} match narrowed the text); callers expect a dict.
    if not isinstance(obj, dict):
        _debug(f"⚠️ Expected JSON object, got {type(obj).__name__}. Raw: {raw[:80]}…")
        return {}
    return obj
# ─── Prompt‑designer ──
async def _generate_prompt_and_desc(
    request: Any, user: Any, model: str, convo_snippet: str, user_query: str
) -> Tuple[str, str]:
    """Ask the router model to design an image prompt and short description.

    Returns ``(prompt, description)``; falls back to
    ``(user_query, "Image generated.")`` on any failure.
    """
    sys_prompt = (
        "You design image prompts. Reply ONLY with JSON:\n"
        '{"prompt": <≤50 words>, "description": <≤25 words>}'
    )
    user_content = f"Conversation so far:\n{convo_snippet}\n\nUser query: {user_query}"
    form_data = {
        "model": model,
        "messages": [
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": user_content},
        ],
        "stream": False,
    }
    try:
        res = await generate_chat_completion(
            request=request, form_data=form_data, user=user
        )
        parsed = _parse_json_fuzzy(res["choices"][0]["message"]["content"])
        prompt = parsed.get("prompt") or user_query
        description = parsed.get("description") or "Image generated."
        # Make sure the user's literal request survives the rewrite.
        if user_query not in prompt:
            prompt = f"{user_query} – {prompt}"
        _debug(f"Router prompt → {prompt[:60]}… | desc: {description}")
        return prompt, description
    except Exception as exc:
        _debug(f"Prompt‑designer error → {exc}")
        return user_query, "Image generated."
# ─── Flux image handler ──
async def flux_image_generation_handler(
    request: Any, body: dict, ctx: dict, user: Any
) -> dict:
    """Generate an image via the image chat model and attach its metadata.

    Flow:
      1. Append a placeholder assistant bubble so the UI shows progress.
      2. Call the image model ("gpt-4o-image") with the designed prompt;
         the reply is expected to contain the image URL.
      3. Remove the placeholder and append a system message carrying the
         URL plus an instruction telling the assistant to embed the image.

    Returns the mutated request body. On failure the placeholder is replaced
    with an error bubble and the body is returned without image metadata.
    """
    prompt: str = ctx.get("prompt") or get_last_user_message(body["messages"])
    description: str = ctx.get("description", "Image generated.")
    emitter = ctx.get("__event_emitter__")
    # 1️⃣ Emit loading placeholder (UI glow)
    placeholder_id = str(uuid4())
    placeholder = {"id": placeholder_id, "role": "assistant", "content": ""}
    body["messages"].append(placeholder)
    if emitter:
        await emitter({"type": "chat:message", "data": placeholder})
        await emitter(
            {
                "type": "status",
                "data": {
                    "message_id": placeholder_id,
                    "description": "Generating image…",
                    "done": False,
                },
            }
        )
    # 2️⃣ Call Flux
    _debug(f"Calling Flux with prompt → {prompt[:80]}…")
    try:
        resp = await generate_chat_completion(
            request=request,
            form_data={
                "model": "gpt-4o-image",
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
            },
            user=user,
        )
        flux_reply = resp["choices"][0]["message"]["content"].strip()
    except Exception as exc:
        _debug(f"Flux error → {exc}")
        fail = f"❌ Image generation failed: {exc}"
        if emitter:
            await emitter(
                {
                    "type": "replace",
                    "data": {"message_id": placeholder_id, "content": fail},
                }
            )
            await emitter(
                {
                    "type": "status",
                    "data": {
                        "message_id": placeholder_id,
                        "description": "Failed",
                        "done": True,
                    },
                }
            )
        body["messages"].pop()  # drop placeholder
        return body
    # The model replies with text containing the image URL; fall back to the
    # whole reply when no URL can be found.
    url_match = _URL_RE.search(flux_reply)
    image_url = url_match.group(0) if url_match else flux_reply
    _debug(f"✅ Flux URL → {image_url}")
    # 3️⃣ Remove placeholder bubble (to avoid duplicate embed)
    if emitter:
        await emitter({"type": "delete", "data": {"message_id": placeholder_id}})
        await emitter(
            {
                "type": "status",
                "data": {
                    "message_id": placeholder_id,
                    "description": "Done",
                    "done": True,
                },
            }
        )
    body["messages"].pop()  # remove from history
    # 4️⃣ Insert system metadata only – no automatic embed.
    # BUGFIX: the instruction previously read "using  and" (the markdown
    # embed syntax was missing) and ended with a stray quote character.
    meta = (
        "[IMAGE_GENERATED]\n"
        f"url: {image_url}\n"
        f"prompt: {prompt}\n"
        f"description: {description}\n"
        f"[IMAGE_INSTRUCTION] Embed the generated image using markdown "
        f"![image]({image_url}) and add a one-sentence caption."
    )
    body["messages"].append({"role": "system", "content": meta})
    return body
# ─── Code Interpreter handler ──
async def code_interpreter_handler(
    request: Any, body: dict, ctx: dict, user: Any
) -> dict:
    """Enable Open WebUI's built-in code interpreter for this turn.

    Appends a system instruction telling the assistant to wrap runnable code
    in a <code_interpreter> block, and flips the matching feature flag so the
    frontend renders the grey code-interpreter container with a Run button.
    """
    instruction = 'You can execute Python code using Open WebUI\'s <code_interpreter> tag. When you decide code is required, answer ONLY with a single block in the form:<code_interpreter type="code" lang="python"> # your code here </code_interpreter>'
    # Insert system prompt so the next assistant turn behaves correctly.
    body["messages"].append({"role": "system", "content": instruction})
    # Mark the feature so the UI shows code-run controls (matches OWUI internals).
    features = body.setdefault("features", {})
    features["code_interpreter"] = True
    _debug("🔧 Code Interpreter enabled for this turn")
    return body
# ─── Registry ──
# Maps router decisions to async handlers. The local handlers take
# (request, body, ctx, user) and return the possibly-mutated body;
# web_search is Open WebUI's built-in middleware handler (presumably the
# same call shape — verify against the installed Open WebUI version).
TOOL_REGISTRY: Dict[str, Callable[..., Awaitable[dict]]] = {
    "web_search": chat_web_search_handler,
    "image_generation": flux_image_generation_handler,
    "code_interpreter": code_interpreter_handler,
}
# ─── Filter ── ──
class Filter:
    """Open WebUI inlet filter that auto-routes each chat turn to one tool.

    A lightweight "router" completion picks one of web_search,
    image_generation, code_interpreter, or none; the matching handler from
    TOOL_REGISTRY then mutates the request body before the main completion.
    """

    class Valves(BaseModel):
        # Optional override for the routing model; defaults to the chat model.
        decider_model: Optional[str] = Field(
            default=None, description="Model for router"
        )

    class UserValves(BaseModel):
        # Per-user switch to disable automatic tool routing.
        auto_tools: bool = Field(default=True)

    def __init__(self):
        self.valves = self.Valves()
        self.user_valves = self.UserValves()

    async def inlet(
        self,
        body: dict,
        __event_emitter__: Callable[[Any], Awaitable[None]],
        __request__: Any,
        __user__: Optional[dict] = None,
        __model__: Optional[dict] = None,
    ) -> dict:
        """Route the incoming turn to a tool handler, or pass it through."""
        if not self.user_valves.auto_tools:
            return body
        # BUGFIX: __user__ defaults to None, but the original dereferenced
        # __user__["id"] unconditionally; skip routing without a user context.
        if not __user__:
            return body
        messages = body["messages"]
        user_message = get_last_user_message(messages)
        user = Users.get_user_by_id(__user__["id"])
        convo_snippet = "\n".join(
            f"{m['role'].upper()}: {m['content']!r}" for m in messages[-4:]
        )
        # BUGFIX: the original prompt ended with a garbled
        # "...none.) or reply none." fragment.
        router_sys = (
            "You are a routing assistant. From the user's final message decide "
            "which single tool best helps. Available tools: web_search, "
            "image_generation, code_interpreter. Return ONLY one of: "
            "web_search, image_generation, code_interpreter, none."
        )
        router_payload = {
            "model": self.valves.decider_model or body["model"],
            "messages": [
                {"role": "system", "content": router_sys},
                {
                    "role": "user",
                    "content": f"History:\n{convo_snippet}\nUser query: {user_message}",
                },
            ],
            "stream": False,
        }
        try:
            res = await generate_chat_completion(
                request=__request__, form_data=router_payload, user=user
            )
            decision = res["choices"][0]["message"]["content"].strip().lower()
            # Tolerate decoration (quotes, backticks, trailing period) so the
            # registry lookup below still matches.
            decision = decision.strip("`'\"., ")
            _debug(f"Router decision → {decision}")
        except Exception as exc:
            _debug(f"Router error → {exc}")
            return body
        if decision == "image_generation":
            # Image generation needs a designed prompt + caption description.
            prompt, desc = await _generate_prompt_and_desc(
                __request__, user, router_payload["model"], convo_snippet, user_message
            )
            return await TOOL_REGISTRY[decision](
                __request__,
                body,
                {
                    "__event_emitter__": __event_emitter__,
                    "prompt": prompt,
                    "description": desc,
                },
                user,
            )
        elif decision in TOOL_REGISTRY:
            return await TOOL_REGISTRY[decision](
                __request__, body, {"__event_emitter__": __event_emitter__}, user
            )
        return body