We're Hiring!
Whitepaper
Docs
Sign In
Tool
Deep Research
Last Updated
4 months ago
Created
4 months ago
Tool ID
deep_research
Creator
@hadad
Downloads
425+
Get
Sponsored by Open WebUI Inc.
We are hiring!
Shape the way humanity engages with intelligence.
Description
Provides the latest information from the internet with detailed and in-depth insights.
README
Tool Code
Show
"""
Original Script
https://openwebui.com/t/cooksleep/infinite_search

Title : Infinite Search
Author : Cook Sleep
Author URL : https://github.com/cooksleep
Description : Fetches and summarizes content using the Reader API from URLs or web searches.
Version : 0.3
License : MIT
Required : OpenWebUI v0.3.15

───────────────────────────────────────────────────────────────

New Script

Title : Deep Research
Author : Hadad Darajat
Author URL : https://www.linkedin.com/in/hadadrjt
Description : Provides the latest information from the internet with detailed and in-depth insights.
Version : 1.0
License : MIT
Required : OpenWebUI (latest)
"""

import asyncio
import inspect
from typing import Any, Callable, Optional, Tuple
from urllib.parse import quote

import aiohttp
from pydantic import BaseModel, Field

# Instructions appended to every successful result so that the consuming
# model turns the raw reader output into a structured, cited synthesis.
# The same text is used for both page reads and searches.
_SYNTHESIS_INSTRUCTIONS = """

<system>
Thoroughly analyze the provided search results and deliver a research-level synthesis that demonstrates depth, rigor, and precision.

1. Extract the most relevant and insightful information that directly addresses the query. Focus on accuracy, depth of coverage, and conceptual clarity.
2. Organize findings into a well-structured format with clear headings and subheadings. Use bullet points where needed, but ensure the overall output reads like a professional research summary rather than a simple list.
3. Critically evaluate each source for credibility, reliability, and potential bias. Identify which sources are authoritative, widely cited, or most relevant to the research context.
4. Compare and contrast perspectives across sources. Highlight areas of consensus, disagreement, or uncertainty. Note any gaps in the existing information and suggest directions for further exploration.
5. Provide direct references for every cited point using **Markdown links** in the format `[Source Name](URL)`. Do not display raw URLs. Ensure all data, claims, or quotations can be traced back to their sources.
6. Explicitly acknowledge limitations in the available information, such as outdated data, lack of peer-reviewed evidence, or missing context. Offer reasoned strategies for overcoming these gaps where possible.
7. Always respond in the same language as the user query. Maintain consistency of tone, clarity, and style across all languages.
8. Write with a professional, analytical, and objective tone. Avoid speculation unless clearly flagged as such. Support reasoning with evidence wherever possible.

Your ultimate goal is to produce a deeply analytical, structured, and evidence-backed synthesis that can serve as a foundation for academic, strategic, or high-level decision-making.
</system>
"""


async def _dispatch_event(
    handler: Optional[Callable[[dict], Any]], payload: dict
) -> None:
    """Deliver *payload* to *handler*, whether it is sync or async.

    :param handler: Callback receiving the event dict, or ``None`` to skip.
    :param payload: Event dictionary passed to the handler unchanged.
    """
    if handler is None:
        return

    if inspect.iscoroutinefunction(handler):
        await handler(payload)
        return

    # Plain callables run in the default executor so a slow handler cannot
    # block the event loop.
    loop = asyncio.get_running_loop()
    outcome = await loop.run_in_executor(None, handler, payload)

    # Async callable objects are not detected by iscoroutinefunction(); they
    # hand back an awaitable that must still be awaited or the event is lost.
    if inspect.isawaitable(outcome):
        await outcome


class EventEmitter:
    """Send ``status`` events to an optional callback."""

    def __init__(self, event_emitter: Optional[Callable[[dict], Any]] = None):
        """Store the callback that will receive emitted events.

        :param event_emitter: Sync or async callable taking one event dict.
        """
        self.event_emitter = event_emitter

    async def emit(self, description="Unknown State", status="in_progress", done=False):
        """Emit a status event; do nothing when no callback is configured.

        :param description: Human-readable message for the event.
        :param status: State label placed in the event data.
        :param done: Completion flag placed in the event data.
        """
        if not self.event_emitter:
            return

        payload = {
            "type": "status",
            "data": {"status": status, "description": description, "done": done},
        }
        await _dispatch_event(self.event_emitter, payload)


def get_send_citation(__event_emitter__: Optional[Callable[[dict], Any]]):
    """Build an async ``send_citation(url, title, content)`` bound to a callback.

    :param __event_emitter__: Callback receiving the event, or ``None``.
    :return: Coroutine function that emits a ``citation`` event.
    """

    async def send_citation(url: str, title: str, content: str):
        if __event_emitter__ is None:
            return
        payload = {
            "type": "citation",
            "data": {
                "document": [content],
                "metadata": [{"source": url, "html": False}],
                "source": {"name": title},
            },
        }
        await _dispatch_event(__event_emitter__, payload)

    return send_citation


def get_send_status(__event_emitter__: Optional[Callable[[dict], Any]]):
    """Build an async ``send_status(status_message, done)`` bound to a callback.

    :param __event_emitter__: Callback receiving the event, or ``None``.
    :return: Coroutine function that emits a ``status`` event.
    """

    async def send_status(status_message: str, done: bool):
        if __event_emitter__ is None:
            return
        payload = {
            "type": "status",
            "data": {"description": status_message, "done": done},
        }
        await _dispatch_event(__event_emitter__, payload)

    return send_status


# The two request helpers below live at module level rather than on Tools so
# that the set of methods exposed by Tools stays exactly what it was.


async def _fetch_page(
    tools: "Tools", url: str, event_emitter: Optional[Callable[[dict], Any]]
) -> Tuple[bool, str]:
    """Read *url* through the reader API and report whether it succeeded.

    :param tools: Tools instance providing the session and reader endpoint.
    :param url: Address of the page to read.
    :param event_emitter: Optional callback for status and citation events.
    :return: ``(True, content + instructions)`` on success, otherwise
        ``(False, "Error reading URL: ...")``.
    """
    send_status = get_send_status(event_emitter)
    send_citation = get_send_citation(event_emitter)

    await send_status(f"Reading content from {url}", False)
    await tools.setup_session()

    try:
        # NOTE(review): ssl=False turns off TLS certificate verification for
        # this request. Kept to preserve behaviour; confirm it is intended.
        async with tools.session.post(
            tools.reader_api, data={"url": url}, ssl=False
        ) as response:
            response.raise_for_status()
            content = await response.text()

        await send_citation(url, "Web Content", content)
        await send_status(f"Content retrieved from {url}", True)
        return True, content + _SYNTHESIS_INSTRUCTIONS
    except Exception as e:
        # Failures are reported as text rather than raised, so the caller
        # always receives a string it can show.
        message = f"Error reading URL: {e}"
        await send_status(message, True)
        return False, message


async def _run_search(
    tools: "Tools", query: str, event_emitter: Optional[Callable[[dict], Any]]
) -> Tuple[bool, str]:
    """Run a SearXNG search through the reader API and report the outcome.

    :param tools: Tools instance providing the session and endpoints.
    :param query: Search terms entered by the user.
    :param event_emitter: Optional callback for status events.
    :return: ``(True, content + instructions)`` on success, otherwise
        ``(False, "Error during search: ...")``.
    """
    emitter = EventEmitter(event_emitter)
    await emitter.emit(f"{query}", status="in_progress", done=False)

    # Percent-encode the whole search expression so characters such as
    # '&', '#', '+' or '?' in the query cannot alter the URL structure.
    # The "!go " prefix is kept from the original request (presumably a
    # SearXNG bang selecting Google - verify against the SearXNG instance).
    encoded_query = quote(f"!go {query}", safe="")
    url = f"{tools.reader_api}{tools.valves.SEARXNG_URL}?q={encoded_query}"

    # Asks the reader API to return only the element matching this selector.
    headers = {"X-Target-Selector": "#urls"}

    await tools.setup_session()

    try:
        # NOTE(review): ssl=False turns off TLS certificate verification for
        # this request. Kept to preserve behaviour; confirm it is intended.
        async with tools.session.get(url, headers=headers, ssl=False) as response:
            response.raise_for_status()
            content = await response.text()

        await emitter.emit("Search completed", status="complete", done=True)
        return True, content + _SYNTHESIS_INSTRUCTIONS
    except Exception as e:
        message = f"Error during search: {e}"
        await emitter.emit(message, status="error", done=True)
        return False, message


class Tools:
    """Web search and page reading backed by a reader API and SearXNG."""

    class Valves(BaseModel):
        """User-configurable settings for the tool."""

        SEARXNG_URL: str = Field(
            default="https://searx.stream/search",
            description="SearXNG Endpoint URL.",
        )
        TIMEOUT: int = Field(
            default=120,
            description="Request timeout in seconds",
        )

    def __init__(self):
        """Create default settings; the HTTP session is opened lazily."""
        self.valves = self.Valves()
        # External service that extracts readable content from web pages.
        self.reader_api = "https://r.jina.ai/"
        self.session: Optional[aiohttp.ClientSession] = None

    async def setup_session(self):
        """Create the shared aiohttp session if none is open."""
        if self.session is not None and not self.session.closed:
            return

        connector = aiohttp.TCPConnector(
            limit=100, limit_per_host=10, ttl_dns_cache=300
        )
        # TIMEOUT is read here, so changing the valve later only takes effect
        # once a new session is created.
        timeout = aiohttp.ClientTimeout(
            total=self.valves.TIMEOUT, connect=30, sock_read=60
        )
        self.session = aiohttp.ClientSession(
            timeout=timeout,
            connector=connector,
            headers={
                "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36"
            },
            trust_env=True,  # honour proxy settings from the environment
            cookie_jar=aiohttp.DummyCookieJar(),  # do not keep cookies
        )

    async def close(self):
        """Close the HTTP session if it is open and drop the reference."""
        if self.session and not self.session.closed:
            await self.session.close()
        self.session = None

    async def read_url(
        self, url: str, __event_emitter__: Optional[Callable[[dict], Any]] = None
    ) -> str:
        """Read the content of a web page and return it with synthesis instructions.

        :param url: Address of the page to read.
        :return: Page content followed by processing instructions, or an
            error message beginning with "Error reading URL:".
        """
        _, text = await _fetch_page(self, url, __event_emitter__)
        return text

    async def search(
        self, query: str, __event_emitter__: Optional[Callable[[dict], Any]] = None
    ) -> str:
        """Search the web and return the results with synthesis instructions.

        :param query: Search terms.
        :return: Search results followed by processing instructions, or an
            error message beginning with "Error during search:".
        """
        _, text = await _run_search(self, query, __event_emitter__)
        return text

    async def google(
        self, query: str, __event_emitter__: Optional[Callable[[dict], Any]] = None
    ) -> str:
        """Search the web; behaves exactly like ``search``.

        :param query: Search terms.
        :return: Same value as ``search`` for the same query.
        """
        return await self.search(query, __event_emitter__)


# __all__ lists "Valves"; without this alias a star-import of the module
# fails because the class only exists as Tools.Valves.
Valves = Tools.Valves


def _configured_tools(searxng_url: Optional[str], timeout: Optional[int]) -> Tools:
    """Return a fresh Tools instance with the given overrides applied."""
    tools = Tools()
    if searxng_url:
        tools.valves.SEARXNG_URL = searxng_url
    if timeout:
        tools.valves.TIMEOUT = timeout
    return tools


async def read_url(
    url: str,
    searxng_url: Optional[str] = None,
    timeout: Optional[int] = None,
    event_emitter: Optional[Callable[[dict], Any]] = None,
) -> dict:
    """Read one URL with a temporary Tools instance.

    :param url: Address of the page to read.
    :param searxng_url: Optional override for the SearXNG endpoint.
    :param timeout: Optional override for the request timeout in seconds.
    :param event_emitter: Optional callback for progress events.
    :return: ``{"ok": bool, "content": str}``; ``ok`` is ``False`` when the
        request failed, in which case ``content`` holds the error message.
    """
    tools = _configured_tools(searxng_url, timeout)
    try:
        ok, content = await _fetch_page(tools, url, event_emitter)
        return {"ok": ok, "content": content}
    finally:
        await tools.close()


async def search(
    query: str,
    searxng_url: Optional[str] = None,
    timeout: Optional[int] = None,
    event_emitter: Optional[Callable[[dict], Any]] = None,
) -> dict:
    """Run one search with a temporary Tools instance.

    :param query: Search terms.
    :param searxng_url: Optional override for the SearXNG endpoint.
    :param timeout: Optional override for the request timeout in seconds.
    :param event_emitter: Optional callback for progress events.
    :return: ``{"ok": bool, "content": str}``; ``ok`` is ``False`` when the
        search failed, in which case ``content`` holds the error message.
    """
    tools = _configured_tools(searxng_url, timeout)
    try:
        ok, content = await _run_search(tools, query, event_emitter)
        return {"ok": ok, "content": content}
    finally:
        await tools.close()


async def google(
    query: str,
    searxng_url: Optional[str] = None,
    timeout: Optional[int] = None,
    event_emitter: Optional[Callable[[dict], Any]] = None,
) -> dict:
    """Convenience wrapper that forwards all arguments to ``search``."""
    return await search(
        query, searxng_url=searxng_url, timeout=timeout, event_emitter=event_emitter
    )


__all__ = ["Valves", "EventEmitter", "Tools", "read_url", "search", "google"]