Infinite Search Tool | Open WebUI Community

NOTICE
Open WebUI Community is currently undergoing a major revamp to improve user experience and performance ✨
Tool
v0.3
Infinite Search
Fetches and summarizes content using the Reader API from URLs or web searches. Requires the `OpenAI ReAct agent` or the `OpenAI ReAct agent_Added Whitelist Version` function to use streaming function calls.
Tool ID
infinite_search
Creator
@cooksleep
Downloads
1K+
Tool Content
python
"""
title: Infinite Search
author: Cook Sleep
author_urls: https://github.com/cooksleep
description: Fetches and summarizes content using the Reader API from URLs or web searches.
required_open_webui_version: 0.3.15
version: 0.3
licence: MIT
"""

import requests
from pydantic import BaseModel, Field
from typing import Callable, Any, Optional


class Valves(BaseModel):
    SEARXNG_URL: str = Field(
        default="https://paulgo.io/search",
        description="SearXNG search URL. You can find available SearXNG instances at https://searx.space/. The URL should end with '/search'.",
    )
    BAIDU_URL: str = Field(
        default="https://www.baidu.com/s", description="Baidu search URL"
    )
    TIMEOUT: int = Field(default=30, description="Request timeout in seconds")


class EventEmitter:
    def __init__(self, event_emitter: Callable[[dict], Any] = None):
        self.event_emitter = event_emitter

    async def emit(self, description="Unknown State", status="in_progress", done=False):
        if self.event_emitter:
            await self.event_emitter(
                {
                    "type": "status",
                    "data": {
                        "status": status,
                        "description": description,
                        "done": done,
                    },
                }
            )


def get_send_citation(
    __event_emitter__: Optional[Callable[[dict], Any]]
) -> Callable[[str, str, str], None]:
    async def send_citation(url: str, title: str, content: str):
        if __event_emitter__ is None:
            return
        await __event_emitter__(
            {
                "type": "citation",
                "data": {
                    "document": [content],
                    "metadata": [{"source": url, "html": False}],
                    "source": {"name": title},
                },
            }
        )

    return send_citation


def get_send_status(
    __event_emitter__: Optional[Callable[[dict], Any]]
) -> Callable[[str, bool], None]:
    async def send_status(status_message: str, done: bool):
        if __event_emitter__ is None:
            return
        await __event_emitter__(
            {
                "type": "status",
                "data": {"description": status_message, "done": done},
            }
        )

    return send_status


class Tools:
    def __init__(self):
        self.valves = Valves()
        self.reader_api = "https://r.jina.ai/"

    async def read_url(
        self, url: str, __event_emitter__: Optional[Callable[[dict], Any]] = None
    ) -> str:
        """
        Read and extract the main content from a given URL.

        :param url: The URL to read from.
        :return: The main content of the page in processed format.
        """
        send_status = get_send_status(__event_emitter__)
        send_citation = get_send_citation(__event_emitter__)

        try:
            await send_status(f"Reading content from {url}", False)

            data = {"url": url}
            response = requests.post(
                self.reader_api, data=data, timeout=self.valves.TIMEOUT
            )
            response.raise_for_status()
            content = response.text

            await send_citation(url, "Web Content", content)
            await send_status(f"Content retrieved from {url}", True)

            result_presentation = """


PLEASE strictly FOLLOW the instructions below!
PLEASE strictly FOLLOW the instructions below!
PLEASE strictly FOLLOW the instructions below!
PLEASE strictly FOLLOW the instructions below!

# Content Processing Instructions:
- Thoroughly examine retrieved content
- Identify key points and critical data
- Assess source credibility
- Provide concise yet comprehensive summaries
- Emphasize most relevant information
- Use clear structure in your response
- Indicate any ambiguities or contradictions
- Acknowledge when information is unavailable
- Cite sources appropriately
- Consider user's language preferences
- Anticipate potential follow-up questions

# Use the following format for presentation (do not include "---"):
---
## [Site Name - Title](URL)
Your summary
(Leave a blank line between each summary.)
---


"""

            return content + result_presentation
        except Exception as e:
            await send_status(f"Error reading URL: {str(e)}", True)
            return f"Error reading URL: {str(e)}"

    async def search(
        self,
        query: str,
        engine: str = "google",
        __event_emitter__: Optional[Callable[[dict], Any]] = None,
    ) -> str:
        """
        Perform a web search using the specified engine.

        :param query: The search query.
        :param engine: The search engine to use ('google', 'bing', or 'baidu').
        :return: The search results as formatted text ready for display.
        """
        emitter = EventEmitter(__event_emitter__)

        try:
            await emitter.emit(
                f"Searching with {engine}: {query}", status="in_progress", done=False
            )

            if engine == "baidu":
                url = f"{self.reader_api}{self.valves.BAIDU_URL}?wd={query}"
                headers = {"X-Target-Selector": "#content_left"}
            else:
                prefix = "!go" if engine == "google" else "!bi"
                url = f"{self.reader_api}{self.valves.SEARXNG_URL}?q={prefix} {query}"
                headers = {"X-Target-Selector": "#urls"}

            response = requests.get(url, headers=headers, timeout=self.valves.TIMEOUT)
            response.raise_for_status()
            content = response.text

            await emitter.emit(
                f"Search completed with {engine}", status="complete", done=True
            )

            search_result_processing = """


PLEASE strictly FOLLOW the instructions below!
PLEASE strictly FOLLOW the instructions below!
PLEASE strictly FOLLOW the instructions below!
PLEASE strictly FOLLOW the instructions below!

# Search Result Processing
1. Select 1-3 highly relevant results from the search
2. Read the full content of the selected result using read_url()


"""

            return content + search_result_processing
        except Exception as e:
            await emitter.emit(
                f"Error during search: {str(e)}", status="error", done=True
            )
            return f"Error during search: {str(e)}"

    async def google_search(
        self, query: str, __event_emitter__: Optional[Callable[[dict], Any]] = None
    ) -> str:
        """
        Perform a Google search using SearXNG.

        :param query: The search query.
        :return: The formatted search results for processing.
        """
        return await self.search(query, "google", __event_emitter__)

    async def bing_search(
        self, query: str, __event_emitter__: Optional[Callable[[dict], Any]] = None
    ) -> str:
        """
        Perform a Bing search using SearXNG.

        Note: Bing provides a balanced choice for searching both international and Simplified Chinese content.

        :param query: The search query.
        :return: The search results in text format ready for processing.
        """
        return await self.search(query, "bing", __event_emitter__)

    async def baidu_search(
        self, query: str, __event_emitter__: Optional[Callable[[dict], Any]] = None
    ) -> str:
        """
        Perform a Baidu search.

        Note: Baidu is optimal for Simplified Chinese content searches.

        :param query: The search query.
        :return: The search results in text format ready for processing.
        """
        return await self.search(query, "baidu", __event_emitter__)