# Cloudflare Browser Rendering Tool • Open WebUI Community

""" title: Cloudflare Browser Rendering author: jerieljan version: 0.3 license: MIT License description: Enables LLMs to read webpages by processing them into Markdown with the Cloudflare Browser Rendering API """ from pydantic import BaseModel, Field from typing import Optional, Callable, Any, Dict, List import requests import asyncio class Tools: class Valves(BaseModel): CLOUDFLARE_ACCOUNT_ID: str = Field( default="", description="The Cloudflare Account ID" ) CLOUDFLARE_API_TOKEN: str = Field( default="", description="The API token to access Cloudflare services" ) CLOUDFLARE_API_BASE_URL: str = Field( default="https://api.cloudflare.com/client/v4", description="(Optional) The base URL for Cloudflare API endpoints", ) def __init__(self): self.valves = self.Valves() self.citation = False self.tools = [ { "type": "function", "function": { "name": "extract_markdown", "description": "Extract markdown content from a webpage using Cloudflare Browser Rendering", "parameters": { "type": "object", "properties": { "url": { "type": "string", "description": "The URL of the webpage to extract markdown from", }, "reject_pattern": { "type": "array", "items": {"type": "string"}, "description": "Optional regex patterns to reject certain requests (e.g., CSS files)", "default": [] } }, "required": ["url"], }, }, }, { "type": "function", "function": { "name": "extract_markdown_from_html", "description": "Convert raw HTML content to markdown using Cloudflare Browser Rendering", "parameters": { "type": "object", "properties": { "html": { "type": "string", "description": "The raw HTML content to convert to markdown", } }, "required": ["html"], }, }, } ] async def extract_markdown( self, url: str, reject_pattern: Optional[List[str]] = None, __event_emitter__: Optional[Callable[[Dict], Any]] = None ) -> str: """ Uses the Cloudflare Browser Rendering service to fetch a webpage and provide the content in Markdown format. 
""" if not self.valves.CLOUDFLARE_API_TOKEN: raise Exception("CLOUDFLARE_API_TOKEN not provided in valves") if not self.valves.CLOUDFLARE_ACCOUNT_ID: raise Exception("CLOUDFLARE_ACCOUNT_ID not provided in valves") # Status emitter helper async def emit_status( description: str, status: str = "in_progress", done: bool = False ): if __event_emitter__: await __event_emitter__( { "type": "status", "data": { "description": description, "status": status, "done": done, }, } ) # Initial status await emit_status(f"Extracting markdown from: {url}...", "processing") headers = { "Authorization": f"Bearer {self.valves.CLOUDFLARE_API_TOKEN}", "Content-Type": "application/json", } payload = {"url": url} if reject_pattern: payload["rejectRequestPattern"] = reject_pattern try: await emit_status(f"Extracting markdown from: {url}...", "processing") response = requests.post( f"{self.valves.CLOUDFLARE_API_BASE_URL}/accounts/{self.valves.CLOUDFLARE_ACCOUNT_ID}/browser-rendering/markdown", headers=headers, json=payload, timeout=30, ) response.raise_for_status() result = response.json() if not result.get("success", False): error_msg = result.get("errors", ["Unknown error occurred"])[0] await emit_status(f"Error: {error_msg}", status="error", done=True) return f"Error extracting markdown: {error_msg}" markdown_content = result.get("result", "") # Emit citation if __event_emitter__: await __event_emitter__( { "type": "citation", "data": { "document": [markdown_content], "metadata": [{"source": url}], "source": {"name": url, "url": url}, }, } ) # Complete status await emit_status( "Markdown extraction completed successfully", status="complete", done=True ) return markdown_content except requests.exceptions.RequestException as e: error_msg = f"Network error extracting markdown: {str(e)}" await emit_status(error_msg, status="error", done=True) return error_msg except Exception as e: error_msg = f"Error extracting markdown: {str(e)}" await emit_status(error_msg, status="error", done=True) 
return error_msg async def extract_markdown_from_html( self, html: str, __event_emitter__: Optional[Callable[[Dict], Any]] = None ) -> str: """ Uses the Cloudflare Browser Rendering service to process the user's provided HTML code and reformat it to Markdown. """ if not self.valves.CLOUDFLARE_API_TOKEN: raise Exception("CLOUDFLARE_API_TOKEN not provided in valves") if not self.valves.CLOUDFLARE_ACCOUNT_ID: raise Exception("CLOUDFLARE_ACCOUNT_ID not provided in valves") # Status emitter helper async def emit_status( description: str, status: str = "in_progress", done: bool = False ): if __event_emitter__: await __event_emitter__( { "type": "status", "data": { "description": description, "status": status, "done": done, }, } ) # Initial status await emit_status("Converting HTML to markdown...", "processing") headers = { "Authorization": f"Bearer {self.valves.CLOUDFLARE_API_TOKEN}", "Content-Type": "application/json", } payload = {"html": html} try: await emit_status("Converting HTML to markdown...", "processing") response = requests.post( f"{self.valves.CLOUDFLARE_API_BASE_URL}/accounts/{self.valves.CLOUDFLARE_ACCOUNT_ID}/browser-rendering/markdown", headers=headers, json=payload, timeout=30, ) response.raise_for_status() result = response.json() if not result.get("success", False): error_msg = result.get("errors", ["Unknown error occurred"])[0] await emit_status(f"Error: {error_msg}", status="error", done=True) return f"Error converting HTML to markdown: {error_msg}" markdown_content = result.get("result", "") # Emit citation for HTML content if __event_emitter__: await __event_emitter__( { "type": "citation", "data": { "document": [markdown_content], "metadata": [{"source": "Raw HTML Content"}], "source": {"name": "HTML Content"}, }, } ) # Complete status await emit_status( "HTML to markdown conversion completed successfully", status="complete", done=True ) return markdown_content except requests.exceptions.RequestException as e: error_msg = f"Network error 
converting HTML to markdown: {str(e)}" await emit_status(error_msg, status="error", done=True) return error_msg except Exception as e: error_msg = f"Error converting HTML to markdown: {str(e)}" await emit_status(error_msg, status="error", done=True) return error_msg