Whitepaper
Docs
Sign In
Function
Function
filter
OCR Before Requests
Function ID
ocr_before_requests
Creator
@xiniah
Downloads
140+
Performs OCR on images before the request is sent to the model; especially useful for o1-mini.
Get
README
No README available
Function Code
Show
import asyncio
import re
from typing import Callable, Awaitable, Any, Iterator, List, Optional, Tuple

import aiohttp
from pydantic import BaseModel, Field


class Filter:
    """Replace images in user messages with text produced by an OCR model.

    ``inlet`` rewrites the request body before it is forwarded: every image in
    the latest user message is sent to a chat-completions endpoint together
    with an OCR prompt, and the image part is replaced by the returned text.
    Images left over in earlier user messages are removed without being
    recognized again. ``outlet`` passes the body through unchanged.
    """

    class Valves(BaseModel):
        # Ordering hint for the filter; not read anywhere in this class.
        priority: int = Field(default=0, description="Priority level for filtering operations.")
        # Root of the OCR endpoint; "/v1/chat/completions" is appended to it.
        OCR_Base_URL: str = Field(
            default="https://api.openai.com", description="Base URL for the LLM OCR API."
        )
        # Sent as a bearer token in the Authorization header.
        OCR_API_KEY: str = Field(default="", description="API key for the API.")
        # Number of attempts per image; values below 1 are treated as 1.
        max_retries: int = Field(default=3, description="Maximum number of retries for HTTP requests.")
        # Sent as the system message of every OCR request.
        ocr_prompt: str = Field(
            default="Please only recognize and extract the text or data from this image without interpreting, analyzing, or understanding the content. Do not output any additional information. Simply return the recognized text or data content.",
            description="Prompt for performing OCR recognition.",
        )
        # Model requested from the OCR endpoint.
        model_name: str = Field(
            default="gemini-1.5-flash-latest", description="Model name used for OCR on images."
        )

    def __init__(self):
        self.valves = self.Valves()

    async def _perform_ocr(
        self, image: str, event_emitter: Callable[[Any], Awaitable[None]]
    ) -> str:
        """Run OCR on one image and return the recognized text.

        Args:
            image: Value placed in the ``image_url.url`` field of the request.
            event_emitter: Awaitable callback that receives status events.

        Returns:
            The ``choices[0].message.content`` field of the endpoint's reply.

        Raises:
            RuntimeError: If every attempt fails; the last error is chained
                as ``__cause__``.
        """
        await event_emitter(
            {
                "type": "status",
                "data": {
                    "description": "✨Performing text recognition on the image, please wait patiently...",
                    "done": False,
                },
            }
        )

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.valves.OCR_API_KEY}",
        }
        ocr_body = {
            "model": self.valves.model_name,
            "messages": [
                {
                    "role": "system",
                    "content": [{"type": "text", "text": self.valves.ocr_prompt}],
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {"url": image, "detail": "high"},
                        }
                    ],
                },
            ],
        }
        # Tolerate a trailing slash in the configured base URL.
        url = f"{self.valves.OCR_Base_URL.rstrip('/')}/v1/chat/completions"

        # Always try at least once so a misconfigured max_retries cannot make
        # this method fall through and return None.
        attempts = max(1, self.valves.max_retries)
        last_error: Optional[Exception] = None

        async with aiohttp.ClientSession() as session:
            for _ in range(attempts):
                try:
                    async with session.post(
                        url, json=ocr_body, headers=headers
                    ) as response:
                        response.raise_for_status()
                        response_data = await response.json()
                    result = response_data["choices"][0]["message"]["content"]
                except Exception as e:
                    # Retry boundary: transport errors, HTTP error statuses and
                    # malformed replies are all retried alike.
                    last_error = e
                    continue

                # Emitted outside the try block so that a failing emitter does
                # not trigger another OCR request.
                await event_emitter(
                    {
                        "type": "status",
                        "data": {
                            "description": "🎉Recognition successful, passing to the model for processing...",
                            "done": True,
                        },
                    }
                )
                return result

        # Close the pending status so it is not left at done=False.
        await event_emitter(
            {
                "type": "status",
                "data": {
                    "description": "❌Text recognition failed.",
                    "done": True,
                },
            }
        )
        raise RuntimeError(f"OCR recognition failed: {last_error}") from last_error

    async def inlet(
        self,
        body: dict,
        __event_emitter__: Callable[[Any], Awaitable[None]],
        __user__: Optional[dict] = None,
        __model__: Optional[dict] = None,
    ) -> dict:
        """Replace images in the request with OCR text before it is forwarded.

        Images in the latest user message are recognized and turned into text
        parts. Images in earlier user messages are removed without OCR. If
        recognition of an image fails, the error is printed and that image is
        left in place.

        Args:
            body: Request body; its ``messages`` list is modified in place.
            __event_emitter__: Awaitable callback that receives status events.
            __user__: Unused.
            __model__: Unused.

        Returns:
            The same ``body`` object.
        """
        messages = body.get("messages") or []

        # The current turn is the last user message. Counting messages instead
        # would misclassify a first turn that is preceded by a system prompt.
        current_index = self._last_user_index(messages)
        if current_index is None:
            return body

        changed = False

        # Earlier turns: drop leftover images instead of recognizing them again.
        for m_index, message in enumerate(messages):
            if m_index == current_index:
                continue
            parts = self._user_content_parts(message)
            if parts is None:
                continue
            kept = [part for part in parts if not self._is_image_part(part)]
            if len(kept) != len(parts):
                parts[:] = kept  # in place, so other references see the change
                changed = True

        # Current turn: recognize every image, not only the first one.
        for part in self._user_content_parts(messages[current_index]) or []:
            image = self._image_url(part)
            if image is None:
                continue
            try:
                result = await self._perform_ocr(image, __event_emitter__)
            except Exception as e:
                # Best effort: keep the image and let the request continue.
                print(f"OCR recognition error: {e}")
                continue
            part["type"] = "text"
            part.pop("image_url", None)
            part["text"] = result
            changed = True

        if changed:
            body["messages"] = messages
        return body

    @staticmethod
    def _is_user_message(message: Any) -> bool:
        """Return True if *message* is a dict whose role is ``"user"``."""
        return isinstance(message, dict) and message.get("role") == "user"

    @staticmethod
    def _is_image_part(part: Any) -> bool:
        """Return True if *part* is a content part of type ``"image_url"``."""
        return isinstance(part, dict) and part.get("type") == "image_url"

    @staticmethod
    def _image_url(part: Any) -> Optional[str]:
        """Return the URL of an image part, or None if absent or malformed."""
        if not (isinstance(part, dict) and part.get("type") == "image_url"):
            return None
        image = part.get("image_url")
        url = image.get("url") if isinstance(image, dict) else None
        return url if isinstance(url, str) and url else None

    def _user_content_parts(self, message: Any) -> Optional[List[Any]]:
        """Return the content list of a user message, or None if not applicable."""
        if not self._is_user_message(message):
            return None
        content = message.get("content")
        return content if isinstance(content, list) else None

    def _last_user_index(self, messages: List[Any]) -> Optional[int]:
        """Return the index of the last user message, or None if there is none."""
        return max(
            (i for i, m in enumerate(messages) if self._is_user_message(m)),
            default=None,
        )

    def _iter_image_parts(self, messages: List[Any]) -> Iterator[Tuple[int, int, Any]]:
        """Yield ``(message_index, content_index, part)`` for each user image part."""
        for m_index, message in enumerate(messages):
            parts = self._user_content_parts(message)
            if parts is None:
                continue
            for c_index, part in enumerate(parts):
                if self._is_image_part(part):
                    yield m_index, c_index, part

    def _find_image_in_messages(self, messages):
        """Find the first image in the user messages.

        Returns:
            ``(message_index, content_index, url)`` for the first image part
            that carries a usable URL, or None if there is no such part.
        """
        for m_index, c_index, part in self._iter_image_parts(messages):
            url = self._image_url(part)
            if url is not None:
                return m_index, c_index, url
        return None

    async def outlet(
        self,
        body: dict,
        __event_emitter__: Callable[[Any], Awaitable[None]],
        __user__: Optional[dict] = None,
        __model__: Optional[dict] = None,
    ) -> dict:
        """Return the response body unchanged."""
        return body