import requests
from typing import Callable, Any
import re
from pydantic import BaseModel, Field
import unittest
import time
import os
# Basic local cache: a module-level dict that starts out empty.
# NOTE(review): nothing in the visible part of the file reads or writes it;
# the key/value layout is not shown here — confirm against its users.
cache = {}
def extract_title(text):
    """
    Extracts the title from a string containing structured text.

    The title is everything that follows the first ``Title: `` marker up to
    the end of that line. The line does not have to be newline-terminated, so
    a title sitting on the last line of the input is found as well.

    :param text: The input string containing the title.
    :return: The extracted title string with surrounding whitespace removed,
        or None if the title is not found.
    """
    # "." never matches a newline, so the capture already stops at the end of
    # the line. Requiring a literal "\n" after it (as the previous pattern did)
    # only had the effect of missing a title on a final, unterminated line.
    match = re.search(r"Title: (.*)", text)
    # strip() also drops a trailing "\r" left over from CRLF line endings.
    return match.group(1).strip() if match else None
def clean_urls(text) -> str:
    """
    Strips parenthesised link targets out of a piece of text.

    Every ``(http...)`` group — an opening parenthesis, the letters ``http``,
    at least one further character that is not ``)``, and the closing
    parenthesis — is deleted together with its parentheses. Everything else,
    including the ``[label]`` part of a markdown link, is left untouched.

    :param text: The input string containing the URLs.
    :return: The input string with all such groups removed.
    """
    parenthesised_url = re.compile(r"\((http[^)]+)\)")
    return parenthesised_url.sub("", text)
class EventEmitter:
    """
    Small helper that forwards status events to an optional async callback.

    Each event is a dict of the form
    ``{"type": "status", "data": {"status", "description", "done"}}``.
    When no callback was supplied, emitting does nothing.
    """

    def __init__(self, event_emitter: Callable[[dict], Any] = None):
        """
        :param event_emitter: Callback that is awaited with a single event
            dict. May be left as None, in which case nothing is emitted.
        """
        self.event_emitter = event_emitter

    async def emit(self, description="Unknown State", status="in_progress", done=False):
        """
        Builds a status event and hands it to the callback, if there is one.

        :param description: Text placed in the event's "description" entry.
        :param status: Value placed in the event's "status" entry.
        :param done: Value placed in the event's "done" entry.
        """
        sink = self.event_emitter
        if not sink:
            # No callback configured: silently skip.
            return
        payload = {
            "status": status,
            "description": description,
            "done": done,
        }
        await sink({"type": "status", "data": payload})

    async def progress_update(self, description):
        """Emits ``description`` using emit()'s default status and done flag."""
        await self.emit(description)

    async def error_update(self, description):
        """Emits ``description`` with status "error" and done set to True."""
        await self.emit(description, "error", True)

    async def success_update(self, description):
        """Emits ``description`` with status "success" and done set to True."""
        await self.emit(description, "success", True)
class Tools:
class Valves(BaseModel):
    # Settings model; an instance with all defaults is created in __init__
    # and stored as self.valves.

    # Boolean switch, off by default. Per its description it is meant to
    # bypass Jina's cache when scraping; the code that reads it is not in the
    # visible part of the file.
    DISABLE_CACHING: bool = Field(
        default=False, description="Bypass Jina Cache when scraping"
    )
    # Optional API key, empty string by default. Per its description it is the
    # fallback used when no user-specific key is available.
    GLOBAL_JINA_API_KEY: str = Field(
        default="",
        description="(Optional) Jina API key. Allows a higher rate limit when scraping. Used when a User-specific API key is not available.",
    )
class UserValves(BaseModel):
    # Second settings model. NOTE(review): presumably holds per-user settings
    # (going by its name); where it is instantiated is not visible here —
    # confirm against the caller.

    # Boolean switch, on by default. Per its description it requests removal
    # of links and image URLs from scraped content to reduce token count.
    CLEAN_CONTENT: bool = Field(
        default=True,
        description="Remove links and image urls from scraped content. This reduces the number of tokens.",
    )
    # Optional API key, empty string by default.
    JINA_API_KEY: str = Field(
        default="",
        description="(Optional) Jina API key. Allows a higher rate limit when scraping.",
    )
def __init__(self):
    """
    Sets up the instance with default settings.
    """
    # Valves() is called without arguments, so every field takes its declared
    # default.
    self.valves = self.Valves()
    # Flag stored on the instance. NOTE(review): nothing in the visible code
    # reads it — presumably consumed by the host framework; confirm.
    self.citation = True