Whitepaper
Docs
Sign In
Tool
Tool
JINA API WEB
Tool ID
jina_api_web
Creator
@caiomfm
Downloads
380+
WEB SEARCH
Get
README
No README available
Tool Code
Show
import os
import re
import time
import unittest
from typing import Any, Callable, Optional

import requests
from pydantic import BaseModel, Field

# Basic local cache
cache = {}


def extract_title(text):
    """
    Extracts the title from a string containing structured text.

    The title is whatever follows the first ``Title: `` marker up to the end
    of that line. The marker line may be terminated by a newline or by the
    end of the input.

    :param text: The input string containing the title.
    :return: The extracted title string (whitespace-stripped), or None if
        the title is not found.
    """
    # MULTILINE makes `$` match at every end-of-line as well as at the end of
    # the input, so a final "Title: ..." line without a trailing newline is
    # still recognised.
    match = re.search(r"Title: (.*)$", text, re.MULTILINE)
    return match.group(1).strip() if match else None


def clean_urls(text) -> str:
    """
    Cleans URLs from a string containing structured text.

    Every parenthesised group whose content starts with ``http`` (for
    example the target part of a Markdown link) is removed together with its
    surrounding parentheses.

    :param text: The input string containing the URLs.
    :return: The cleaned string with URLs removed.
    """
    return re.sub(r"\((http[^)]+)\)", "", text)


class EventEmitter:
    """
    Thin wrapper around an optional async status callback.

    When no callback is supplied every method is a no-op.
    """

    def __init__(self, event_emitter: Optional[Callable[[dict], Any]] = None):
        """
        :param event_emitter: Callable that is awaited with a status event
            dict, or None to disable event emission.
        """
        self.event_emitter = event_emitter

    async def progress_update(self, description):
        """Emit an ``in_progress`` status event that is not yet done."""
        await self.emit(description)

    async def error_update(self, description):
        """Emit a final (``done=True``) status event with status ``error``."""
        await self.emit(description, "error", True)

    async def success_update(self, description):
        """Emit a final (``done=True``) status event with status ``success``."""
        await self.emit(description, "success", True)

    async def emit(self, description="Unknown State", status="in_progress", done=False):
        """
        Send a status event to the callback, if one was configured.

        :param description: Human-readable description of the current state.
        :param status: Status label placed in the event payload.
        :param done: Whether this event marks the end of the operation.
        """
        if self.event_emitter:
            await self.event_emitter(
                {
                    "type": "status",
                    "data": {
                        "status": status,
                        "description": description,
                        "done": done,
                    },
                }
            )


class Tools:
    """Tool container exposing global and per-user configuration models."""

    class Valves(BaseModel):
        """Global settings for the tool."""

        DISABLE_CACHING: bool = Field(
            default=False, description="Bypass Jina Cache when scraping"
        )
        GLOBAL_JINA_API_KEY: str = Field(
            default="",
            description="(Optional) Jina API key. Allows a higher rate limit when scraping. Used when a User-specific API key is not available.",
        )

    class UserValves(BaseModel):
        """Per-user settings for the tool."""

        CLEAN_CONTENT: bool = Field(
            default=True,
            description="Remove links and image urls from scraped content. This reduces the number of tokens.",
        )
        JINA_API_KEY: str = Field(
            default="",
            description="(Optional) Jina API key. Allows a higher rate limit when scraping.",
        )

    def __init__(self):
        """Create the tool with default global valves."""
        self.valves = self.Valves()
        # NOTE(review): presumably a flag read by the host application to
        # control citation output -- confirm against the host's tool API.
        self.citation = True