Jina
tool · by @2171
Last Updated: a year ago
Created: a year ago
Tool Name: Jina
Downloads: 243+
Saves: 0+
Description: Read URLs with Jina
Tool Code
import asyncio
import aiohttp
from typing import Callable, Any, Optional, List

# Type of the optional event emitter callback passed in by Open WebUI.
EmitterType = Optional[Callable[[dict], Any]]


class EventEmitter:
    """Thin wrapper around the Open WebUI event emitter callback."""

    def __init__(self, event_emitter: EmitterType):
        self.event_emitter = event_emitter

    async def emit(self, event_type: str, data: dict):
        if self.event_emitter:
            await self.event_emitter({"type": event_type, "data": data})

    async def update_status(
        self, description: str, done: bool, action: str, urls: List[str]
    ):
        await self.emit(
            "status",
            {"done": done, "action": action, "description": description, "urls": urls},
        )

    async def send_citation(self, title: str, url: str, content: str):
        await self.emit(
            "citation",
            {
                "document": [content],
                "metadata": [{"name": title, "source": url, "html": False}],
            },
        )


class Tools:
    @staticmethod
    async def web_scrape(
        urls: List[str], user_request: str, __event_emitter__: EmitterType = None
    ) -> str:
        """
        Scrape multiple web pages using the r.jina.ai API.

        :param urls: List of URLs of the web pages to scrape.
        :param user_request: The user's original request or query.
        :param __event_emitter__: Optional event emitter for status updates.
        :return: Combined scraped contents, or error messages.
        """
        emitter = EventEmitter(__event_emitter__)
        combined_results = []

        await emitter.update_status(
            f"Jina is reading {len(urls)} web page(s)", False, "web_search", urls
        )

        async def process_url(url):
            # Prefix the target URL with the Jina Reader endpoint.
            jina_url = f"https://r.jina.ai/{url}"
            headers = {
                "X-No-Cache": "true",
                "X-With-Images-Summary": "true",
                "X-With-Links-Summary": "true",
            }
            try:
                async with aiohttp.ClientSession() as session:
                    async with session.get(jina_url, headers=headers) as response:
                        response.raise_for_status()
                        content = await response.text()
                title = f"Scraped content from {url}"
                await emitter.send_citation(title, url, content)
                return f"URL: {url}\nContent: {content}\n"
            except aiohttp.ClientError as e:
                error_message = f"Error reading page {url}: {str(e)}"
                await emitter.update_status(error_message, False, "web_scrape", [url])
                return f"URL: {url}\nError: {error_message}\n"

        # Fetch all URLs concurrently.
        tasks = [process_url(url) for url in urls]
        results = await asyncio.gather(*tasks)
        combined_results.extend(results)

        await emitter.update_status(
            f"Finished reading {len(urls)} web page(s)", True, "web_search", urls
        )

        # Combine all results into a single string.
        final_result = "\n".join(combined_results)
        return f"User request: {user_request}\n\nResults for the request:\n{final_result}"
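
For quick local testing outside Open WebUI, the tool can be driven with a small harness like the sketch below. The print_emitter stand-in and the example URL are assumptions for illustration; inside Open WebUI the platform supplies the real __event_emitter__ and calls web_scrape for you.

# Minimal local test harness (a sketch, not part of the published tool).
# Assumes the Tools and EventEmitter classes above are defined in the same module.
import asyncio


async def print_emitter(event: dict):
    # Stand-in for Open WebUI's event emitter: just print status/citation events.
    print(f"[{event['type']}] {event['data']}")


async def main():
    result = await Tools.web_scrape(
        urls=["https://example.com"],  # hypothetical example URL
        user_request="Summarize this page",
        __event_emitter__=print_emitter,
    )
    print(result)


if __name__ == "__main__":
    asyncio.run(main())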