Tool
v0.0.2
Web Scrape
Web Scraper using Jina Reader
Tool ID
web_scrape
Downloads
4.2K+

Tool Content
python
"""
title: Web Scrape
author: Pyotr Growpotkin
version: 0.0.2
"""

import requests


class Tools:
    def __init__(self):
        pass

    def web_scrape(self, url: str) -> str:
        """
        Scrape and process a web page using r.jina.ai

        :param url: The URL of the web page to scrape.
        :return: The scraped and processed content without the Links/Buttons section, or an error message.
        """
        jina_url = f"https://r.jina.ai/{url}"

        headers = {
            "X-No-Cache": "true",
            "X-With-Images-Summary": "true",
            "X-With-Links-Summary": "true",
        }

        try:
            response = requests.get(jina_url, headers=headers)
            response.raise_for_status()

            # Extract content and remove Links/Buttons section as its too many tokens
            content = response.text
            links_section_start = content.rfind("Images:")
            if links_section_start != -1:
                content = content[:links_section_start].strip()

            return content

        except requests.RequestException as e:
            return f"Error scraping web page: {str(e)}"