"""
title: BBC News Feed
author: @nathanwindisch, with bug fixes by @igna503
author_url: https://github.com/nathanwindisch, https://github.com/igna503
funding_url: https://www.patreon.com/NathanWindisch
version: 0.1.0
changelog:
- 0.0.1 - Initial upload to openwebui community.
- 0.0.2 - Modified formatting slightly.
- 0.0.3 - Added tool docstring, and this changelog.
- 0.0.4 - Added funding_url to docstring.
- 0.0.5 - Updated get_bbc_news_feed function to use a default for
the ArticleType, and updated its docstring to include
a list of the possible types, to assist the LLM's query.
- 0.0.6 - Added event emitter to the get_bbc_news_feed function,
to provide status updates to the user as the function
executes. Also wrapped the function in a try/catch, to
handle any exceptions that may occur during execution.
- 0.0.7 - Fixed a major bug where the type was not being cast
to the ArticleType enum, causing the get_uri function
to not be called correctly.
- 0.0.8 - Updated the ArticleType parameter docstring to make it
mandatory, and for it to contain the full names of the
'world/' types rather than the abbreviations.
- 0.0.9 - Created a new function, get_bbc_news_content, which
retrieves the article text content of a BBC News link,
given its URI.
- 0.1.0 - Removed enum, used dict instead, fixing bug where
articletype was not a supported JSON schema by
Open WebUI.
"""
import re
import json
import requests
import xml.etree.ElementTree as ElementTree
from typing import Awaitable, Callable
from pydantic import BaseModel
from bs4 import BeautifulSoup
# Sections whose feed path is identical to the category name.
_SECTION_NAMES = (
    "world",
    "uk",
    "business",
    "politics",
    "health",
    "education",
    "science_and_environment",
    "technology",
    "entertainment_and_arts",
    "england",
    "northern_ireland",
    "scotland",
    "wales",
)

# Regional sections whose feed path is nested under "world/".
_WORLD_REGION_NAMES = (
    "africa",
    "asia",
    "australia",
    "europe",
    "latin_america",
    "middle_east",
    "us_and_canada",
)

# Maps each category name to the path segment used in its feed URL.
# "top_stories" has an empty path segment.
categories = {
    "top_stories": "",
    **{section: section for section in _SECTION_NAMES},
    **{region: f"world/{region}" for region in _WORLD_REGION_NAMES},
}
def get_name(category) -> str:
    """Return a display name for a category: underscores become spaces, then title case."""
    words = category.split("_")
    spaced = " ".join(words)
    return spaced.title()
def get_uri(category) -> str:
    """Return the RSS feed URL for a category.

    "top_stories" maps to the main news feed; every other category is looked
    up in the module-level 'categories' dict, so an unknown category raises
    KeyError.
    """
    if category == "top_stories":
        return "https://feeds.bbci.co.uk/news/rss.xml"
    feed_path = categories[category]
    return f"https://feeds.bbci.co.uk/news/{feed_path}/rss.xml"
# Regex to match a BBC News article URI.
# Details:
# - Must use http or https.
# - Must be a bbc.com or bbc.co.uk domain (optionally prefixed with "www.").
# - Must be a news article or video.
# - Must end with an ID made of word characters (letters, digits, underscore).
# The pattern is a raw string so the regex escapes (\/, \., \w) are not
# treated as invalid string escape sequences by the Python parser.
URI_REGEX = re.compile(
    r"^(https?:\/\/)(www\.)?bbc\.(com|co\.uk)\/news\/(articles|videos)\/\w+$"
)
# Upper bound, in seconds, for a single HTTP request. Without an explicit
# timeout, `requests` waits indefinitely on an unresponsive server.
_REQUEST_TIMEOUT_SECONDS = 10


def _status_event(description: str, done: bool) -> dict:
    """Build the status payload passed to the event emitter.

    :param description: Human-readable progress or result message.
    :param done: False while work is in progress, True once it has finished
        (whether it succeeded or failed).
    :return: A dict with "type" set to "status" and the message under "data".
    """
    return {
        "data": {
            "description": description,
            "status": "complete" if done else "in_progress",
            "done": done,
        },
        "type": "status",
    }


def _resolve_category(category: str) -> str:
    """Map a feed path such as 'world/africa' back to its 'categories' key.

    Keys of 'categories' and unrecognised values are returned unchanged, so
    the caller still has to check membership afterwards.
    """
    if category in categories:
        return category
    for key, path in categories.items():
        # Skip the empty path of "top_stories" so "" never resolves to it.
        if path and path == category:
            return key
    return category


class Tools:
    """Tool methods that fetch BBC News RSS feeds and article text."""

    def __init__(self):
        pass

    class UserValves(BaseModel):
        pass

    # NOTE: `__user__` keeps its original `{}` default so the signature is
    # unchanged; the dict is never read or mutated in these methods.
    async def get_bbc_news_feed(
        self,
        category: str,
        __event_emitter__: Callable[[dict], Awaitable[None]],
        __user__: dict = {},
    ) -> str:
        """
        Get the latest news from the BBC, as an array of JSON objects with a title, description, link, and published date.
        :param category: The category of news to get. It must be one of the 'categories' dict's keys (top_stories, world, uk, business, politics, health, education, science_and_environment, technology, entertainment_and_arts, england, northern_ireland, scotland, wales, africa, asia, australia, europe, latin_america, middle_east, us_and_canada). The feed path form of a key (for example world/africa) is also accepted.
        :return: A list of news items or an error message.
        """
        # Accept "world/africa"-style paths as aliases for their keys.
        category = _resolve_category(category)
        await __event_emitter__(
            _status_event(
                f"Starting BBC News Feed retrieval for articles in the '{get_name(category)}' category...",
                done=False,
            )
        )

        # Validate before calling get_uri(), which raises KeyError for an
        # unknown category.
        if category not in categories:
            await __event_emitter__(
                _status_event(
                    f"Error: Unknown BBC News category '{category}'.",
                    done=True,
                )
            )
            return (
                f"Error: unknown category '{category}'. "
                f"Valid categories: {', '.join(categories)}"
            )

        feed_uri = get_uri(category)
        try:
            response = requests.get(feed_uri, timeout=_REQUEST_TIMEOUT_SECONDS)
            if not response.ok:
                # Close out the status so it is not left "in_progress".
                await __event_emitter__(
                    _status_event(
                        f"Failed to retrieve news items from BBC News Feed for articles in the '{get_name(category)}' ({feed_uri}) category: HTTP {response.status_code}.",
                        done=True,
                    )
                )
                return f"Error: '{category}' ({feed_uri}) not found ({response.status_code})"
            root = ElementTree.fromstring(response.content)
            output = [
                {
                    "title": item.find("title").text,
                    "description": item.find("description").text,
                    "link": item.find("link").text,
                    "published": item.find("pubDate").text,
                }
                for item in root.iter("item")
            ]
            await __event_emitter__(
                _status_event(
                    f"Retrieved {len(output)} news items from BBC News Feed for articles in the '{get_name(category)}' category.",
                    done=True,
                )
            )
        except Exception as e:
            # Deliberately broad: any failure (network, XML parsing, missing
            # elements) is reported as an error string instead of raising.
            await __event_emitter__(
                _status_event(
                    f"Failed to retrieved any news items from BBC News Feed for articles in the '{get_name(category)}' ({feed_uri}) category: {e}.",
                    done=True,
                )
            )
            return f"Error: {e}"
        return json.dumps(output)

    async def get_bbc_news_content(
        self,
        uri: str,
        __event_emitter__: Callable[[dict], Awaitable[None]],
        __user__: dict = {},
    ) -> str:
        """
        Get the content of a news article from the BBC.
        :param uri: The URI of the article to get the content of, which should start with https://bbc.com/news or https://bbc.co.uk/news.
        :return: The content of the article or an error message.
        """
        await __event_emitter__(
            _status_event(
                f"Starting BBC News Article retrieval from '{uri}'...",
                done=False,
            )
        )

        if not uri:
            await __event_emitter__(
                _status_event("Error: No URI provided.", done=True)
            )
            return "Error: No URI provided"

        # Only URIs matching URI_REGEX are ever requested.
        if not URI_REGEX.match(uri):
            await __event_emitter__(
                _status_event("Error: URI must be a BBC News article.", done=True)
            )
            return "Error: URI must be a BBC News article."

        try:
            response = requests.get(uri, timeout=_REQUEST_TIMEOUT_SECONDS)
            if not response.ok:
                # Close out the status so it is not left "in_progress".
                await __event_emitter__(
                    _status_event(
                        f"Failed to retrieve BBC News Article content from '{uri}': HTTP {response.status_code}.",
                        done=True,
                    )
                )
                return f"Error: '{uri}' not found ({response.status_code})"
            article = BeautifulSoup(response.content, "html.parser").find("article")
            if article is None:
                await __event_emitter__(
                    _status_event(
                        f"Failed to retrieve BBC News Article content from '{uri}': Article content not found.",
                        done=True,
                    )
                )
                return f"Error: Article content for {uri} not found."
            # One line per <p> element found inside the <article> element.
            content = "".join(
                f"{paragraph.text}\n" for paragraph in article.find_all("p")
            )
            await __event_emitter__(
                _status_event(
                    f"Retrieved BBC News Article content from '{uri}' ({len(content)} characters).",
                    done=True,
                )
            )
        except Exception as e:
            # Deliberately broad: any failure is reported as an error string
            # instead of raising.
            await __event_emitter__(
                _status_event(
                    f"Failed to retrieve BBC News Article content from '{uri}': {e}.",
                    done=True,
                )
            )
            return f"Error: {e}"
        return content