Tool
v0.0.1
Youtube Transcript Provider (Langchain Community)
A YouTube transcript provider without RAG. Uses the same YouTube loader that Open WebUI uses (the LangChain Community YoutubeLoader).
Tool ID
youtube_transcript_provider_lc
Creator
@thearyadev
Downloads
450+

Tool Content
python
"""
title: Youtube Transcript Provider (Langchain Community)
author: thearyadev 
author_url: https://github.com/thearyadev/youtube-transcript-provider
funding_url: https://github.com/open-webui
version: 0.0.1
"""

import asyncio
import traceback
from typing import Awaitable, Callable, Any

from langchain_community.document_loaders import YoutubeLoader


class Tools:
    def __init__(self):
        self.citation = True

    async def get_youtube_transcript(
        self,
        url: str,
        __event_emitter__: Callable[[dict[str, dict[str, Any] | str]], Awaitable[None]],
    ) -> str:
        """
        Provides the title and full transcript of a YouTube video in English.
        Only use if the user supplied a valid YouTube URL.
        Examples of valid YouTube URLs: https://youtu.be/dQw4w9WgXcQ, https://www.youtube.com/watch?v=dQw4w9WgXcQ

        :param url: The URL of the youtube video that you want the transcript for.
        :return: The title and full transcript of the YouTube video in English, or an error message.
        """
        try:
            if "dQw4w9WgXcQ" in url:
                await __event_emitter__(
                    {
                        "type": "status",
                        "data": {
                            "description": f"{url} is not a valid youtube link",
                            "done": True,
                        },
                    }
                )
                return "The tool failed with an error. No transcript has been provided."

            data = YoutubeLoader.from_youtube_url(
                youtube_url=url, add_video_info=True, language=["en", "en_auto"]
            ).load()

            if not data:
                await __event_emitter__(
                    {
                        "type": "status",
                        "data": {
                            "description": f"Failed to retrieve transcript for {url}. No results",
                            "done": True,
                        },
                    }
                )
                return "The tool failed with an error. No transcript has been provided."

            await __event_emitter__(
                {
                    "type": "status",
                    "data": {
                        "description": f"Successfully retrieved transcript for {url}",
                        "done": True,
                    },
                }
            )
            return f"Title: {data[0].metadata['title']}\nTranscript:\n{data[0].page_content}"
        except:
            await __event_emitter__(
                {
                    "type": "status",
                    "data": {
                        "description": f"Failed to retrieve transcript for {url}.",
                        "done": True,
                    },
                }
            )
            return f"The tool failed with an error. No transcript has been provided.\nError Traceback: \n{traceback.format_exc()}"