Tool
v0.0.6
Youtube Transcript Provider
A tool that returns the full youtube transcript in English for a given youtube video url instead of using RAG
Tool ID
youtube_transcript_provider
Creator
@whirlybird
Downloads
1.9K+

Tool Content
python
"""
title: Youtube Transcript Provider
description: A tool that returns the full, detailed youtube transcript in English of a passed in youtube url.
author: ekatiyar
author_url: https://github.com/ekatiyar
github: https://github.com/ekatiyar/open-webui-tools
funding_url: https://github.com/open-webui
version: 0.0.6
license: MIT
"""

from langchain_community.document_loaders import YoutubeLoader
import re
from typing import Callable, Any

import unittest
    
class EventEmitter:
    def __init__(self, event_emitter: Callable[[dict], Any] = None):
        self.event_emitter = event_emitter

    async def progress_update(self, description):
        await self.emit(description)

    async def error_update(self, description):
        await self.emit(description, "error", True)

    async def success_update(self, description):
        await self.emit(description, "success", True)

    async def emit(self, description="Unknown State", status="in_progress", done=False):
        if self.event_emitter:
            await self.event_emitter(
                {
                    "type": "status",
                    "data": {
                        "status": status,
                        "description": description,
                        "done": done,
                    },
                }
            )
    

class Tools:
    def __init__(self):
        self.citation = True

    async def get_youtube_transcript(self, url: str, __event_emitter__: Callable[[dict], Any] = None) -> str:
        """
        Provides the title and full transcript of a YouTube video in English.
        Only use if the user supplied a valid YouTube URL.
        Examples of valid YouTube URLs: https://youtu.be/dQw4w9WgXcQ, https://www.youtube.com/watch?v=dQw4w9WgXcQ

        :param url: The URL of the youtube video that you want the transcript for.
        :return: The title and full transcript of the YouTube video in English, or an error message.
        """
        emitter = EventEmitter(__event_emitter__)


        try:
            await emitter.progress_update(f"Getting transcript for {url}")

            error_message = f"Error: Invalid YouTube URL: {url}"
            if not url or url == "":
                await emitter.error_update(error_message)
                return error_message
            elif "dQw4w9WgXcQ" in url: # LLM's love passing in this url when the user doesn't provide one
                await emitter.error_update(f"Error: No URL provided (except for Rick Roll ... is that what you want?).")
                return error_message
            
            transcript = YoutubeLoader.from_youtube_url(url, add_video_info=True, language=["en", "en_auto"], translation="en").load()
            
            if len(transcript) == 0:
                error_message = f"Error: Failed to find transcript for {url}"
                await emitter.error_update(error_message)
                return error_message

            title = transcript[0].metadata["title"]
            transcript = "\n".join([document.page_content for document in transcript])
            
            await emitter.success_update(f"Transcript for {title} retrieved!")
            return f"Title: {title}\n\nTranscript:\n{transcript}"

        except Exception as e:
            error_message = f"Error: {str(e)}"
            await emitter.error_update(error_message)
            return error_message

class YoutubeTranscriptProviderTest(unittest.IsolatedAsyncioTestCase):
    async def assert_transcript_length(self, url: str, expected_length: int):
        self.assertEqual(len(await Tools().get_youtube_transcript(url)), expected_length)

    async def assert_transcript_error(self, url: str):
        response = await Tools().get_youtube_transcript(url)
        self.assertTrue("Error" in response)

    async def test_get_youtube_transcript(self):
        url = "https://www.youtube.com/watch?v=zhWDdy_5v2w"
        await self.assert_transcript_length(url, 1384)

    async def test_get_youtube_transcript_with_invalid_url(self):
        invalid_url = "https://www.example.com/invalid"
        missing_url = "https://www.youtube.com/watch?v=zhWDdy_5v3w"
        rick_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"

        await self.assert_transcript_error(invalid_url)
        await self.assert_transcript_error(missing_url)
        await self.assert_transcript_error(rick_url)

    async def test_get_youtube_transcript_with_none_arg(self):
        await self.assert_transcript_error(None)
        await self.assert_transcript_error("")

if __name__ == '__main__':
    print("Running tests...")
    unittest.main()