Tool
v1.0.0
YouTube Video Transcript
Get YouTube Video Transcript
Tool ID
youtube_video_transcript
Creator
@bytepull
Downloads
108+

Tool Content
python
"""
title: Youtube Video Transcript Retriever
author: Bytepull
author_url: https://github.com/bytepull
github: https://github.com/bytepull/open-webui-utilities
funding_url: https://github.com/open-webui
version: 1.0.0
license: MIT
"""

import re
from typing import Callable, Any
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter


class EventEmitter:
    """Forward status updates to an optional asynchronous callback."""

    def __init__(self, event_emitter: Callable[[dict], Any] = None):
        # Stored as given; a falsy value (such as None) makes emit() a no-op.
        self.event_emitter = event_emitter

    async def emit(self, description="Unknown State", status="in_progress", done=False):
        """
        Send one status event to the wrapped callback, if there is one.

        :param description: Human-readable text describing the current state.
        :param status: Short status label placed in the event data.
        :param done: Flag placed in the event data under the "done" key.
        """
        callback = self.event_emitter
        if not callback:
            return

        details = {
            "status": status,
            "description": description,
            "done": done,
        }
        await callback({"type": "status", "data": details})


class Tools:
    """Tool container exposing YouTube transcript retrieval."""

    # Accepted URL shapes (always matched against the WHOLE string):
    #   http(s)://[www.|m.]youtube.com/watch?...v=<id>[&...|#...]
    #   http(s)://youtu.be/<id>[?...|#...]
    # The named group "id" captures only the video ID, so trailing query
    # parameters such as "&t=10s" never leak into it.
    # NOTE(review): kept as a non-callable class attribute rather than a helper
    # method, on the assumption that the host application may expose callable
    # attributes of Tools as tools -- confirm against the host's loader.
    _VIDEO_URL = re.compile(
        r"https?://"
        r"(?:(?:www\.|m\.)?youtube\.com/watch\?(?:[^#\s]*?&)?v=|youtu\.be/)"
        r"(?P<id>[A-Za-z0-9_-]+)"
        r"(?:[?&#]\S*)?"
    )

    def __init__(self):
        pass

    async def get_video_transcript(
        self, url: str, __event_emitter__: Callable[[dict], Any] = None
    ) -> str:
        """
        Retrieves the transcript for a YouTube video given the video URL.
        :param url: The URL of the YouTube video (youtube.com/watch?v=... or youtu.be/...).
        :return: The transcript of the YouTube video. An empty string if the URL is not recognised, or "video transcript not found" if retrieval fails.
        """

        emitter = EventEmitter(__event_emitter__)

        # A non-string url is treated like any other unrecognised URL instead
        # of raising from the regex call.
        matched = (
            self._VIDEO_URL.fullmatch(url.strip()) if isinstance(url, str) else None
        )
        if matched is None:
            await emitter.emit(
                status="error",
                description=f"Wrong URL: {url}",
                done=True,
            )
            return ""

        # The pattern requires at least one ID character, so this is non-empty.
        video_id = matched.group("id")

        await emitter.emit("Fetching video transcript")

        formatter = TextFormatter()

        # Fallback text returned to the caller when retrieval fails.
        transcript = "video transcript not found"

        try:
            # NOTE(review): recent youtube-transcript-api releases appear to
            # replace the static get_transcript() with an instance-level
            # fetch(); confirm against the installed version.
            transcript = formatter.format_transcript(
                YouTubeTranscriptApi.get_transcript(
                    video_id, languages=["it", "en"])
            )
        except Exception as e:
            # Best effort: report the failure and fall through to the fallback.
            print(e)
            await emitter.emit(
                status="error",
                description="Transcript not found",
                done=True,
            )
        else:
            await emitter.emit(
                status="complete",
                description="Transcript retrieved succesfully",
                done=True,
            )

        # Returned after the try statement, not from a `finally` clause: a
        # `return` in `finally` would silently discard in-flight exceptions
        # such as asyncio.CancelledError or KeyboardInterrupt.
        return transcript