Whitepaper
Docs
Sign In
Tool
Tool
v1.0.0
YouTube Video Transcript
Tool ID
youtube_video_transcript
Creator
@bytepull
Downloads
1.5K+
Get YouTube Video Transcript
Get
README
No README available
Tool Code
Show
""" title: Youtube Video Transcript Retriever author: Bytepull author_url: https://github.com/bytepull github: https://github.com/bytepull/open-webui-utilities funding_url: https://github.com/open-webui version: 1.0.0 license: MIT """ import re from typing import Callable, Any from youtube_transcript_api import YouTubeTranscriptApi from youtube_transcript_api.formatters import TextFormatter class EventEmitter: def __init__(self, event_emitter: Callable[[dict], Any] = None): self.event_emitter = event_emitter async def emit(self, description="Unknown State", status="in_progress", done=False): if self.event_emitter: await self.event_emitter( { "type": "status", "data": { "status": status, "description": description, "done": done, }, } ) class Tools: def __init__(self): pass async def get_video_transcript( self, url: str, __event_emitter__: Callable[[dict], Any] = None ) -> str: """ Retrieves the transcript for a YouTube video given the video URL. :param url: The URL of the YouTube video. :return: The transcript of the YouTube video. """ emitter = EventEmitter(__event_emitter__) pattern = re.compile( r"https\:\/\/www.youtube.com/watch\?v=[A-Za-z0-9_-]+") if not (bool(pattern.match(url))): await emitter.emit( status="error", description=f"Wrong URL: {url}", done=True, ) return "" video_id = url.split("v=")[1] if not (video_id): await emitter.emit( status="error", description=f"Cannot get video ID from URL: {url}", done=True, ) return "" await emitter.emit("Fetching video transcript") formatter = TextFormatter() transcript = "video transcript not found" try: transcript = formatter.format_transcript( YouTubeTranscriptApi.get_transcript( video_id, languages=["it", "en"]) ) await emitter.emit( status="complete", description="Transcript retrieved succesfully", done=True, ) except Exception as e: print(e) await emitter.emit( status="error", description="Transcript not found", done=True, ) finally: return transcript