Tool
v0.1.0
transcribe
Transcribes video and audio
Tool ID
whisper_transcribe
Creator
@henry444
Downloads
524+

Tool Content
python
"""
title: Audio Transcription using OpenAI Whisper API
author: Your Name
funding_url: https://github.com/your-repo
version: 0.1.0
license: MIT
"""

import os
import requests
import json
from pydantic import BaseModel, Field
from typing import Callable, Any
import asyncio
from main.app.state import config

class EventEmitter:
    """Forward status updates to an optional awaitable callback.

    When no callback was supplied, emitting is a no-op.
    """

    def __init__(self, event_emitter: Callable[[dict], Any] = None):
        # Callback that is awaited with each status event dict; may be None.
        self.event_emitter = event_emitter

    async def emit(self, description="Unknown State", status="in_progress", done=False):
        """Send one ``"status"`` event to the callback, if there is one.

        :param description: Human-readable text describing the current step.
        :param status: Short state label placed in the event data.
        :param done: Whether this event marks the end of the operation.
        """
        # Nothing to notify when the caller did not provide a callback.
        if not self.event_emitter:
            return

        payload = {
            "type": "status",
            "data": {
                "status": status,
                "description": description,
                "done": done,
            },
        }
        await self.event_emitter(payload)

class Tools:
    """Audio transcription tool backed by the OpenAI transcription endpoint."""

    class Valves(BaseModel):
        # Settings forwarded as multipart form fields of the transcription
        # request. LANGUAGE and TEMPERATURE are only sent when they differ
        # from their defaults.
        MODEL: str = Field(
            default="whisper-1",
            description="ID of the model to use",
        )
        LANGUAGE: str = Field(
            default="",
            description="Language of the input audio in ISO-639-1 format (optional)",
        )
        RESPONSE_FORMAT: str = Field(
            default="json",
            description="Format of the transcript output: json, text, srt, verbose_json, or vtt",
        )
        TEMPERATURE: float = Field(
            default=0.0,
            description="Sampling temperature between 0 and 1",
        )

    def __init__(self):
        self.valves = self.Valves()
        # The bearer token is read from the application config once, at
        # construction time.
        self.headers = {
            "Authorization": f"Bearer {config.OPENAI_API_KEY}",
        }
        self.api_url = "https://api.openai.com/v1/audio/transcriptions"

    async def transcribe_audio(
        self,
        file_path: str,
        __event_emitter__: Callable[[dict], Any] = None,
    ) -> str:
        """
        Transcribe an audio file using the OpenAI Whisper API.
        :param file_path: The path to the audio file to transcribe.
        :return: The transcription result in the specified format (a JSON string for json/verbose_json, the raw transcript text for text/srt/vtt), or a JSON string with an "error" key on failure.
        """
        # __event_emitter__ is an optional awaitable callback that receives
        # status event dicts; progress is silently skipped when it is None.
        emitter = EventEmitter(__event_emitter__)

        await emitter.emit(f"Starting transcription for: {file_path}")

        if not os.path.isfile(file_path):
            await emitter.emit(
                status="error",
                description=f"File not found: {file_path}",
                done=True,
            )
            return json.dumps({"error": "File not found."})

        response_format = self.valves.RESPONSE_FORMAT

        try:
            # The context manager guarantees the audio file is closed whether
            # the upload succeeds or fails.
            with open(file_path, "rb") as audio_file:
                files = {
                    "file": (os.path.basename(file_path), audio_file),
                    "model": (None, self.valves.MODEL),
                    "response_format": (None, response_format),
                }

                if self.valves.LANGUAGE:
                    files["language"] = (None, self.valves.LANGUAGE)
                if self.valves.TEMPERATURE != 0.0:
                    files["temperature"] = (None, str(self.valves.TEMPERATURE))

                await emitter.emit("Sending transcription request to OpenAI API")

                # requests is synchronous; run it in the default executor so
                # the event loop is not blocked for up to the full timeout.
                loop = asyncio.get_running_loop()
                response = await loop.run_in_executor(
                    None,
                    lambda: requests.post(
                        self.api_url,
                        headers=self.headers,
                        files=files,
                        timeout=120,
                    ),
                )

            response.raise_for_status()

            if response_format in ("json", "verbose_json"):
                transcript = json.dumps(response.json(), ensure_ascii=False)
            else:
                # text, srt and vtt bodies are plain text, not JSON, so they
                # are returned as-is. Decode as UTF-8 first because requests
                # may guess a legacy charset for text responses that do not
                # declare one; fall back to its own guess if that fails.
                try:
                    transcript = response.content.decode("utf-8")
                except UnicodeDecodeError:
                    transcript = response.text

            await emitter.emit(
                status="complete",
                description="Transcription completed successfully",
                done=True,
            )

            return transcript

        # Checked before RequestException: recent requests versions raise a
        # JSON error that is a subclass of both, and the broader clause would
        # otherwise hide this more specific message.
        except json.JSONDecodeError:
            await emitter.emit(
                status="error",
                description="Failed to decode the transcription response",
                done=True,
            )
            return json.dumps({"error": "Failed to decode the response."})

        except requests.exceptions.RequestException as e:
            await emitter.emit(
                status="error",
                description=f"Error during transcription: {str(e)}",
                done=True,
            )
            return json.dumps({"error": str(e)})

        # Must follow RequestException, which itself derives from OSError.
        # Covers failures opening or reading the file after the isfile check
        # (e.g. permission denied, file removed in the meantime).
        except OSError as e:
            await emitter.emit(
                status="error",
                description=f"Error reading audio file: {str(e)}",
                done=True,
            )
            return json.dumps({"error": str(e)})