Tool
v0.2.3
ElevenLabs TTS
A tool to generate speech from text using the ElevenLabs API
Tool ID
elevenlabs_tts
Creator
@justinrahb
Downloads
1K+

Tool Content
python
"""
title: ElevenLabs Text-to-Speech Tool
author: justinh-rahb
author_url: https://github.com/justinh-rahb
funding_url: https://github.com/open-webui
version: 0.2.3
license: MIT
"""

import requests
import uuid
import os
from pydantic import BaseModel, Field
from typing import Callable, Union, Any
from config import UPLOAD_DIR
from apps.webui.models.files import Files

# Set to True to print diagnostic "Debug: ..." messages to stdout from the tool's methods.
DEBUG = False


class Tools:
    class Valves(BaseModel):
        # Settings read by the tool's methods through ``self.valves``.

        # Sent as the "xi-api-key" header. The default is an empty string rather
        # than None so the default value is valid for the declared ``str`` type
        # (a dumped default can be validated back into the model); an empty
        # string is still falsy, so "key not set" checks keep working.
        ELEVENLABS_API_KEY: str = Field(
            default="", description="Your ElevenLabs API key."
        )
        # Passed as "model_id" in the text-to-speech request payload.
        ELEVENLABS_MODEL_ID: str = Field(
            default="eleven_multilingual_v2",
            description="ID of the ElevenLabs TTS model to use.",
        )

    def __init__(self):
        """
        Start with an empty voice cache and default valve settings.
        """
        # Lower-cased voice name -> ElevenLabs voice ID; filled by fetch_available_voices().
        self.voice_id_cache = dict()
        self.valves = self.Valves()

    def fetch_available_voices(self) -> str:
        """
        Fetches the list of available voices from the ElevenLabs API.

        Each voice found is also recorded in ``self.voice_id_cache`` (lower-cased
        name -> voice ID) so that later lookups by name need no further request.
        The cache is only updated once the whole response has been processed.

        :return: A formatted string containing the names and descriptions of available voices,
                 or a message starting with "Sorry, I couldn't fetch" if the request or
                 the parsing of its response fails.
        """
        if DEBUG:
            print("Debug: Fetching available voices")

        base_url = "https://api.elevenlabs.io/v1"
        headers = {
            "xi-api-key": self.valves.ELEVENLABS_API_KEY,
            "Content-Type": "application/json",
        }

        voices_url = f"{base_url}/voices"
        try:
            # Without a timeout a stalled connection would block the caller indefinitely.
            response = requests.get(voices_url, headers=headers, timeout=30)
            response.raise_for_status()
            voices = response.json()["voices"]

            lines = []
            fetched_ids = {}
            for voice in voices:
                # ``or`` (rather than a .get default) also covers a description that is
                # present but null/empty, which would otherwise be rendered as "None".
                description = voice.get("description") or "No description available."
                lines.append(f"- {voice['name']}: {description}\n")
                fetched_ids[voice["name"].lower()] = voice["voice_id"]
        except (
            requests.RequestException,
            KeyError,
            TypeError,
            ValueError,
            AttributeError,
        ) as e:
            # Network/HTTP failures and malformed payloads are reported the same way.
            # NOTE: the start of this message is matched by elevenlabs_text_to_speech.
            if DEBUG:
                print(f"Debug: Error fetching voices: {str(e)}")
            return "Sorry, I couldn't fetch the list of available voices at the moment."

        self.voice_id_cache.update(fetched_ids)

        if DEBUG:
            print(f"Debug: Found {len(voices)} voices")

        return "Here are the available voices from ElevenLabs:\n\n" + "".join(lines)

    def get_voice_list(self) -> str:
        """
        Retrieves and returns a list of available voices as a formatted string.

        :return: A formatted string containing the list of voices.
        """
        voices_message = self.fetch_available_voices()
        return voices_message

    async def elevenlabs_text_to_speech(
        self,
        text: str,
        voice_name: str = "rachel",
        __user__: dict = {},
        __event_emitter__: Callable[[dict], Any] = None,
    ) -> str:
        """
        Converts text to speech using the ElevenLabs API or lists available voices.

        :param text: The text to convert to speech or "list voices" to retrieve available voices.
        :param voice_name: The name of the voice to use for speech generation. Defaults to "rachel".
        :param __user__: A dictionary containing user information.
        :param __event_emitter__: An optional callback function to emit status events throughout the process.
        :return: A message indicating the result of the operation (success or error).
        """
        # Function-scope import: only needed here, to off-load blocking HTTP calls.
        import asyncio

        # NOTE: __user__ is only read in this method, never mutated, so the shared
        # default dict in the signature is harmless and is kept for compatibility.

        # A missing, blank or whitespace-padded voice name falls back to / is
        # normalised towards the documented default instead of raising.
        voice_name = (voice_name or "rachel").strip() or "rachel"

        if DEBUG:
            print(
                f"Debug: Starting TTS for voice '{voice_name}' with text '{(text or '')[:20]}...'"
            )

        def status_object(
            description="Unknown State", status="in_progress", done=False
        ):
            """
            Helper function to create a status object.

            :param description: A short description of the current status.
            :param status: Status type (e.g., 'in_progress', 'error', 'complete').
            :param done: Boolean indicating whether the operation is done.
            :return: A dictionary representing the status object.
            """
            return {
                "type": "status",
                "data": {
                    "status": status,
                    "description": description,
                    "done": done,
                },
            }

        async def emit_status(description, status="in_progress", done=False):
            """Send a status event when an emitter was supplied; otherwise do nothing."""
            if __event_emitter__:
                await __event_emitter__(status_object(description, status, done))

        async def run_blocking(func, *args, **kwargs):
            """Run a blocking call in the default executor so the event loop is not stalled."""
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(None, lambda: func(*args, **kwargs))

        # Must match the start of the failure message of fetch_available_voices().
        fetch_failed_prefix = "Sorry, I couldn't fetch"

        await emit_status("Initializing ElevenLabs Text-to-Speech")

        if not self.valves.ELEVENLABS_API_KEY:
            await emit_status("Error: API key not set", status="error", done=True)
            # The key is read from the valves only, so point the user there.
            return "ElevenLabs API key is not set. Please set ELEVENLABS_API_KEY in this tool's valves."

        if not __user__ or "id" not in __user__:
            await emit_status(
                "Error: User not authenticated", status="error", done=True
            )
            return "Error: User ID is not available. Please ensure you're logged in."

        # Reject empty input up front rather than sending a pointless API request.
        if not text or not text.strip():
            await emit_status("Error: No text provided", status="error", done=True)
            return "Error: No text was provided to convert to speech."

        if text.lower().strip() in [
            "list voices",
            "show voices",
            "available voices",
            "what voices are available",
        ]:
            voices = await run_blocking(self.get_voice_list)
            if voices.startswith(fetch_failed_prefix):
                # Do not report success when the listing itself failed.
                await emit_status(
                    "Error: Could not fetch voices", status="error", done=True
                )
            else:
                await emit_status(
                    "Available voices fetched", status="complete", done=True
                )
            return voices

        voice_key = voice_name.lower()
        voice_id = self.voice_id_cache.get(voice_key)
        if not voice_id:
            # Cache miss: refresh the cache from the API once, then look again.
            voices_message = await run_blocking(self.fetch_available_voices)
            if voices_message.startswith(fetch_failed_prefix):
                await emit_status(
                    "Error: Could not fetch voices", status="error", done=True
                )
                return voices_message

            voice_id = self.voice_id_cache.get(voice_key)
            if not voice_id:
                await emit_status(
                    f"Error: Voice '{voice_name}' not found",
                    status="error",
                    done=True,
                )
                return f"Error: Voice '{voice_name}' not found. Use 'list voices' to see available options."

        await emit_status("Generating speech")

        base_url = "https://api.elevenlabs.io/v1"
        headers = {
            "xi-api-key": self.valves.ELEVENLABS_API_KEY,
            "Content-Type": "application/json",
        }

        tts_url = f"{base_url}/text-to-speech/{voice_id}"
        payload = {
            "text": text,
            "model_id": self.valves.ELEVENLABS_MODEL_ID,
            "voice_settings": {"stability": 0.5, "similarity_boost": 0.5},
        }

        try:
            # The timeout prevents a stalled connection from hanging the call forever.
            response = await run_blocking(
                requests.post, tts_url, json=payload, headers=headers, timeout=60
            )
            response.raise_for_status()
        except requests.RequestException as e:
            await emit_status("Error: API request failed", status="error", done=True)
            return f"Error generating speech: {str(e)}"

        # raise_for_status() lets other non-error codes through; only 200 carries audio.
        if response.status_code != 200:
            await emit_status(
                "Error: Unexpected API response", status="error", done=True
            )
            return f"Error generating speech: {response.text}"

        file_name = f"tts_{uuid.uuid4()}.mp3"
        file_id = self._create_file(
            file_name, "Generated Audio", response.content, "audio/mpeg", __user__
        )
        if not file_id:
            await emit_status("Error saving audio file", status="error", done=True)
            return "Error saving audio file."

        file_url = self._get_file_url(file_id)
        await emit_status("Generated successfully", status="complete", done=True)
        if file_url and __event_emitter__:
            await __event_emitter__(
                {
                    "type": "message",
                    "data": {"content": f"\n[click to listen]({file_url})\n"},
                }
            )
        return f"Audio generated successfully using ElevenLabs voice **{voice_name}**. Right click and **Save Link As** to download [the audio file]({file_url})."

    def _create_file(
        self,
        file_name: str,
        title: str,
        content: Union[str, bytes],
        content_type: str,
        __user__: dict = {},
    ) -> Union[str, None]:
        """
        Creates and saves a file in the local upload directory and registers it with the Files API.

        If writing or registering fails, the partially created file is removed
        again so no orphaned file is left in the upload directory.

        :param file_name: The name of the file to save.
        :param title: The title of the file.
        :param content: The content of the file, either as a string or bytes.
        :param content_type: The MIME type of the file (e.g., "audio/mpeg").
        :param __user__: A dictionary containing user information.
        :return: The ID of the saved file, if successful; otherwise, None.
        """
        if DEBUG:
            print("Debug: Entering _create_file method")
            print(f"Debug: File name: {file_name}")
            print(f"Debug: Content type: {content_type}")
            print(f"Debug: User: {__user__}")

        # ``not __user__`` also covers an explicit None, which ``in`` cannot handle.
        if not __user__ or "id" not in __user__:
            if DEBUG:
                print("Debug: User ID is not available")
            return None

        base_path = os.path.join(UPLOAD_DIR)
        file_id = str(uuid.uuid4())

        # The stored name is prefixed with the new file ID.
        file_path = os.path.join(base_path, f"{file_id}_{file_name}")

        try:
            os.makedirs(base_path, exist_ok=True)
            if isinstance(content, str):
                # Explicit encoding: the platform default may not be able to
                # represent every character of the text.
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(content)
            else:
                with open(file_path, "wb") as f:
                    f.write(content)

            meta = {
                "source": file_path,
                "title": title,
                "content_type": content_type,
                "size": os.path.getsize(file_path),
                "path": file_path,
            }

            class FileForm(BaseModel):
                id: str
                filename: str
                meta: dict = {}

            formData = FileForm(id=file_id, filename=file_name, meta=meta)
            file = Files.insert_new_file(__user__["id"], formData)

            if DEBUG:
                print(f"Debug: File saved to local storage. File path: {file_path}")
                print(f"Debug: Meta information: {meta}")
            return file.id
        except Exception as e:
            # Broad on purpose: any failure (I/O, validation, registration) is
            # reported to the caller as "no file" via the None return value.
            if DEBUG:
                print(f"Debug: Error saving file: {e}")
            # Best-effort cleanup so a failed registration leaves nothing behind;
            # the file may not exist at all if the failure happened before the write.
            try:
                os.remove(file_path)
            except OSError:
                pass
            return None

    def _get_file_url(self, file_id: str) -> str:
        """
        Constructs and returns the URL to access the file content by its ID.

        :param file_id: The ID of the file.
        :return: The URL to access the file content.
        """
        return f"/api/v1/files/{file_id}/content"