Function
pipe
v0.3.0
Sambanova
SambaNova system manifold API with RAG support, a dynamic system prompt, and more, including a token-limit fix
Function ID
sambanova
Creator
@hammad87
Downloads
74+

Function Content
python
"""
title: Samba nova system manifold api with RAG enabled and dynamic system prompt and etc with token limit fix
version: 0.3.0
author: hammad
"""

import os
import requests
import json
import time
from typing import List, Union, Generator, Iterator
from pydantic import BaseModel, Field
from open_webui.utils.misc import pop_system_message

# Approximate token budget for the outgoing message list. Pipe.truncate_messages
# drops messages until the estimated count is within this limit (or only one
# message remains). Presumably chosen to match the 8k context of the "-8k"
# model variants listed in Pipe.get_sambanova_models -- TODO confirm.
MAX_TOKENS = 8192


class Pipe:
    """Open WebUI manifold pipe that forwards chat requests to SambaNova.

    The pipe exposes a fixed list of SambaNova models, builds a chat
    completions payload from the incoming request body (system prompt,
    conversation turns and optional RAG context), trims the history to fit
    the ``MAX_TOKENS`` budget and then performs either a streaming or a
    non-streaming HTTP request.
    """

    class Valves(BaseModel):
        # API key sent as a Bearer token in the Authorization header.
        SAMBANOVA_API_KEY: str = Field(default="")

    def __init__(self):
        self.type = "manifold"
        self.id = "sambanova"
        self.name = "sambanova/"
        # Seed the valve from the environment; an empty string means "not set".
        self.valves = self.Valves(
            **{"SAMBANOVA_API_KEY": os.getenv("SAMBANOVA_API_KEY", "")}
        )

    def get_sambanova_models(self):
        """Return the hard-coded list of SambaNova models as id/name dicts."""
        return [
            {
                "id": "Meta-Llama-3.1-8B-Instruct-8k",
                "name": "Meta-Llama-3.1-8B-Instruct-8k",
            },
            {
                "id": "Meta-Llama-3.1-70B-Instruct-8k",
                "name": "Meta-Llama-3.1-70B-Instruct-8k",
            },
            {
                "id": "Meta-Llama-3.1-405B-Instruct-8k",
                "name": "Meta-Llama-3.1-405B-Instruct-8k",
            },
        ]

    def pipes(self) -> List[dict]:
        """Return the models this manifold exposes."""
        return self.get_sambanova_models()

    def estimate_token_count(self, messages: List[dict]) -> int:
        """Roughly estimate the token count of ``messages``.

        Uses the heuristic of one token per four characters, applied per
        message. String content is measured directly. List content is
        assumed to be OpenAI-style content parts (dicts with a ``"text"``
        field) and only the text parts are counted -- TODO confirm against
        the caller. Missing or ``None`` content counts as zero instead of
        raising.
        """
        total = 0
        for msg in messages:
            content = msg.get("content")
            if isinstance(content, str):
                total += len(content) // 4
            elif isinstance(content, list):
                for part in content:
                    if isinstance(part, dict) and isinstance(part.get("text"), str):
                        total += len(part["text"]) // 4
        return total

    def truncate_messages(self, messages: List[dict]) -> List[dict]:
        """Drop old messages in place until the estimate fits ``MAX_TOKENS``.

        The oldest non-system message is removed first so that the system
        prompt (and any system-role RAG context) survives as long as
        possible, and the final message (the latest turn) is never chosen
        while older candidates exist. If only system messages precede the
        final message, the oldest of those is dropped instead so the loop
        always makes progress. At least one message is always kept.

        Returns the same list object that was passed in.
        """
        # Per-message estimates are additive, so keep a running total rather
        # than re-summing the whole list on every iteration.
        total = self.estimate_token_count(messages)
        while total > MAX_TOKENS and len(messages) > 1:
            last_index = len(messages) - 1
            drop_index = next(
                (
                    i
                    for i in range(last_index)
                    if messages[i].get("role") != "system"
                ),
                0,  # nothing but system messages before the last turn
            )
            dropped = messages.pop(drop_index)
            total -= self.estimate_token_count([dropped])
        return messages

    def pipe(self, body: dict) -> Union[str, Generator, Iterator]:
        """Build the SambaNova request from ``body`` and execute it.

        Reads ``messages``, ``model`` and the optional ``rag_context``,
        ``max_tokens``, ``temperature``, ``top_k``, ``top_p``, ``stop`` and
        ``stream`` keys from ``body``.

        Returns a generator of text chunks when ``body["stream"]`` is truthy,
        otherwise the completion text. Errors are reported as an
        ``"Error: ..."`` string rather than raised.
        """
        # pop_system_message is unpacked as (system message or falsy, rest).
        system_message, messages = pop_system_message(body["messages"])

        processed_messages = []
        if system_message:
            processed_messages.append(
                {"role": "system", "content": system_message.get("content", "")}
            )

        # Append the user and assistant messages, keeping only role/content.
        processed_messages.extend(
            {"role": msg["role"], "content": msg.get("content", "")} for msg in messages
        )

        # RAG context goes right after the system prompt, or first when there
        # is no system prompt (never in between conversation turns).
        rag_context = body.get("rag_context", "")
        if rag_context:
            rag_index = 1 if system_message else 0
            processed_messages.insert(
                rag_index,
                {"role": "system", "content": f"Knowledge context: {rag_context}"},
            )

        processed_messages = self.truncate_messages(processed_messages)

        # Strip the manifold prefix Open WebUI puts in front of the model id.
        model_id = body["model"]
        for prefix in ("sambanova.", "samba_nova_api."):
            if model_id.startswith(prefix):
                model_id = model_id[len(prefix) :]
                break

        stream = body.get("stream", False)
        payload = {
            "model": model_id,
            "messages": processed_messages,
            "temperature": body.get("temperature", 0.7),
            "top_k": body.get("top_k", 40),
            "top_p": body.get("top_p", 0.9),
            "stop": body.get("stop", []),
            "stream": stream,
        }
        # Only send max_tokens when the caller supplied one, instead of
        # serialising it as JSON null.
        max_tokens = body.get("max_tokens")
        if max_tokens is not None:
            payload["max_tokens"] = max_tokens

        headers = {
            "Authorization": f"Bearer {self.valves.SAMBANOVA_API_KEY}",
            "Content-Type": "application/json",
        }

        url = "https://api.sambanova.ai/v1/chat/completions"

        try:
            if stream:
                return self.stream_response(url, headers, payload)
            else:
                return self.non_stream_response(url, headers, payload)
        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}")
            return f"Error: Request failed: {e}"
        except Exception as e:
            # Also covers the HTTP-status Exception raised by
            # non_stream_response and malformed response bodies.
            print(f"Error in pipe method: {e}")
            return f"Error: {e}"

    def stream_response(self, url, headers, payload):
        """POST ``payload`` and yield content chunks from the event stream.

        Lines prefixed with ``"data: "`` are parsed as JSON and the
        ``choices[0].delta.content`` text, when present, is yielded.
        Failures are yielded as an ``"Error: ..."`` string so the consumer
        of the generator sees them.
        """
        try:
            # TLS certificate verification is left at the requests default
            # (enabled): the request carries the API key.
            with requests.post(
                url,
                headers=headers,
                json=payload,
                stream=True,
                timeout=(2, 30),
            ) as response:
                if response.status_code != 200:
                    raise Exception(
                        f"HTTP Error {response.status_code}: {response.text}"
                    )

                for line in response.iter_lines():
                    if not line:
                        continue
                    line = line.decode("utf-8")
                    if not line.startswith("data: "):
                        continue

                    data_str = line[6:]
                    # OpenAI-style streams end with a "[DONE]" sentinel that
                    # is not JSON; treat it as end of stream.
                    if data_str.strip() == "[DONE]":
                        break

                    try:
                        data = json.loads(data_str)
                    except json.JSONDecodeError:
                        print(f"Failed to parse JSON: {line}")
                        continue

                    # Skip chunks with no choices instead of failing on
                    # choices[0].
                    choices = data.get("choices") if isinstance(data, dict) else None
                    if not choices:
                        continue

                    delta = choices[0].get("delta") or {}
                    content = delta.get("content")
                    if content:
                        yield content

                    # Brief pause between chunks, kept from the original.
                    time.sleep(0.01)
        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}")
            yield f"Error: Request failed: {e}"
        except Exception as e:
            print(f"General error in stream_response method: {e}")
            yield f"Error: {e}"

    def non_stream_response(self, url, headers, payload):
        """POST ``payload`` and return the completion text.

        Returns ``choices[0].message.content`` from the JSON response, or an
        ``"Error: ..."`` string when the request itself fails.

        Raises:
            Exception: on a non-200 status. ``pipe`` catches this and turns
                it into an error string.
        """
        try:
            # TLS certificate verification is left at the requests default
            # (enabled): the request carries the API key.
            response = requests.post(
                url, headers=headers, json=payload, timeout=(2, 30)
            )
            if response.status_code != 200:
                raise Exception(f"HTTP Error {response.status_code}: {response.text}")

            res = response.json()
            return res["choices"][0]["message"]["content"]
        except requests.exceptions.RequestException as e:
            print(f"Failed non-stream request: {e}")
            return f"Error: {e}"