"""
title: Samba nova system manifold api with RAG enabled and dynamic system prompt and etc with token limit fix
version: 0.3.0
author: hammad
"""
import os
import requests
import json
import time
from typing import List, Union, Generator, Iterator
from pydantic import BaseModel, Field
from open_webui.utils.misc import pop_system_message
# Upper bound on the *estimated* token count of the messages sent in a single
# request; Pipe.truncate_messages drops messages until the estimate fits.
MAX_TOKENS = 8192
class Pipe:
    """Open WebUI manifold pipe that forwards chat requests to SambaNova.

    The pipe exposes a fixed list of SambaNova-hosted models, rebuilds the
    message list (system prompt, optional RAG context, conversation), trims
    it to ``MAX_TOKENS`` estimated tokens and posts it to the SambaNova
    chat-completions endpoint, either streamed or as a single response.
    """

    class Valves(BaseModel):
        # API key sent as the Bearer token on every request.
        SAMBANOVA_API_KEY: str = Field(default="")

    def __init__(self):
        self.type = "manifold"
        self.id = "sambanova"
        self.name = "sambanova/"
        # Seed the valve from the environment; an empty string means "unset".
        self.valves = self.Valves(
            SAMBANOVA_API_KEY=os.getenv("SAMBANOVA_API_KEY", "")
        )

    def get_sambanova_models(self):
        """Return the hard-coded list of model descriptors (id and name)."""
        return [
            {
                "id": "Meta-Llama-3.1-8B-Instruct-8k",
                "name": "Meta-Llama-3.1-8B-Instruct-8k",
            },
            {
                "id": "Meta-Llama-3.1-70B-Instruct-8k",
                "name": "Meta-Llama-3.1-70B-Instruct-8k",
            },
            {
                "id": "Meta-Llama-3.1-405B-Instruct-8k",
                "name": "Meta-Llama-3.1-405B-Instruct-8k",
            },
        ]

    def pipes(self) -> List[dict]:
        """Return the models this manifold exposes."""
        return self.get_sambanova_models()

    @staticmethod
    def _content_length(content) -> int:
        """Return the number of text characters in a message ``content``.

        Plain strings are measured directly. For list-style content only the
        string ``"text"`` fields of dict parts are counted. Anything else
        (``None``, unknown types) counts as zero.
        """
        if isinstance(content, str):
            return len(content)
        if isinstance(content, list):
            total = 0
            for part in content:
                if isinstance(part, dict) and isinstance(part.get("text"), str):
                    total += len(part["text"])
            return total
        return 0

    def estimate_token_count(self, messages: List[dict]) -> int:
        """Roughly estimate the token count of ``messages``.

        Uses the heuristic of one token per four characters, applied per
        message. Messages without usable text contribute zero instead of
        raising.
        """
        return sum(
            self._content_length(msg.get("content")) // 4 for msg in messages
        )

    def truncate_messages(self, messages: List[dict]) -> List[dict]:
        """Drop the oldest conversation messages until the estimate fits.

        Leading ``system`` messages (the system prompt and any RAG context
        placed directly after it) are kept; removal starts at the first
        non-system message. At least one non-system message is always
        retained. The list is modified in place and also returned.
        """
        # Count the leading system messages that must survive truncation.
        pinned = 0
        while pinned < len(messages) and messages[pinned].get("role") == "system":
            pinned += 1

        # The estimate is a per-message sum, so it can be updated incrementally
        # instead of being recomputed after every removal.
        total = self.estimate_token_count(messages)
        while total > MAX_TOKENS and len(messages) - pinned > 1:
            dropped = messages.pop(pinned)
            total -= self.estimate_token_count([dropped])
        return messages

    def pipe(self, body: dict) -> Union[str, Generator, Iterator]:
        """Handle one chat request.

        Args:
            body: Request dict. Reads ``messages`` and ``model`` (required)
                and the optional ``rag_context``, ``max_tokens``,
                ``temperature``, ``top_k``, ``top_p``, ``stop`` and
                ``stream`` keys.

        Returns:
            A generator of text chunks when ``stream`` is truthy, otherwise
            the response text. Failures are reported as an ``"Error: ..."``
            string rather than raised.
        """
        system_message, messages = pop_system_message(body["messages"])

        processed_messages = []
        if system_message:
            processed_messages.append(
                {"role": "system", "content": system_message.get("content", "")}
            )
        processed_messages.extend(
            {"role": msg["role"], "content": msg.get("content", "")}
            for msg in messages
        )

        # RAG context goes right after the system prompt, or first when there
        # is no system prompt, so it never lands in the middle of the chat.
        rag_context = body.get("rag_context", "")
        if rag_context:
            processed_messages.insert(
                1 if system_message else 0,
                {"role": "system", "content": f"Knowledge context: {rag_context}"},
            )

        processed_messages = self.truncate_messages(processed_messages)

        # Strip the manifold prefix that is prepended to the model id.
        model_id = body["model"]
        for prefix in ("sambanova.", "samba_nova_api."):
            if model_id.startswith(prefix):
                model_id = model_id[len(prefix):]
                break

        payload = {
            "model": model_id,
            "messages": processed_messages,
            "max_tokens": body.get("max_tokens"),
            "temperature": body.get("temperature", 0.7),
            "top_k": body.get("top_k", 40),
            "top_p": body.get("top_p", 0.9),
            "stop": body.get("stop", []),
            "stream": body.get("stream", False),
        }
        # Leave unset options out of the request instead of sending nulls.
        payload = {key: value for key, value in payload.items() if value is not None}

        headers = {
            "Authorization": f"Bearer {self.valves.SAMBANOVA_API_KEY}",
            "Content-Type": "application/json",
        }
        url = "https://api.sambanova.ai/v1/chat/completions"

        try:
            if body.get("stream", False):
                # Returns a generator; request errors surface while it is
                # consumed and are handled inside stream_response itself.
                return self.stream_response(url, headers, payload)
            return self.non_stream_response(url, headers, payload)
        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}")
            return f"Error: Request failed: {e}"
        except Exception as e:
            print(f"Error in pipe method: {e}")
            return f"Error: {e}"

    def stream_response(self, url, headers, payload):
        """Post ``payload`` and yield the streamed content chunks.

        Lines starting with ``data: `` are parsed as JSON and the first
        choice's ``delta.content`` is yielded when non-empty. The ``[DONE]``
        sentinel ends the stream; chunks with no choices are skipped.
        Errors are yielded as an ``"Error: ..."`` string.
        """
        try:
            # TLS certificate verification is left at the requests default
            # (enabled): this request carries the API key.
            with requests.post(
                url,
                headers=headers,
                json=payload,
                stream=True,
                timeout=(2, 30),
            ) as response:
                if response.status_code != 200:
                    raise Exception(
                        f"HTTP Error {response.status_code}: {response.text}"
                    )
                for raw_line in response.iter_lines():
                    if not raw_line:
                        continue
                    line = raw_line.decode("utf-8")
                    if not line.startswith("data: "):
                        continue
                    chunk = line[6:]
                    if chunk.strip() == "[DONE]":
                        # End-of-stream sentinel, not JSON.
                        break
                    try:
                        data = json.loads(chunk)
                    except json.JSONDecodeError:
                        print(f"Failed to parse JSON: {line}")
                        continue
                    try:
                        choices = data["choices"]
                        if not choices:
                            # A chunk with no choices carries no text.
                            continue
                        content = choices[0]["delta"].get("content")
                    except (KeyError, IndexError, TypeError, AttributeError) as e:
                        print(f"Unexpected data structure: {e}")
                        print(f"Full data: {data}")
                        continue
                    if content:
                        yield content
                        # Short pause between chunks, kept from the original.
                        time.sleep(0.01)
        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}")
            yield f"Error: Request failed: {e}"
        except Exception as e:
            print(f"General error in stream_response method: {e}")
            yield f"Error: {e}"

    def non_stream_response(self, url, headers, payload):
        """Post ``payload`` and return the first choice's message content.

        Returns:
            The response text, or an ``"Error: ..."`` string when the request
            itself fails.

        Raises:
            Exception: On a non-200 status. This and any error from decoding
                the response body propagate to the caller.
        """
        try:
            # TLS certificate verification is left at the requests default
            # (enabled): this request carries the API key.
            response = requests.post(
                url, headers=headers, json=payload, timeout=(2, 30)
            )
            if response.status_code != 200:
                raise Exception(f"HTTP Error {response.status_code}: {response.text}")
            res = response.json()
            return res["choices"][0]["message"]["content"]
        except requests.exceptions.RequestException as e:
            print(f"Failed non-stream request: {e}")
            return f"Error: {e}"