Whitepaper
Docs
Sign In
Function
Function
pipe
v0.3.0
Sambanova
Function ID
sambanova
Creator
@hammad87
Downloads
150+
SambaNova system manifold API with RAG support, a dynamic system prompt, and a token-limit fix
Get
README
No README available
Function Code
Show
""" title: Samba nova system manifold api with RAG enabled and dynamic system prompt and etc with token limit fix version: 0.3.0 author: hammad """ import os import requests import json import time from typing import List, Union, Generator, Iterator from pydantic import BaseModel, Field from open_webui.utils.misc import pop_system_message MAX_TOKENS = 8192 # Define the model’s max token limit class Pipe: class Valves(BaseModel): SAMBANOVA_API_KEY: str = Field(default="") def __init__(self): self.type = "manifold" self.id = "sambanova" self.name = "sambanova/" self.valves = self.Valves( **{"SAMBANOVA_API_KEY": os.getenv("SAMBANOVA_API_KEY", "")} ) pass def get_sambanova_models(self): # List of SambaNova models, including Llama 3.1 series return [ { "id": "Meta-Llama-3.1-8B-Instruct-8k", "name": "Meta-Llama-3.1-8B-Instruct-8k", }, { "id": "Meta-Llama-3.1-70B-Instruct-8k", "name": "Meta-Llama-3.1-70B-Instruct-8k", }, { "id": "Meta-Llama-3.1-405B-Instruct-8k", "name": "Meta-Llama-3.1-405B-Instruct-8k", }, ] def pipes(self) -> List[dict]: return self.get_sambanova_models() def estimate_token_count(self, messages: List[dict]) -> int: # Roughly estimate token count (e.g., 1 token per 4 characters) return sum(len(msg["content"]) // 4 for msg in messages) def truncate_messages(self, messages: List[dict]) -> List[dict]: # Truncate messages until the token count is within the limit while self.estimate_token_count(messages) > MAX_TOKENS and len(messages) > 1: messages.pop(0) # Remove the oldest message return messages def pipe(self, body: dict) -> Union[str, Generator, Iterator]: # Handle system message dynamically system_message, messages = pop_system_message(body["messages"]) # Add system message if present processed_messages = [] if system_message: processed_messages.append( {"role": "system", "content": system_message.get("content", "")} ) # Append the user and assistant messages processed_messages.extend( {"role": msg["role"], "content": msg.get("content", "")} for msg in 
messages ) # Add support for RAG rag_context = body.get("rag_context", "") if rag_context: processed_messages.insert( 1, {"role": "system", "content": f"Knowledge context: {rag_context}"} ) # Truncate messages if they exceed the token limit processed_messages = self.truncate_messages(processed_messages) model_id = body["model"] if model_id.startswith("sambanova."): model_id = model_id[len("sambanova.") :] elif model_id.startswith("samba_nova_api."): model_id = model_id[len("samba_nova_api.") :] # Payload with truncated messages payload = { "model": model_id, "messages": processed_messages, "max_tokens": body.get("max_tokens"), "temperature": body.get("temperature", 0.7), "top_k": body.get("top_k", 40), "top_p": body.get("top_p", 0.9), "stop": body.get("stop", []), "stream": body.get("stream", False), } headers = { "Authorization": f"Bearer {self.valves.SAMBANOVA_API_KEY}", "Content-Type": "application/json", } url = "https://api.sambanova.ai/v1/chat/completions" try: if body.get("stream", False): return self.stream_response(url, headers, payload) else: return self.non_stream_response(url, headers, payload) except requests.exceptions.RequestException as e: print(f"Request failed: {e}") return f"Error: Request failed: {e}" except Exception as e: print(f"Error in pipe method: {e}") return f"Error: {e}" def stream_response(self, url, headers, payload): try: with requests.post( url, headers=headers, json=payload, stream=True, timeout=(2, 30), verify=False, ) as response: if response.status_code != 200: raise Exception( f"HTTP Error {response.status_code}: {response.text}" ) for line in response.iter_lines(): if line: line = line.decode("utf-8") if line.startswith("data: "): try: data = json.loads(line[6:]) if data["choices"][0]["delta"].get("content"): yield data["choices"][0]["delta"]["content"] time.sleep(0.01) except json.JSONDecodeError: print(f"Failed to parse JSON: {line}") except KeyError as e: print(f"Unexpected data structure: {e}") print(f"Full data: {data}") 
except requests.exceptions.RequestException as e: print(f"Request failed: {e}") yield f"Error: Request failed: {e}" except Exception as e: print(f"General error in stream_response method: {e}") yield f"Error: {e}" def non_stream_response(self, url, headers, payload): try: response = requests.post( url, headers=headers, json=payload, timeout=(2, 30), verify=False ) if response.status_code != 200: raise Exception(f"HTTP Error {response.status_code}: {response.text}") res = response.json() return res["choices"][0]["message"]["content"] except requests.exceptions.RequestException as e: print(f"Failed non-stream request: {e}") return f"Error: {e}"