"""
title: OpenRouterPro
version: 1.1.2
license: MIT
description: Enhanced OpenRouter connector with model group selection, citation support, and reasoning tokens
author: d0dg3r
"""
import os
import re
import json
import requests
import time
from typing import List, Union, Generator, Dict, Iterator
from pydantic import BaseModel, Field
DEBUG = True # Set to True to help with debugging
def _insert_citations(text: str, citations: list[str]) -> str:
# Define regex pattern for citation markers [n]
pattern = r"\[(\d+)\]"
def replace_citation(match_obj):
# Extract the number from the match
num = int(match_obj.group(1))
# Check if there's a corresponding citation URL
# Citations are 0-indexed in the list, but 1-indexed in the text
if 1 <= num <= len(citations):
url = citations[num - 1]
            # Return a Markdown link: [[n]](url)
return f"[{match_obj.group(0)}]({url})"
else:
# If no corresponding citation, return the original marker
return match_obj.group(0)
# Replace all citation markers in the text
result = re.sub(pattern, replace_citation, text)
return result
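
# Worked example (behavior follows from the regex above):
#   _insert_citations("See [1] and [3].", ["https://a.example", "https://b.example"])
#   returns "See [[1]](https://a.example) and [3]." -- [3] has no matching URL,
#   so the marker is left untouched.
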
class Pipe:
class Valves(BaseModel):
"""Configuration for OpenRouter Pro."""
OPENROUTER_API_BASE_URL: str = Field(default="https://openrouter.ai/api/v1")
OPENROUTER_API_KEY: str = Field(
default="", description="Your OpenRouter API key"
)
INCLUDE_REASONING: bool = Field(
default=True,
description="Request reasoning tokens from models that support it",
ui_inline=True,
)
# Model selection options with toggle buttons
INCLUDE_RECOMMENDED: bool = Field(
default=True,
description="Include recommended models",
ui_always_show=True,
ui_inline=True,
)
INCLUDE_TOP: bool = Field(
default=False,
description="Include top performing models",
ui_always_show=True,
ui_inline=True,
)
INCLUDE_OPENAI: bool = Field(
default=False,
description="Include OpenAI models",
ui_always_show=True,
ui_inline=True,
)
INCLUDE_ANTHROPIC: bool = Field(
default=False,
description="Include Anthropic Claude models",
ui_always_show=True,
ui_inline=True,
)
INCLUDE_MISTRAL: bool = Field(
default=False,
description="Include Mistral AI models",
ui_always_show=True,
ui_inline=True,
)
INCLUDE_META: bool = Field(
default=False,
description="Include Meta's Llama models",
ui_always_show=True,
ui_inline=True,
)
INCLUDE_GOOGLE: bool = Field(
default=False,
description="Include Google's Gemini models",
ui_always_show=True,
ui_inline=True,
)
INCLUDE_FAVORITE: bool = Field(
default=False,
description="Include favorite models",
ui_always_show=True,
ui_inline=True,
)
INCLUDE_FAST: bool = Field(
default=False,
description="Include fast-responding models",
ui_always_show=True,
ui_inline=True,
)
INCLUDE_POWERFUL: bool = Field(
default=False,
description="Include most powerful models",
ui_always_show=True,
ui_inline=True,
)
INCLUDE_VISION: bool = Field(
default=False,
description="Include models with vision capabilities",
ui_always_show=True,
ui_inline=True,
)
INCLUDE_CODE: bool = Field(
default=False,
description="Include code-specialized models",
ui_always_show=True,
ui_inline=True,
)
INCLUDE_ALL: bool = Field(
default=True,
description="Include all available models",
ui_always_show=True,
ui_inline=True,
)
FREE_ONLY: bool = Field(
default=False,
description="Show only free models",
ui_inline=True,
)
MODEL_PREFIX: str = Field(
default="",
description="Optional prefix for model names in Open WebUI"
)
def __init__(self):
self.type = "manifold"
self.id = "openrouter-pro"
        self.name = "OpenRouter Pro/"  # Trailing slash so models render as "OpenRouter Pro/<model name>"
# Initialize valves with defaults or from environment
self.valves = self.Valves(
OPENROUTER_API_KEY=os.getenv("OPENROUTER_API_KEY", ""),
INCLUDE_REASONING=os.getenv("INCLUDE_REASONING", "true").lower() == "true",
INCLUDE_RECOMMENDED=os.getenv("INCLUDE_RECOMMENDED", "true").lower() == "true",
INCLUDE_TOP=os.getenv("INCLUDE_TOP", "false").lower() == "true",
INCLUDE_OPENAI=os.getenv("INCLUDE_OPENAI", "false").lower() == "true",
INCLUDE_ANTHROPIC=os.getenv("INCLUDE_ANTHROPIC", "false").lower() == "true",
INCLUDE_MISTRAL=os.getenv("INCLUDE_MISTRAL", "false").lower() == "true",
INCLUDE_META=os.getenv("INCLUDE_META", "false").lower() == "true",
INCLUDE_GOOGLE=os.getenv("INCLUDE_GOOGLE", "false").lower() == "true",
INCLUDE_FAVORITE=os.getenv("INCLUDE_FAVORITE", "false").lower() == "true",
INCLUDE_FAST=os.getenv("INCLUDE_FAST", "false").lower() == "true",
INCLUDE_POWERFUL=os.getenv("INCLUDE_POWERFUL", "false").lower() == "true",
INCLUDE_VISION=os.getenv("INCLUDE_VISION", "false").lower() == "true",
INCLUDE_CODE=os.getenv("INCLUDE_CODE", "false").lower() == "true",
INCLUDE_ALL=os.getenv("INCLUDE_ALL", "true").lower() == "true",
FREE_ONLY=os.getenv("FREE_ONLY", "false").lower() == "true",
MODEL_PREFIX=os.getenv("MODEL_PREFIX", ""),
)
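        # Every valve above can be seeded from the environment before Open WebUI
        # loads this function, e.g. (shell; values illustrative):
        #   export OPENROUTER_API_KEY="<your key>"
        #   export INCLUDE_ALL="false"
        #   export INCLUDE_ANTHROPIC="true"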
# Defined model groups
self.model_groups = {
"top": [
"anthropic/claude-3.7-sonnet",
"anthropic/claude-3.5-sonnet",
"openai/gpt-4o",
"openai/gpt-4o-mini",
"meta-llama/llama-3.1-70b-instruct",
"google/gemini-pro-1.5",
"mistralai/mistral-large",
],
"recommended": [
"anthropic/claude-3.7-sonnet",
"anthropic/claude-3.5-sonnet",
"anthropic/claude-3-opus",
"anthropic/claude-3-sonnet",
"anthropic/claude-3-haiku",
"openai/gpt-4o",
"openai/gpt-4o-mini",
"openai/gpt-4-turbo",
"openai/gpt-3.5-turbo",
"meta-llama/llama-3.1-70b-instruct",
"meta-llama/llama-3.1-8b-instruct",
"meta-llama/llama-3-70b-instruct",
"meta-llama/llama-3-8b-instruct",
"google/gemini-pro-1.5",
"google/gemini-pro",
"mistralai/mistral-large",
"mistralai/mistral-medium",
"mistralai/mixtral-8x7b-instruct",
"qwen/qwen-max",
"qwen/qwen-plus",
],
"openai": [
"openai/gpt-4o",
"openai/gpt-4o-mini",
"openai/gpt-4-turbo",
"openai/gpt-3.5-turbo",
"openai/o1",
"openai/o1-mini",
"openai/o1-pro",
],
"anthropic": [
"anthropic/claude-3.7-sonnet",
"anthropic/claude-3.5-sonnet",
"anthropic/claude-3.5-haiku",
"anthropic/claude-3-opus",
"anthropic/claude-3-sonnet",
"anthropic/claude-3-haiku",
"anthropic/claude-2.1",
"anthropic/claude-2",
],
"mistral": [
"mistralai/mistral-large",
"mistralai/mistral-medium",
"mistralai/mistral-small",
"mistralai/mixtral-8x22b-instruct",
"mistralai/mixtral-8x7b-instruct",
"mistralai/mistral-7b-instruct",
],
"meta": [
"meta-llama/llama-3.1-70b-instruct",
"meta-llama/llama-3.1-8b-instruct",
"meta-llama/llama-3-70b-instruct",
"meta-llama/llama-3-8b-instruct",
"meta-llama/llama-2-70b-chat",
"meta-llama/llama-2-13b-chat",
],
"google": [
"google/gemini-pro-1.5",
"google/gemini-flash-1.5",
"google/gemini-pro",
"google/gemini-3-27b-it",
"google/gemini-3-12b-it",
"google/gemini-3-4b-it",
"google/gemini-3-1b-it",
],
"favorite": [
"anthropic/claude-3.7-sonnet",
"openai/gpt-4o",
"meta-llama/llama-3.1-70b-instruct",
"google/gemini-pro-1.5",
],
"fast": [
"anthropic/claude-3.5-haiku",
"openai/gpt-4o-mini",
"meta-llama/llama-3.1-8b-instruct",
"google/gemini-flash-1.5",
"mistralai/mistral-small",
],
"powerful": [
"anthropic/claude-3.7-sonnet",
"anthropic/claude-3-opus",
"openai/gpt-4o",
"openai/o1-pro",
"meta-llama/llama-3.1-70b-instruct",
"mistralai/mistral-large",
],
"vision": [
"openai/gpt-4o",
"anthropic/claude-3.7-sonnet",
"google/gemini-pro-1.5",
"meta-llama/llama-3.2-11b-vision-instruct",
],
"code": [
"anthropic/claude-3.7-sonnet",
"openai/gpt-4o",
"mistralai/codestral-2501",
"deepseek/deepseek-chat",
"qwen/qwen-2.5-coder-32b-instruct",
],
}
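        # NOTE: These group lists are a static snapshot of OpenRouter model IDs
        # and will drift as the catalog changes. Stale entries are harmless:
        # _filter_models_by_group simply skips patterns that match nothing.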
def _debug(self, message: str):
"""Print debug messages if DEBUG is True."""
if DEBUG:
print(f"OpenRouter Pro: {message}")
def _get_headers(self) -> Dict[str, str]:
"""Get headers for OpenRouter API requests."""
if not self.valves.OPENROUTER_API_KEY:
raise ValueError("OPENROUTER_API_KEY is not set")
return {
"Authorization": f"Bearer {self.valves.OPENROUTER_API_KEY}",
"Content-Type": "application/json",
"HTTP-Referer": "https://openwebui.com/",
"X-Title": "OpenRouter Pro for Open WebUI",
}
def _format_model_id(self, model_id: str) -> str:
"""Format model ID for OpenRouter API."""
# Remove prefixes if present
prefixes = [
"openrouter.",
"openroutermodels.",
"openrouter_api_clone.",
"openrouter-pro.",
"jmrouter.",
]
for prefix in prefixes:
if model_id.startswith(prefix):
return model_id[len(prefix) :]
        # If the model ID contains a dot before the first slash, assume it's in
        # the form "prefix.model" and strip the prefix. Comparing against the
        # slash avoids mangling dots inside the model name itself, e.g.
        # "anthropic/claude-3.5-sonnet".
        dot_pos = model_id.find(".")
        slash_pos = model_id.find("/")
        if dot_pos != -1 and (slash_pos == -1 or dot_pos < slash_pos):
            return model_id.split(".", 1)[1]
# If the model ID has no prefix, return it as is
return model_id
def _handle_response(self, response: requests.Response) -> dict:
"""Process the response from OpenRouter API."""
try:
response.raise_for_status()
return response.json()
        except requests.exceptions.HTTPError as e:
            self._debug(f"HTTPError: {e.response.text}")
            error_message = f"HTTP Error {e.response.status_code}"
            try:
                error_data = e.response.json()
            except ValueError:
                # Error body isn't JSON (json.JSONDecodeError is a ValueError),
                # so re-raise the original HTTP error
                raise e
            if "error" in error_data:
                if (
                    isinstance(error_data["error"], dict)
                    and "message" in error_data["error"]
                ):
                    error_message += f": {error_data['error']['message']}"
                else:
                    error_message += f": {error_data['error']}"
            raise ValueError(error_message)
        except ValueError:
            self._debug(f"Invalid JSON response: {response.text}")
            raise
def get_openrouter_models(self) -> List[Dict[str, str]]:
"""Fetch all models from OpenRouter API."""
url = f"{self.valves.OPENROUTER_API_BASE_URL}/models"
try:
self._debug(f"Fetching models from {url}")
            response = requests.get(url, headers=self._get_headers(), timeout=30)
models_data = self._handle_response(response).get("data", [])
            # Apply free-only filter if enabled; OpenRouter marks free variants
            # with a ":free" suffix on the model ID
            if self.valves.FREE_ONLY:
                self._debug("Filtering for free models only")
                models_data = [
                    model
                    for model in models_data
                    if model.get("id", "").lower().endswith(":free")
                ]
# Extract model information
all_models = []
for model in models_data:
model_id = model.get("id")
if model_id:
# Use model name or ID, with optional prefix
model_name = model.get("name", model_id)
prefix = self.valves.MODEL_PREFIX
all_models.append({
"id": model_id,
"name": f"{prefix}{model_name}" if prefix else model_name,
})
self._debug(f"Found {len(all_models)} models from OpenRouter")
            # When debugging, save the list of all model IDs to a file for reference
            if DEBUG:
                try:
                    with open("openrouter_model_ids.txt", "w") as f:
                        f.write("All available model IDs from OpenRouter:\n")
                        for model in all_models:
                            f.write(f"{model['id']}: {model['name']}\n")
                    self._debug("Saved model IDs to openrouter_model_ids.txt for reference")
                except Exception as e:
                    self._debug(f"Error saving model IDs to file: {e}")
return all_models or [{"id": "error", "name": "No models found"}]
except Exception as e:
self._debug(f"Error fetching models: {e}")
return [{"id": "openrouter-pro", "name": str(e)}]
def _model_matches(self, model_id: str, pattern: str) -> bool:
"""Check if a model ID matches a pattern, with case-insensitive matching."""
# Clean up both strings for comparison
model_id = model_id.strip().lower()
pattern = pattern.strip().lower()
return (
model_id == pattern
or model_id.startswith(pattern + ":")
or pattern.startswith(model_id + ":")
)
def _filter_models_by_group(
self, all_models: List[Dict[str, str]], selected_groups: List[str]
) -> List[Dict[str, str]]:
"""Filter models based on the selected groups."""
# If no groups selected or "all" is in the list, return all models
if not selected_groups or "all" in selected_groups:
self._debug("All models selected or no groups specified")
return all_models
# Add models from each selected group
model_ids_to_include = []
for group in selected_groups:
if group in self.model_groups:
group_model_ids = self.model_groups[group]
model_ids_to_include.extend(group_model_ids)
self._debug(f"Added {len(group_model_ids)} models from group '{group}'")
        self._debug(
            f"Looking for {len(model_ids_to_include)} model IDs "
            f"across {len(selected_groups)} groups"
        )
# Filter models by the IDs in the selected groups
filtered_models = []
seen_model_ids = set() # To prevent duplicates
for model_id_pattern in model_ids_to_include:
# Try to find models that match this pattern
for model in all_models:
if self._model_matches(model["id"], model_id_pattern) and model["id"] not in seen_model_ids:
# If found, add it to the filtered list
filtered_models.append(model)
seen_model_ids.add(model["id"])
self._debug(f"✓ Found model: {model['id']} matches pattern: {model_id_pattern}")
break # Only add the first match for each pattern
self._debug(f"Found {len(filtered_models)} models from {len(model_ids_to_include)} patterns")
# If we didn't find any models, log a warning
if not filtered_models:
self._debug("WARNING: No models found in selected groups - check your configuration")
self._debug("You can view available model IDs in the openrouter_model_ids.txt file")
return filtered_models
def pipes(self) -> List[dict]:
"""Return models based on the configured groups."""
# Get all available models
all_models = self.get_openrouter_models()
# If no models were found (e.g., API issue), return an error
        if not all_models or (
            len(all_models) == 1 and all_models[0].get("id") == "error"
        ):
return all_models
# If ALL models are selected, just return all models
if self.valves.INCLUDE_ALL:
return all_models
# Determine which groups are selected based on toggles
selected_groups = []
if self.valves.INCLUDE_RECOMMENDED:
selected_groups.append("recommended")
if self.valves.INCLUDE_TOP:
selected_groups.append("top")
if self.valves.INCLUDE_OPENAI:
selected_groups.append("openai")
if self.valves.INCLUDE_ANTHROPIC:
selected_groups.append("anthropic")
if self.valves.INCLUDE_MISTRAL:
selected_groups.append("mistral")
if self.valves.INCLUDE_META:
selected_groups.append("meta")
if self.valves.INCLUDE_GOOGLE:
selected_groups.append("google")
if self.valves.INCLUDE_FAVORITE:
selected_groups.append("favorite")
if self.valves.INCLUDE_FAST:
selected_groups.append("fast")
if self.valves.INCLUDE_POWERFUL:
selected_groups.append("powerful")
if self.valves.INCLUDE_VISION:
selected_groups.append("vision")
        if self.valves.INCLUDE_CODE:
            selected_groups.append("code")
# If no groups selected, default to "recommended"
if not selected_groups:
self._debug("No groups selected, defaulting to 'recommended'")
selected_groups.append("recommended")
self._debug(f"Selected groups: {', '.join(selected_groups)}")
# Filter based on the selected groups
filtered_models = self._filter_models_by_group(all_models, selected_groups)
# If no models were found after filtering, but we had models before filtering,
# default to showing all models to avoid an empty list
if not filtered_models and all_models:
self._debug("No models found after filtering - showing all models instead")
return all_models
return filtered_models
def pipe(self, body: dict) -> Union[str, Generator[str, None, None], Iterator[str]]:
"""Process the chat request."""
try:
# Get the original model ID
original_model_id = body["model"]
self._debug(f"Original model ID: {original_model_id}")
# Format the model ID correctly
model = self._format_model_id(original_model_id)
self._debug(f"Formatted model ID: {model}")
messages = body["messages"]
stream = body.get("stream", False)
# Add required parameters for the OpenRouter API
payload = {
"model": model,
"messages": messages,
"stream": stream,
"temperature": body.get("temperature", 0.7),
}
# Add reasoning support
if self.valves.INCLUDE_REASONING:
payload["include_reasoning"] = True
# Copy other parameters from body if present
for param in [
"max_tokens",
"top_p",
"frequency_penalty",
"presence_penalty",
]:
if param in body:
payload[param] = body[param]
if DEBUG:
self._debug(f"Outgoing payload: {json.dumps(payload, indent=2)}")
url = f"{self.valves.OPENROUTER_API_BASE_URL}/chat/completions"
if stream:
return self._stream_response(url, self._get_headers(), payload)
return self._get_completion(url, self._get_headers(), payload)
except KeyError as e:
error_msg = f"Missing required key in body: {e}"
self._debug(error_msg)
return f"Error: {error_msg}"
except Exception as e:
self._debug(f"Error in pipe method: {e}")
return f"Error: {e}"
def _stream_response(
self, url: str, headers: dict, payload: dict, retries: int = 5
) -> Generator[str, None, None]:
"""Stream response from OpenRouter with reasoning token and citation support."""
self._debug(f"Streaming response from {url}")
        for attempt in range(retries):
            response = None  # Keep the name bound for the except block below
            try:
response = requests.post(
url, json=payload, headers=headers, stream=True, timeout=90
)
response.raise_for_status()
# State tracking
in_reasoning_state = False # True if we've output the opening <think> tag
latest_citations = [] # The latest citations list
for line in response.iter_lines():
if not line:
continue
line_text = line.decode("utf-8")
if not line_text.startswith("data: "):
continue
                    elif line_text == "data: [DONE]":
                        # Close an unterminated reasoning block before finishing
                        if in_reasoning_state:
                            yield "\n</think>\n\n"
                            in_reasoning_state = False
                        if latest_citations:
                            citation_list = [
                                f"{i + 1}. {url}"
                                for i, url in enumerate(latest_citations)
                            ]
                            citation_list_str = "\n".join(citation_list)
                            yield f"\n\n---\nCitations:\n{citation_list_str}"
                        return
try:
chunk = json.loads(line_text[6:])
if "choices" in chunk and chunk["choices"]:
choice = chunk["choices"][0]
citations = chunk.get("citations", [])
# Update the citation list
if citations:
latest_citations = citations
# Check for reasoning tokens
reasoning_text = None
if "delta" in choice and "reasoning" in choice["delta"]:
reasoning_text = choice["delta"]["reasoning"]
elif (
"message" in choice and "reasoning" in choice["message"]
):
reasoning_text = choice["message"]["reasoning"]
# Check for content tokens
content_text = None
if "delta" in choice and "content" in choice["delta"]:
content_text = choice["delta"]["content"]
elif "message" in choice and "content" in choice["message"]:
content_text = choice["message"]["content"]
# Handle reasoning tokens
if reasoning_text:
# If first reasoning token, output opening tag
if not in_reasoning_state:
yield "<think>\n"
in_reasoning_state = True
                                # Output the reasoning token, linking markers
                                # against the most recent citations list
                                yield _insert_citations(reasoning_text, latest_citations)
# Handle content tokens
if content_text:
# If transitioning from reasoning to content, close the thinking tag
if in_reasoning_state:
yield "\n</think>\n\n"
in_reasoning_state = False
                                # Output the content, linking markers against
                                # the most recent citations list
                                yield _insert_citations(content_text, latest_citations)
except json.JSONDecodeError:
self._debug(f"Failed to decode stream line: {line}")
continue
# If we're still in reasoning state at the end, close the tag
if in_reasoning_state:
yield "\n</think>\n\n"
return
except requests.RequestException as e:
status_code = getattr(response, "status_code", None)
if status_code == 429 and attempt < retries - 1:
wait_time = 2**attempt
self._debug(
f"Rate limited (429). Retrying after {wait_time} seconds..."
)
time.sleep(wait_time)
else:
self._debug(f"Stream request failed: {e}")
yield f"Error: {str(e)}"
if status_code:
yield f" Status code: {status_code}"
                    if response is not None:
                        yield f" Response: {response.text}"
return
def _get_completion(
self, url: str, headers: dict, payload: dict, retries: int = 3
) -> str:
"""Handle non-streaming response from OpenRouter with reasoning and citation support."""
        for attempt in range(retries):
            response = None  # Keep the name bound for the except block below
            try:
self._debug(
f"Attempt {attempt + 1}: Sending completion request to {url}"
)
response = requests.post(url, json=payload, headers=headers, timeout=90)
res = self._handle_response(response)
# Check if we have choices in the response
if not res.get("choices") or len(res["choices"]) == 0:
return ""
# Extract content and reasoning if present
choice = res["choices"][0]
message = choice.get("message", {})
# Get citations if present
citations = res.get("citations", [])
                # Extract content and reasoning ("or" guards against null values)
                content = message.get("content") or ""
                reasoning = message.get("reasoning") or ""
self._debug(
f"Found reasoning: {bool(reasoning)} ({len(reasoning)} chars)"
)
self._debug(f"Found content: {bool(content)} ({len(content)} chars)")
# Apply citations to the content and reasoning
if citations and content:
content = _insert_citations(content, citations)
if citations and reasoning:
reasoning = _insert_citations(reasoning, citations)
# If we have both reasoning and content
if reasoning and content:
result = f"<think>\n{reasoning}\n</think>\n\n{content}"
elif reasoning: # Only reasoning, no content (unusual)
result = f"<think>\n{reasoning}\n</think>\n\n"
elif content: # Only content, no reasoning
result = content
else:
result = ""
# Add citations at the end if present
if citations:
                    citation_list = [
                        f"{i + 1}. {url}" for i, url in enumerate(citations)
                    ]
                    citation_list_str = "\n".join(citation_list)
result += f"\n\n---\nCitations:\n{citation_list_str}"
return result
except requests.RequestException as e:
status_code = getattr(response, "status_code", None)
if status_code == 429 and attempt < retries - 1:
wait_time = 2**attempt
self._debug(
f"Rate limited (429). Retrying after {wait_time} seconds..."
)
time.sleep(wait_time)
else:
error_msg = f"Completion request failed: {e}"
if status_code:
error_msg += f" Status code: {status_code}"
                    if response is not None:
                        error_msg += f" Response: {response.text}"
self._debug(error_msg)
return f"Error: {error_msg}"