We're Hiring!
Whitepaper
Docs
Sign In
@scitlab
·
8 months ago
·
8 months ago
function
doccape_anonymization
Get
Last Updated
8 months ago
Created
8 months ago
Function
filter
v1.0
Name
doccape_anonymization
Downloads
31+
Saves
0+
Description
A PII filter for redacting (anonymizing or pseudonymizing) messages before sending them to the LLM using the Doccape API.
Function Code
Show
"""
title: Doccape PII Filter
author: Scitlab
author_url: https://scitlab.de/
date: 2025-06-03
funding_url: https://github.com/open-webui
version: 1.0
license: MIT
description: A PII filter for redacting (anonymizing or pseudonymizing) messages before sending them to the LLM using the Doccape API.
requirements:
  - pydantic
  - requests
"""

import logging
import os
import warnings
from typing import List, Optional, Tuple

import requests
from pydantic import BaseModel, Field

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Silences the "Unverified HTTPS request" warning that is emitted when
# VERIFY_SSL is disabled. Comment this line out to see those warnings again.
warnings.filterwarnings("ignore", message="Unverified HTTPS request")

# Separator placed between the individual user texts when they are merged into
# a single API request. It contains control characters so that it is very
# unlikely to occur in real user input.
_MESSAGE_SEPARATOR = "\n\x1c||SEP||\x1c\n"


class Filter:
    """Filter that redacts PII in user messages through the Doccape API."""

    # Valves: Configuration options for the filter
    class Valves(BaseModel):
        DOCCAPE_API_KEY: str = Field(
            default=os.getenv("DOCCAPE_API_KEY", ""),
            description="Your Doccape API key. Check out the documentation at https://api.doccape.de/api-docs on how to create one.",
        )
        DOCCAPE_API_BASE_URL: str = Field(
            default=os.getenv("DOCCAPE_API_BASE_URL", "https://api.doccape.de"),
            description="Base URL for accessing Doccape API endpoints, e.g. https://api.doccape.de.",
        )
        PII_CLASSES: List[str] = Field(
            default=["PER", "STR", "LOC", "ORG"],
            description="List of detected PII classes, separated by commas without spaces (e.g., PER,STR,LOC,ORG). Check out the documentation at https://api.doccape.de/api-docs to see which classes are supported.",
        )
        USE_PSEUDONYMIZATION: bool = Field(
            default=True,
            description="Pseudonymize: `True`, Anonymize: `False`. Pseudonymization replaces PII with a unique identifier (e.g. `PER-1`), while anonymization replaces PII with a generic placeholder (e.g. `****`).",
        )
        VERIFY_SSL: bool = Field(
            default=True,
            description="Set to `True` to verify SSL certificates when making API requests.",
        )
        REQUEST_TIMEOUT: float = Field(
            default=60.0,
            description="Timeout in seconds for requests to the Doccape API.",
        )

    def __init__(self):
        # Initialize valves
        self.valves = self.Valves()

    def call_doccape_api(self, text: str) -> str:
        """Send ``text`` to the Doccape API and return the redacted text.

        Args:
            text: The text to pseudonymize or anonymize.

        Returns:
            The value of ``pseudonymizedText`` from the JSON response, or
            ``text`` unchanged if the response does not contain that field.

        Raises:
            requests.RequestException: If the request fails, times out, or
                the API answers with an HTTP error status.
        """
        # Tolerate a trailing slash in the configured base URL.
        base_url = self.valves.DOCCAPE_API_BASE_URL.rstrip("/")
        url = f"{base_url}/api/external/v1/text"
        headers = {
            "Authorization": f"Bearer {self.valves.DOCCAPE_API_KEY}",
            "Content-Type": "application/json",
        }
        data = {
            "text": text,
            "usePseudonyms": self.valves.USE_PSEUDONYMIZATION,
            "textLabels": self.valves.PII_CLASSES,
        }

        try:
            # Without a timeout `requests` waits indefinitely on a stalled
            # connection, which would block the whole chat request.
            response = requests.post(
                url,
                headers=headers,
                json=data,
                verify=self.valves.VERIFY_SSL,
                timeout=self.valves.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            # NOTE(review): the fallback returns the *unredacted* input when
            # the field is missing - confirm that `pseudonymizedText` is also
            # the response field when `usePseudonyms` is False.
            return response.json().get("pseudonymizedText", text)
        except requests.RequestException as e:
            logger.error("API Call Failed: %s", e)
            raise

    @staticmethod
    def _user_text_slots(body: dict) -> List[Tuple[dict, str]]:
        """Return ``(container, key)`` pairs locating every user text in ``body``.

        A plain-string message yields ``(message, "content")``; each
        ``{"type": "text"}`` item of a list-valued message yields
        ``(item, "text")``. Slots are returned in message order.
        """
        slots: List[Tuple[dict, str]] = []
        for message in body.get("messages") or []:
            if message.get("role") != "user":
                continue
            content = message.get("content")
            if isinstance(content, str):
                slots.append((message, "content"))
            elif isinstance(content, list):
                for item in content:
                    if (
                        isinstance(item, dict)
                        and item.get("type") == "text"
                        and isinstance(item.get("text"), str)
                    ):
                        slots.append((item, "text"))
        return slots

    def inlet(self, body: dict, __user__: Optional[dict] = None, **kwargs) -> dict:
        """Redact PII in all user text messages of ``body`` via the Doccape API.

        All user texts are merged into one request (joined by a rare
        separator) and the response is split back and written into the same
        positions.

        Args:
            body: Request body; user texts are read from ``body["messages"]``.
            __user__: Unused.
            **kwargs: Unused.

        Returns:
            The same ``body`` object with its user texts replaced in place.

        Raises:
            requests.RequestException: If the Doccape API call fails.
            ValueError: If the API response cannot be split back into the
                same number of texts that were sent.
        """
        slots = self._user_text_slots(body)
        texts = [container[key] for container, key in slots]

        # Nothing to redact if there are no (non-empty) user texts.
        if not any(texts):
            return body

        merged_text = _MESSAGE_SEPARATOR.join(texts)
        anonymized_text = self.call_doccape_api(merged_text)
        anonymized_parts = anonymized_text.split(_MESSAGE_SEPARATOR)

        # Validate before touching `body`, so a mangled separator can neither
        # misalign texts nor leave the messages half overwritten.
        if len(anonymized_parts) != len(slots):
            raise ValueError(
                f"Doccape API returned {len(anonymized_parts)} text segments, "
                f"expected {len(slots)}; refusing to forward the messages."
            )

        for (container, key), redacted in zip(slots, anonymized_parts):
            container[key] = redacted

        return body

    def outlet(self, body: dict, __user__: Optional[dict] = None) -> None:
        # NOTE(review): returns None instead of `body` - confirm the host
        # application does not use the outlet return value.
        pass


# Testing the filter
if __name__ == "__main__":
    pii_filter = Filter()

    # Test the filter with a sample message
    body = {
        "messages": [
            {"role": "user", "content": "Hello, my name is John Doe."},
            {"role": "bot", "content": "Hi PER-1, how can I help you today?"},
        ]
    }

    result = pii_filter.inlet(body)
    print(result)

    expected_message = "Hello, my name is PER-1."
    message = result["messages"][0]["content"]

    if message == expected_message:
        print("Filter test passed!")
    else:
        print("Filter test failed!\n")
        print(f"Message: '{message}'")
        print(f"Expected: '{expected_message}'")
Sponsored by Open WebUI Inc.
We are hiring!
Shape the way humanity engages with
intelligence
.