"""
title: Google Scholar Research Tool
author: OpenWebUI Community
author_url: https://github.com/open-webui/open-webui
description: Advanced Google Scholar research tool for academic paper search and analysis
version: 1.0.0
license: MIT
required_open_webui_version: 0.4.0
requirements: google-search-results>=2.4.2
"""
import asyncio
import json
from datetime import datetime
from typing import List, Dict, Optional, Any
from pydantic import BaseModel, Field
from serpapi import GoogleScholarSearch
# Upper bound on results per request (SerpApi limit for Google Scholar).
_SERPAPI_MAX_RESULTS = 20


def _read_user_valve(user: Optional[Dict], name: str, default: Any) -> Any:
    """Return the per-user valve ``name``, or ``default`` when it is not set.

    The "valves" entry of the user mapping is accepted either as a plain dict
    or as an object exposing the settings as attributes (for example a
    ``UserValves`` instance), so neither shape raises.
    """
    if not user:
        return default
    valves = user.get("valves")
    if valves is None:
        return default
    if isinstance(valves, dict):
        return valves.get(name, default)
    return getattr(valves, name, default)


def _resolve_result_limit(
    requested: Optional[int], user_limit: int, admin_limit: int
) -> int:
    """Return the number of results to request, clamped to 1..20.

    When the caller gave no explicit limit, the smaller of the per-user and
    the admin limit is used. An explicit limit is only capped by the SerpApi
    maximum, as before.
    """
    if requested is None:
        requested = min(user_limit, admin_limit)
    return max(1, min(requested, _SERPAPI_MAX_RESULTS))


def _format_authors(authors: Any, limit: int = 3) -> str:
    """Render an author list as "A, B, C, et al.".

    Entries may be plain strings or SerpApi author objects (dicts carrying a
    "name" key); entries without a usable name are skipped.
    """
    names = []
    for entry in authors or []:
        name = entry.get("name") if isinstance(entry, dict) else entry
        if name:
            names.append(str(name))
    if not names:
        return "Unknown Authors"
    if len(names) > limit:
        return f"{', '.join(names[:limit])}, et al."
    return ", ".join(names)


async def _emit_status(emitter: Any, description: str, done: bool) -> None:
    """Send a "status" event through ``emitter``; no-op without an emitter."""
    if emitter:
        await emitter({
            "type": "status",
            "data": {"description": description, "done": done},
        })


class Tools:
    """Google Scholar search tools backed by the SerpApi client.

    Each public method is a synchronous wrapper that drives an async
    implementation with ``asyncio.run()`` and returns Markdown text.
    """

    class Valves(BaseModel):
        serpapi_key: str = Field(
            default="",
            description="Your SerpApi API key (get one at https://serpapi.com/users/sign_up)"
        )
        max_results: int = Field(
            default=10,
            description="Maximum number of results to return per search (1-20)"
        )
        default_year_from: int = Field(
            default=2020,
            description="Default starting year for searches"
        )
        default_year_to: int = Field(
            default=2024,
            description="Default ending year for searches"
        )

    class UserValves(BaseModel):
        max_results_per_search: int = Field(
            default=10,
            description="Maximum results per search for this user (1-20)"
        )
        preferred_language: str = Field(
            default="en",
            description="Preferred language for search results (e.g., 'en', 'es', 'fr')"
        )

    def __init__(self):
        """Initialize the Google Scholar Research Tool."""
        self.citation = False  # Disable automatic citations to use custom ones
        self.valves = self.Valves()

    def search_papers(
        self,
        query: str,
        year_from: Optional[int] = None,
        year_to: Optional[int] = None,
        max_results: Optional[int] = None,
        include_patents: bool = False,
        sort_by_date: bool = False,
        __user__: Optional[Dict] = None,
        __event_emitter__=None,
    ) -> str:
        """
        Search for academic papers on Google Scholar.
        :param query: Search query (can include author:, source:, etc.)
        :param year_from: Starting year for search results
        :param year_to: Ending year for search results
        :param max_results: Maximum number of results to return
        :param include_patents: Whether to include patents in results
        :param sort_by_date: Sort results by date instead of relevance
        :return: Formatted search results with citations
        """
        # asyncio.run() raises RuntimeError when an event loop is already
        # running in this thread, so this wrapper must be called from
        # synchronous code.
        # NOTE(review): confirm how the host invokes sync tool methods.
        return asyncio.run(self._search_papers_async(
            query, year_from, year_to, max_results, include_patents,
            sort_by_date, __user__, __event_emitter__
        ))

    async def _search_papers_async(
        self,
        query: str,
        year_from: Optional[int],
        year_to: Optional[int],
        max_results: Optional[int],
        include_patents: bool,
        sort_by_date: bool,
        __user__: Optional[Dict],
        __event_emitter__,
    ) -> str:
        """Async implementation of paper search.

        Builds the SerpApi request from the arguments and valves, formats each
        organic result as a Markdown section and, when an emitter is given,
        sends status updates plus one "citation" event per paper. Any
        exception is turned into an error string instead of propagating.
        """
        try:
            # Validate API key
            if not hasattr(self, 'valves') or not self.valves.serpapi_key:
                return "❌ Error: SerpApi API key not configured. Please set your API key in the tool settings."

            await _emit_status(__event_emitter__, "🔍 Searching Google Scholar...", False)

            limit = _resolve_result_limit(
                max_results,
                _read_user_valve(
                    __user__, "max_results_per_search", self.valves.max_results
                ),
                self.valves.max_results,
            )

            # Fall back to the configured default year range
            if year_from is None:
                year_from = self.valves.default_year_from
            if year_to is None:
                year_to = self.valves.default_year_to

            search_params = {
                "q": query,
                "api_key": self.valves.serpapi_key,
                "num": limit,
                "hl": _read_user_valve(__user__, "preferred_language", "en"),
                # "7" includes patents, "0" excludes them
                "as_sdt": "7" if include_patents else "0",
            }
            # A year of 0 is treated as "no bound" and left out of the request
            if year_from:
                search_params["as_ylo"] = year_from
            if year_to:
                search_params["as_yhi"] = year_to
            if sort_by_date:
                search_params["scisbd"] = "1"  # Sort by date

            # Perform search
            results = GoogleScholarSearch(search_params).get_dict()

            await _emit_status(__event_emitter__, "📄 Processing search results...", False)

            if "organic_results" not in results:
                message = f"❌ No results found for query: '{query}'"
                # Surface the API's own explanation (bad key, quota, ...)
                # instead of reporting every failure as an empty result set.
                api_error = results.get("error")
                if api_error:
                    message += f" (SerpApi: {api_error})"
                # Close the status indicator; otherwise it stays "in progress".
                await _emit_status(__event_emitter__, message, True)
                return message

            papers = results["organic_results"]
            total_results = len(papers)

            # Header
            lines = [
                "# 🎓 Google Scholar Search Results",
                f"**Query:** {query}",
                f"**Results:** {total_results} papers found",
            ]
            if year_from or year_to:
                year_range = f"{year_from or 'Any'} - {year_to or 'Any'}"
                lines.append(f"**Year Range:** {year_range}")
            lines.append("")

            # One Markdown section (and one citation event) per paper
            for i, paper in enumerate(papers, 1):
                title = paper.get("title", "Unknown Title")
                pub_info = paper.get("publication_info") or {}
                author_str = _format_authors(pub_info.get("authors"))
                journal = pub_info.get("summary", "")

                cited_by = (paper.get("inline_links") or {}).get("cited_by") or {}
                citation_count = cited_by.get("total") or 0
                # ID accepted by search_citing_papers(paper_id=...)
                cites_id = cited_by.get("cites_id")

                snippet = paper.get("snippet", "")
                link = paper.get("link")

                lines.append(f"## {i}. {title}")
                lines.append(f"**Authors:** {author_str}")
                if journal:
                    lines.append(f"**Publication:** {journal}")
                if citation_count > 0:
                    lines.append(f"**Citations:** {citation_count}")
                if cites_id:
                    lines.append(f"**Cites ID:** {cites_id}")
                if snippet:
                    lines.append(f"**Abstract:** {snippet}")
                if link:
                    lines.append(f"**Link:** [View Paper]({link})")
                lines.append("")

                # Emit citation for each paper
                if __event_emitter__:
                    citation_text = f"Title: {title}\nAuthors: {author_str}"
                    if journal:
                        citation_text += f"\nPublication: {journal}"
                    if snippet:
                        citation_text += f"\nAbstract: {snippet}"
                    await __event_emitter__({
                        "type": "citation",
                        "data": {
                            "document": [citation_text],
                            "metadata": [{
                                "date_accessed": datetime.now().isoformat(),
                                "source": f"Google Scholar - {title}",
                                "authors": author_str,
                                "publication": journal,
                                "citations": citation_count
                            }],
                            "source": {"name": title, "url": link if link else ""},
                        },
                    })

            await _emit_status(
                __event_emitter__, f"✅ Found {total_results} papers", True
            )
            return "\n".join(lines)
        except Exception as e:
            # Tool boundary: report the failure as text rather than raising.
            error_msg = f"❌ Error searching Google Scholar: {str(e)}"
            await _emit_status(__event_emitter__, error_msg, True)
            return error_msg

    def search_by_author(
        self,
        author_name: str,
        max_results: Optional[int] = None,
        year_from: Optional[int] = None,
        year_to: Optional[int] = None,
        __user__: Optional[Dict] = None,
        __event_emitter__=None,
    ) -> str:
        """
        Search for papers by a specific author.
        :param author_name: Name of the author to search for
        :param max_results: Maximum number of results to return
        :param year_from: Starting year for search results
        :param year_to: Ending year for search results
        :return: Formatted search results
        """
        # Delegates to search_papers with an author: query, patents excluded
        # and relevance ordering.
        query = f'author:"{author_name}"'
        return self.search_papers(
            query, year_from, year_to, max_results, False, False, __user__, __event_emitter__
        )

    def search_citing_papers(
        self,
        paper_id: str,
        max_results: Optional[int] = None,
        __user__: Optional[Dict] = None,
        __event_emitter__=None,
    ) -> str:
        """
        Find papers that cite a specific paper.
        :param paper_id: Google Scholar cites ID of the paper (the "Cites ID" shown in search results)
        :param max_results: Maximum number of results to return
        :return: Papers that cite the specified paper
        """
        # Same asyncio.run() constraint as search_papers: call from sync code.
        return asyncio.run(self._search_citing_papers_async(
            paper_id, max_results, __user__, __event_emitter__
        ))

    async def _search_citing_papers_async(
        self,
        paper_id: str,
        max_results: Optional[int],
        __user__: Optional[Dict],
        __event_emitter__,
    ) -> str:
        """Async implementation of citing papers search.

        Sends ``paper_id`` as the SerpApi ``cites`` parameter and formats the
        organic results as Markdown. Any exception is turned into an error
        string instead of propagating.
        """
        try:
            if not hasattr(self, 'valves') or not self.valves.serpapi_key:
                return "❌ Error: SerpApi API key not configured."

            await _emit_status(__event_emitter__, "🔍 Finding citing papers...", False)

            limit = _resolve_result_limit(
                max_results,
                _read_user_valve(
                    __user__, "max_results_per_search", self.valves.max_results
                ),
                self.valves.max_results,
            )
            search_params = {
                "cites": paper_id,
                "api_key": self.valves.serpapi_key,
                "num": limit,
                "hl": _read_user_valve(__user__, "preferred_language", "en"),
            }
            results = GoogleScholarSearch(search_params).get_dict()

            if "organic_results" not in results:
                message = f"❌ No citing papers found for ID: {paper_id}"
                api_error = results.get("error")
                if api_error:
                    message += f" (SerpApi: {api_error})"
                # Close the status indicator; otherwise it stays "in progress".
                await _emit_status(__event_emitter__, message, True)
                return message

            papers = results["organic_results"]
            lines = [
                "# 📚 Papers Citing This Work",
                f"**Found:** {len(papers)} citing papers",
                "",
            ]
            for i, paper in enumerate(papers, 1):
                title = paper.get("title", "Unknown Title")
                pub_info = paper.get("publication_info") or {}
                author_str = _format_authors(pub_info.get("authors"))
                lines.append(f"## {i}. {title}")
                lines.append(f"**Authors:** {author_str}")
                snippet = paper.get("snippet", "")
                if snippet:
                    lines.append(f"**Context:** {snippet}")
                link = paper.get("link")
                if link:
                    lines.append(f"**Link:** [View Paper]({link})")
                lines.append("")

            await _emit_status(
                __event_emitter__, f"✅ Found {len(papers)} citing papers", True
            )
            return "\n".join(lines)
        except Exception as e:
            # Tool boundary: report the failure as text rather than raising.
            error_msg = f"❌ Error finding citing papers: {str(e)}"
            await _emit_status(__event_emitter__, error_msg, True)
            return error_msg

    def search_recent_papers(
        self,
        query: str,
        max_results: Optional[int] = None,
        __user__: Optional[Dict] = None,
        __event_emitter__=None,
    ) -> str:
        """
        Search for recent papers (published from two years ago through the current year) on a topic.
        :param query: Search query
        :param max_results: Maximum number of results to return
        :return: Recent papers on the topic
        """
        current_year = datetime.now().year
        return self.search_papers(
            query,
            year_from=current_year - 2,
            year_to=current_year,
            max_results=max_results,
            sort_by_date=True,
            __user__=__user__,
            __event_emitter__=__event_emitter__
        )

    def get_research_summary(
        self,
        topic: str,
        num_papers: int = 5,
        __user__: Optional[Dict] = None,
        __event_emitter__=None,
    ) -> str:
        """
        Get a research summary for a specific topic with key papers.
        :param topic: Research topic to summarize
        :param num_papers: Number of key papers to include
        :return: Research summary with key findings
        """
        # Same asyncio.run() constraint as search_papers: call from sync code.
        return asyncio.run(self._get_research_summary_async(
            topic, num_papers, __user__, __event_emitter__
        ))

    async def _get_research_summary_async(
        self,
        topic: str,
        num_papers: int,
        __user__: Optional[Dict],
        __event_emitter__,
    ) -> str:
        """Async implementation of research summary.

        Runs one relevance-ordered paper search restricted to the years from
        three years ago through the current year, and prepends a summary
        header to its output. Any exception is turned into an error string.
        """
        try:
            await _emit_status(
                __event_emitter__,
                f"📊 Generating research summary for: {topic}",
                False,
            )

            current_year = datetime.now().year
            recent_results = await self._search_papers_async(
                topic,
                year_from=current_year - 3,
                year_to=current_year,
                max_results=num_papers,
                include_patents=False,
                sort_by_date=False,
                __user__=__user__,
                __event_emitter__=None  # Don't emit duplicate events
            )

            await _emit_status(__event_emitter__, "✅ Research summary generated", True)

            header = (
                f"# 📊 Research Summary: {topic}\n\n"
                f"**Topic:** {topic}\n"
                "**Focus:** Recent developments (last 3 years)\n"
                f"**Key Papers:** {num_papers}\n\n"
                "## 🔍 Recent Key Findings\n\n"
            )
            return header + recent_results
        except Exception as e:
            # Tool boundary: report the failure as text rather than raising.
            error_msg = f"❌ Error generating research summary: {str(e)}"
            await _emit_status(__event_emitter__, error_msg, True)
            return error_msg