"""
title: Academic Search
author: Alex P. Leith, Ph.D.
author_url: https://github.com/apleith/owui-academic-search
description: Perform literature searches without any API keys - Google Scholar via scholarly (scraping), Crossref via habanero (open REST API), and arXiv via its open API.
requirements: scholarly,habanero,arxiv
version: 0.1.0
license: Apache-2.0
"""
from pydantic import BaseModel
from typing import List, Dict
class Tools:
    """Key-free literature search tools for Google Scholar, Crossref and arXiv.

    Each public coroutine takes a small pydantic config model and returns a
    list of plain dicts. The third-party client libraries are imported lazily
    inside each method, and their blocking network calls run in a worker
    thread (``asyncio.to_thread``) so the caller's event loop is not stalled.
    """

    class GSConfig(BaseModel):
        # Free-text query handed to scholarly.search_pubs.
        query: str
        # Maximum number of publications to return.
        num_results: int = 5

    async def search_google_scholar(self, cfg: GSConfig) -> List[Dict]:
        """
        Search Google Scholar via the scholarly package.

        :param cfg: Query string and maximum number of results.
        :return: A list of dicts with title, authors, year, and url.
        """
        import asyncio
        from itertools import islice

        # Nothing can be returned for a non-positive limit; skip the proxy
        # setup and the scraping request entirely.
        if cfg.num_results <= 0:
            return []

        def _fetch() -> List[Dict]:
            from scholarly import scholarly, ProxyGenerator

            pg = ProxyGenerator()
            pg.FreeProxies()
            scholarly.use_proxy(pg)

            results = []
            # islice stops after exactly num_results entries; an
            # enumerate/break loop would pull one extra entry from the
            # scraper before noticing the limit.
            for entry in islice(scholarly.search_pubs(cfg.query), cfg.num_results):
                if isinstance(entry, dict):
                    # Recent scholarly releases yield dict records: the
                    # bibliographic fields live under "bib", the year is
                    # "pub_year", and the link is the top-level "pub_url".
                    bib = entry.get("bib") or {}
                    link = entry.get("pub_url") or bib.get("url")
                else:
                    # Older releases exposed an object with a ``.bib`` dict.
                    bib = getattr(entry, "bib", None) or {}
                    link = bib.get("url")
                results.append({
                    "title": bib.get("title"),
                    "authors": bib.get("author"),
                    "year": bib.get("pub_year", bib.get("year")),
                    "url": link,
                })
            return results

        return await asyncio.to_thread(_fetch)

    class CRConfig(BaseModel):
        # Free-text query handed to habanero's Crossref.works.
        query: str
        # Maximum number of works to return.
        rows: int = 5

    async def search_crossref(self, cfg: CRConfig) -> List[Dict]:
        """
        Search Crossref REST API via habanero.

        :param cfg: Query string and maximum number of rows.
        :return: A list of dicts with title, authors, doi, url, and year.
        """
        import asyncio

        if cfg.rows <= 0:
            return []

        def _fetch() -> List[Dict]:
            from habanero import Crossref

            # habanero names the page-size argument ``limit``; passing
            # ``rows`` does not set the number of results.
            resp = Crossref().works(query=cfg.query, limit=cfg.rows)
            items = (resp.get("message") or {}).get("items") or []

            results = []
            for itm in items:
                # "title" may be absent or an empty list.
                titles = itm.get("title") or [""]

                authors = []
                for a in itm.get("author") or []:
                    full = f"{a.get('given','')} {a.get('family','')}".strip()
                    # Organisational authors carry only a "name" field.
                    authors.append(full or a.get("name", ""))

                # "date-parts" is a list of lists and may be missing, empty,
                # or contain an empty inner list.
                date_parts = (itm.get("issued") or {}).get("date-parts") or [[None]]
                first_date = date_parts[0] or [None]

                results.append({
                    "title": titles[0],
                    "authors": authors,
                    "doi": itm.get("DOI"),
                    "url": itm.get("URL"),
                    "year": first_date[0],
                })
            return results

        return await asyncio.to_thread(_fetch)

    class ArxivConfig(BaseModel):
        # Query string handed to arxiv.Search.
        query: str
        # Maximum number of entries to return.
        max_results: int = 5

    async def search_arxiv(self, cfg: ArxivConfig) -> List[Dict]:
        """
        Search arXiv via its open API.

        :param cfg: Query string and maximum number of results.
        :return: A list of dicts with title, authors, summary, url, and
            published (ISO 8601 timestamp).
        """
        import asyncio

        # Guard explicitly rather than rely on how the client library
        # interprets a zero or negative max_results.
        if cfg.max_results <= 0:
            return []

        def _fetch() -> List[Dict]:
            import arxiv

            search = arxiv.Search(
                query=cfg.query,
                max_results=cfg.max_results,
                sort_by=arxiv.SortCriterion.Relevance,
            )
            # Search.results() is deprecated in arxiv 2.x; iterate through a
            # Client instead.
            client = arxiv.Client()
            return [
                {
                    "title": entry.title,
                    "authors": [a.name for a in entry.authors],
                    "summary": entry.summary,
                    "url": entry.entry_id,
                    "published": entry.published.isoformat(),
                }
                for entry in client.results(search)
            ]

        return await asyncio.to_thread(_fetch)