# Doc Builder MD PDF v1.7.3 • Open WebUI Community

"""
title: Doc_Builder_MD_PDF
author: Lumen for Nefhis
version: 1.7.3
notes: Flow reducido (Fuente+Nombre). Branding y limpieza via (User)Valves persistentes. Limpieza por-mensaje en 'chat completo' para evitar capturas cross-bloque. MD+PDF; code auto-fencing; GFM tables; lang="es"; secure links.
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0naHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmcnIHZpZXdCb3g9JzAgMCAyNCAyNCcgZmlsbD0nbm9uZScgc3Ryb2tlLWxpbmVjYXA9J3JvdW5kJyBzdHJva2UtbGluZWpvaW49J3JvdW5kJz48ZyBzdHJva2U9J2N1cnJlbnRDb2xvcicgc3Ryb2tlLXdpZHRoPScxLjYnPjxwYXRoIGQ9J00xNCAySDdhMiAyIDAgMCAwLTIgMnYxNmEyIDIgMCAwIDAgMiAyaDEwYTIgMiAwIDAgMCAyLTJWOXonLz48cGF0aCBkPSdNMTQgMnY3aDcnLz48L2c+PGcgc3Ryb2tlPScjMTRCOEE2JyBzdHJva2Utd2lkdGg9JzEuNic+PHBhdGggZD0nTTEyIDExdjYnLz48cGF0aCBkPSdNOS41IDE0LjVMMTIgMTdsMi41LTIuNScvPjwvZz48L3N2Zz4=
"""

# SPDX-License-Identifier: MIT
# Copyright (c) 2025 José Antonio Iranzo Bazco

import base64
import datetime
import json
import os
import re
from typing import Literal, Optional

from pydantic import BaseModel, Field

# Content-part "type" values that are rendered as fenced code blocks.
_CODE_TYPES = frozenset(
    {"code", "codeblock", "code_block", "pre", "markdown_code", "code-cell"}
)

# Extensions stripped from a user-supplied base name before appending ours.
_KNOWN_EXTS = (
    "md",
    "markdown",
    "pdf",
    "txt",
    "html",
    "htm",
    "doc",
    "docx",
    "rtf",
)

_DOTALL_I = re.DOTALL | re.IGNORECASE

# HTML comments: <!-- ... --> (may span lines).
_HTML_COMMENT_RE = re.compile(r"<!--.*?-->", re.DOTALL)

# Applied in order by _clean_known_artifacts.
_KNOWN_ARTIFACT_PATTERNS = (
    _HTML_COMMENT_RE,
    re.compile(r"<\s*details\b[^>]*>.*?</\s*details\s*>", _DOTALL_I),
    re.compile(r"<\s*summary\b[^>]*>.*?</\s*summary\s*>", _DOTALL_I),
    re.compile(r"<\s*files\b[^>]*>.*?</\s*files\s*>", _DOTALL_I),
    re.compile(r"<\s*(?:tool|image)\b[^>]*>.*?</\s*(?:tool|image)\s*>", _DOTALL_I),
    re.compile(r"<\s*(?:tool|image)\b[^>]*\/?>", _DOTALL_I),
    re.compile(r"\[\s*(?:tool|image)\s*:[^\]]*\]", re.IGNORECASE),
    re.compile(r"\[\s*/?\s*(?:tool|image)\s*\]", re.IGNORECASE),
    re.compile(
        r"\{\{\s*(?:HTML|VIDEO|IMAGE|AUDIO|PDF|TEXT|CSV|MD|DOC|FILE|ASSET)_[A-Z0-9_:\-]+?\s*\}\}",
        re.IGNORECASE,
    ),
    re.compile(r"<\s*iframe\b[^>]*>.*?</\s*iframe\s*>", _DOTALL_I),
    re.compile(r"<\s*script\b[^>]*>.*?</\s*script\s*>", _DOTALL_I),
)

_TRAILING_WS_RE = re.compile(r"[ \t]+\n")
_MANY_NEWLINES_RE = re.compile(r"\n{3,}")

# Heuristics used by the auto-fencing pass.
_MD_HEADING_RE = re.compile(r"^\s{0,3}#{1,6}\s+\S")
_LEADING_INDENT_RE = re.compile(r"^(?:\s{4,}|\t)")
# Word boundaries keep prose such as "format" or "important" from matching.
_CODE_KEYWORD_RE = re.compile(
    r"^\s*(?:(?:import|from|def|class|try|except|with|for|while|if|elif|else|return)\b"
    r"|print\(|@)"
)

# Markdown parsing helpers.
_INDENTED_CODE_RE = re.compile(r"^(\t| {4,})")
_HR_RE = re.compile(r"\s*([-*_]){3,}\s*")
_HEADING_RE = re.compile(r"^\s*(#{1,6})\s+(.*)$")
_TABLE_SEP_CELL_RE = re.compile(r"^:?-{2,}:?$")
_OL_ITEM_RE = re.compile(r"^\s*\d+\.\s+.+")
_OL_MARK_RE = re.compile(r"^\s*\d+\.\s+")
_UL_ITEM_RE = re.compile(r"^\s*[-*]\s+.+")
_UL_MARK_RE = re.compile(r"^\s*[-*]\s+")

# Accent folding for heading slugs.
_SLUG_TRANSLATION = str.maketrans("áàäéèëíìïóòöúùüñ", "aaaeeeiiiooouuun")

# Columns a tab occupies; matches the `tab-size:4` used in the PDF stylesheet.
_TAB_WIDTH = 4


class Action:
    """Open WebUI action that exports chat content as Markdown and printable HTML.

    The action asks the user for a source (last assistant message, last user
    message, whole chat, or pasted text) and a base file name, then triggers a
    Markdown download in the browser and opens a print window for "Save as PDF".
    """

    # ---------- Valves (global) + UserValves (per-user) ----------
    class Valves(BaseModel):
        branding: Literal["none", "teal", "burgundy", "gray"] = Field(
            default="none", description="Banda de marca en PDF."
        )
        tag_cleaning: Literal["none", "known", "custom"] = Field(
            default="none", description="Limpieza de etiquetas/artefactos."
        )
        custom_tokens: str = Field(
            default="tool,image,iframe,script,html_file_id,video_file_id",
            description="Lista (coma/; / salto de línea) usada si tag_cleaning='custom'.",
        )

    class UserValves(BaseModel):
        branding: Optional[Literal["none", "teal", "burgundy", "gray"]] = None
        tag_cleaning: Optional[Literal["none", "known", "custom"]] = None
        custom_tokens: Optional[str] = None

    def __init__(self):
        """Create default valves and make sure the local export folders exist."""
        self.valves = self.Valves()  # admin defaults
        self.out_md = os.path.join("exports", "md")
        self.out_pdf = os.path.join("exports", "pdf")
        for folder in (self.out_md, self.out_pdf):
            try:
                os.makedirs(folder, exist_ok=True)
            except OSError:
                # Local copies are optional; a read-only filesystem must not
                # prevent the action from loading.
                pass

    # ---------- utils ----------
    def _nowstamp(self, date_only: bool = False):
        """Return the current local time as ``YYYY-MM-DD`` or ``YYYY-MM-DD HH:MM``."""
        fmt = "%Y-%m-%d" if date_only else "%Y-%m-%d %H:%M"
        return datetime.datetime.now().strftime(fmt)

    def _default_basename(self):
        """Return the fallback file base name ``document-YYYYMMDD-HHMM``."""
        return f"document-{datetime.datetime.now().strftime('%Y%m%d-%H%M')}"

    def _sanitize_name(self, name: str, ext: str):
        """Turn a user-supplied name into a safe ``<base>.<ext>`` file name.

        Control characters and path/reserved characters are replaced, a known
        extension already present in *name* is removed, and an empty result
        falls back to :meth:`_default_basename`. The base is capped at 120
        characters.
        """
        name = (name or "").strip() or self._default_basename()

        # 1) Remove control characters and problematic separators.
        name = re.sub(r"[\x00-\x1F\x7F]+", " ", name)
        name = re.sub(r'[\\/:*?"<>|]+', "-", name)

        # 2) Collapse whitespace and remove spaces around dots ('. md' -> '.md').
        name = re.sub(r"\s{2,}", " ", name).strip()
        name = re.sub(r"\s*\.\s*", ".", name)

        # 3) Avoid dotfiles and leftover leading dashes/underscores.
        name = name.lstrip(".-_ ").strip()

        # 4) Strip the requested extension, or the first known one that matches.
        base = name
        for candidate in (ext,) + _KNOWN_EXTS:
            stripped = re.sub(
                rf"\.\s*{re.escape(candidate)}\s*$", "", base, flags=re.IGNORECASE
            )
            if stripped != base:
                base = stripped
                break

        # 5) Drop dangling dots/spaces, 6) enforce fallback and max length.
        base = base.rstrip(" .")
        base = (base or self._default_basename())[:120]
        return f"{base}.{ext}"

    # --- content flattening and code preservation ---
    def _emit_code_fence(self, txt: str, lang: str = ""):
        """Wrap *txt* in a fenced code block; an unsafe *lang* tag is dropped."""
        lang = (lang or "").strip()
        if not re.fullmatch(r"[A-Za-z0-9_+.\-#]*", lang):
            lang = ""
        return f"```{lang}\n{txt or ''}\n```"

    @staticmethod
    def _as_text(value):
        """Return *value* unchanged if it is a string, otherwise ``str(value)``."""
        return value if isinstance(value, str) else str(value)

    def _flat_content(self, content):
        """Flatten a message ``content`` (str, list of parts, or dict) to text.

        Parts whose ``type`` is one of the known code types are emitted as
        fenced code blocks; other dict parts contribute their ``text`` or
        ``content`` value. Anything else is converted with ``str()``.
        """
        if content is None:
            return ""
        if isinstance(content, str):
            return content

        if isinstance(content, list):
            parts = []
            for item in content:
                if isinstance(item, str):
                    parts.append(item)
                    continue
                if not isinstance(item, dict):
                    continue
                kind = (item.get("type") or "").lower()
                if kind in _CODE_TYPES:
                    lang = item.get("language") or item.get("lang") or ""
                    data = item.get("data")
                    code_txt = (
                        item.get("text")
                        or item.get("content")
                        or item.get("code")
                        or (isinstance(data, dict) and data.get("code"))
                        or ""
                    )
                    parts.append(self._emit_code_fence(self._as_text(code_txt), lang))
                else:
                    # Non-string payloads are coerced so the join cannot fail.
                    parts.append(
                        self._as_text(item.get("text") or item.get("content") or "")
                    )
            return "\n".join(parts)

        if isinstance(content, dict):
            kind = (content.get("type") or "").lower()
            lang = content.get("language") or content.get("lang") or ""
            text = self._as_text(
                content.get("text")
                or content.get("content")
                or content.get("code")
                or ""
            )
            if kind in _CODE_TYPES:
                return self._emit_code_fence(text, lang)
            return text

        return str(content)

    @staticmethod
    def _format_turn(role, txt):
        """Return the Markdown fragment for one chat turn."""
        if role == "user":
            return f"**User:**\n\n{txt}\n"
        if role == "assistant":
            return f"**Assistant:**\n\n{txt}\n"
        # NOTE(review): system messages contribute only a blank fragment here;
        # the original text for this branch may have been lost — confirm
        # against the upstream release before changing it.
        return "\n"

    def _collect_chat_text(self, messages):
        """Join user/assistant/system messages into one Markdown transcript."""
        turns = []
        for message in messages:
            role = message.get("role")
            if role not in ("user", "assistant", "system"):
                continue
            txt = self._flat_content(message.get("content")).strip()
            if txt:
                turns.append(self._format_turn(role, txt))
        return "\n---\n".join(turns).strip()

    def _clean_text(self, text, tag_cleaning, custom_tokens):
        """Apply the cleaning mode selected by *tag_cleaning* to *text*."""
        if tag_cleaning == "known":
            return self._clean_known_artifacts(text)
        if tag_cleaning == "custom":
            return self._clean_by_custom_tokens(text, custom_tokens)
        return text

    def _clean_and_fence(self, text, tag_cleaning, custom_tokens):
        """Clean *text* and then auto-fence unfenced code in it."""
        return self._auto_fence_code_in_text(
            self._clean_text(text, tag_cleaning, custom_tokens)
        )

    def _collect_chat_text_cleaned(
        self, messages, tag_cleaning: str, custom_tokens: str
    ):
        """Build the full-chat transcript, cleaning each message on its own.

        Cleaning per message keeps the non-greedy tag patterns from matching
        an opening tag in one message and a closing tag in another.
        """
        turns = []
        for message in messages:
            role = message.get("role")
            if role not in ("user", "assistant", "system"):
                continue
            txt = self._flat_content(message.get("content")).strip()
            if not txt:
                continue
            txt = self._clean_and_fence(txt, tag_cleaning, custom_tokens)
            turns.append(self._format_turn(role, txt))
        return "\n---\n".join(turns).strip()

    @staticmethod
    def _looks_like_code(line: str) -> bool:
        """Heuristic: does *line* look like source code rather than prose?"""
        if _LEADING_INDENT_RE.match(line):
            return True
        # A Markdown heading is never treated as code...
        if _MD_HEADING_RE.match(line):
            return False
        # ...but other '#' lines (shell/Python comments) are.
        if line.lstrip().startswith("#"):
            return True
        if _CODE_KEYWORD_RE.match(line):
            return True
        return line.rstrip().endswith(":")

    def _auto_fence_code_in_text(self, text: str) -> str:
        """Wrap runs of three or more code-looking lines in ``` fences.

        The text is returned untouched if it already contains a fence.
        Markdown headings are never counted as code.
        """
        text = text or ""
        if "```" in text:
            return text

        out, run = [], []

        def flush():
            # Only runs of >= 3 lines are fenced; shorter runs pass through.
            if len(run) >= 3:
                out.append("```")
                out.extend(run)
                out.append("```")
            else:
                out.extend(run)
            run.clear()

        for line in text.splitlines():
            if self._looks_like_code(line):
                run.append(line)
            else:
                flush()
                out.append(line)
        flush()
        return "\n".join(out)

    @staticmethod
    def _tidy_whitespace(text: str) -> str:
        """Strip trailing spaces before newlines and collapse 3+ newlines to 2."""
        text = _TRAILING_WS_RE.sub("\n", text)
        return _MANY_NEWLINES_RE.sub("\n\n", text)

    def _clean_known_artifacts(self, text: str) -> str:
        """Remove a fixed set of UI artifacts from *text*.

        Removed: HTML comments; ``details``/``summary``/``files``/``iframe``/
        ``script`` blocks; ``tool``/``image`` tags and bracket markers; and
        ``{{KIND_ID}}`` placeholders.
        """
        if not text:
            return text or ""
        out = text
        for pattern in _KNOWN_ARTIFACT_PATTERNS:
            out = pattern.sub("", out)
        return self._tidy_whitespace(out)

    def _clean_by_custom_tokens(self, text: str, tokens_csv: str) -> str:
        """Aggressively remove markup built around user-defined tokens.

        *tokens_csv* is split on commas, semicolons and newlines. For each
        token ``tok``:

        - ``<tok ...> ... </tok>``        -> whole block removed
        - ``[tok] ... [/tok]``            -> whole block removed
        - ``[tok:x] ... [/tok]``          -> whole block removed
        - ``[tok:x]`` (standalone)        -> marker removed
        - ``<tok ...>``, ``</tok>``, ``<tok .../>`` -> removed
        - ``{{...tok...}}``               -> placeholder removed
        - the bare word ``tok``           -> removed
        - the special token ``comment``   -> removes ``<!-- ... -->`` only

        Leftover ``[]`` / ``[/]`` and extra blank lines are cleaned at the end.
        """
        if not text or not tokens_csv:
            return text or ""
        tokens = [t.strip() for t in re.split(r"[,\n;]+", tokens_csv) if t.strip()]
        if not tokens:
            return text

        out = text
        for tok in tokens:
            # 0) 'comment' means "strip HTML comments" and nothing else.
            if tok.lower() == "comment":
                out = _HTML_COMMENT_RE.sub("", out)
                continue

            t = re.escape(tok)
            patterns = (
                # 1) Paired HTML block: <tok ...> ... </tok>
                rf"<\s*{t}\b[^>]*>.*?</\s*{t}\s*>",
                # 2) Simple BBCode block: [tok] ... [/tok]
                rf"\[\s*{t}\s*\].*?\[\s*/\s*{t}\s*\]",
                # 3) BBCode block with attribute: [tok:x] ... [/tok]
                rf"\[\s*{t}\s*:[^\]]*\].*?\[\s*/\s*{t}\s*\]",
                # 4) Standalone [tok:x] marker (no closing tag)
                rf"\[\s*{t}\s*:[^\]]*\]",
                # 5) Loose HTML tags: <tok ...>, </tok>, <tok .../>
                rf"<\s*/?\s*{t}\b[^>]*>",
                # 6) Placeholders containing the token
                rf"\{{\{{[^}}]*{t}[^}}]*\}}\}}",
            )
            for pattern in patterns:
                out = re.sub(pattern, "", out, flags=_DOTALL_I)

            # 7) The bare word itself (not as part of a longer word).
            out = re.sub(rf"(?<!\w){t}(?!\w)", "", out, flags=re.IGNORECASE)

        # 8) Final cleanup of leftovers.
        out = re.sub(r"\[\s*/?\s*\]", "", out)  # removes [] and [/]
        return self._tidy_whitespace(out)

    def _has_very_long_code_line(self, src: str, limit: int = 120) -> bool:
        """Return True if any code line in *src* is wider than *limit* columns.

        A line counts as code if it is inside a ``` fence, is indented by a tab
        or four spaces, or contains a tab. Tabs are measured as four columns.
        """
        in_fence = False
        for line in (src or "").splitlines():
            if line.strip().startswith("```"):
                in_fence = not in_fence
                continue
            is_code = (
                in_fence
                or line.startswith("\t")
                or re.match(r"^ {4,}", line) is not None
                or "\t" in line
            )
            if is_code and len(line.replace("\t", " " * _TAB_WIDTH)) > limit:
                return True
        return False

    def _escape_html(self, s):
        """Escape ``&``, ``<`` and ``>`` so *s* is safe as HTML text."""
        # '&' must be replaced first so the other entities are not re-escaped.
        return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    def _slug(self, text: str):
        """Return a lowercase ASCII slug for *text* (``"sec"`` if empty)."""
        folded = text.lower().translate(_SLUG_TRANSLATION)
        return re.sub(r"[^a-z0-9]+", "-", folded).strip("-") or "sec"

    def _inline_md(self, s):
        """Render inline Markdown (code, http(s) links, bold, italic) to HTML.

        Code spans and links are swapped for placeholders before escaping so
        their generated HTML is not escaped or re-processed as emphasis.
        """
        if s is None:
            return ""
        if not isinstance(s, str):
            s = str(s)

        code_spans = []

        def _code_repl(m):
            code_spans.append(self._escape_html(m.group(1)))
            return f"@@CODE{len(code_spans) - 1}@@"

        s = re.sub(r"`([^`\n]+)`", _code_repl, s)

        anchors = []

        def _link_repl(m):
            text_esc = self._escape_html(m.group(1))
            # A literal quote would terminate the href attribute.
            href_attr = self._escape_html(m.group(2).replace('"', "%22"))
            anchors.append(
                f'<a href="{href_attr}" target="_blank" rel="noopener noreferrer">{text_esc}</a>'
            )
            return f"@@A{len(anchors) - 1}@@"

        # Only absolute http(s) targets become links.
        s = re.sub(r"\[([^\]]+)\]\((https?://[^\s)]+)\)", _link_repl, s)

        s = self._escape_html(s)
        s = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", s)
        s = re.sub(r"(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)", r"<em>\1</em>", s)

        # Anchors first: link text may itself contain a code placeholder.
        for i, anchor in enumerate(anchors):
            s = s.replace(f"@@A{i}@@", anchor)
        for i, code_html in enumerate(code_spans):
            s = s.replace(f"@@CODE{i}@@", f"<code>{code_html}</code>")
        return s

    def _render_code_block(self, code_text: str):
        """Render *code_text* as ``<ol class="codeblock">`` with one ``<li>`` per line."""
        out = ['<ol class="codeblock">']
        for ln in (code_text or "").split("\n"):
            safe = self._escape_html(ln) if ln else ""
            out.append(f"<li><code>{safe}</code></li>")
        out.append("</ol>")
        return "\n".join(out)

    # --- table helpers ---
    @staticmethod
    def _strip_outer_pipes(row: str) -> str:
        """Strip whitespace and at most one leading and one trailing ``|``."""
        row = row.strip()
        if row.startswith("|"):
            row = row[1:]
        if row.endswith("|"):
            row = row[:-1]
        return row

    def _split_table_row(self, row: str):
        """Split a pipe-table row into stripped cell strings."""
        return [c.strip() for c in self._strip_outer_pipes(row).split("|")]

    def _is_table_sep(self, row: str) -> bool:
        """Return True if *row* is a separator row such as ``|---|:--:|``."""
        if "|" not in row.strip():
            return False
        parts = self._split_table_row(row)
        if not parts or any(not p for p in parts):
            return False
        return all(_TABLE_SEP_CELL_RE.match(p) for p in parts)

    def _parse_align(self, sep_row: str, count: int):
        """Return per-column alignments from *sep_row*, padded to *count*."""
        align = []
        for part in self._split_table_row(sep_row):
            left, right = part.startswith(":"), part.endswith(":")
            if left and right:
                align.append("center")
            elif right:
                align.append("right")
            else:
                align.append("left")
        align.extend(["left"] * (count - len(align)))
        return align

    @staticmethod
    def _is_table_body_row(candidate: str) -> bool:
        """Return True if *candidate* can continue a table body."""
        c = candidate.strip()
        if c == "" or c.startswith("#") or c.startswith("```"):
            return False
        return "|" in c

    def _render_table(self, header_cells, aligns, rows):
        """Render a parsed pipe table as an HTML ``<table class="mdtbl">``."""

        def align_of(idx):
            return aligns[idx] if idx < len(aligns) else "left"

        col_count = len(header_cells)
        if col_count == 3:
            widths = [22, 39, 39]
        elif col_count == 2:
            widths = [30, 70]
        else:
            widths = [round(100 / col_count, 2)] * col_count

        t = ['<table class="mdtbl" lang="es">', "<colgroup>"]
        t.extend(f'<col style="width:{w}%">' for w in widths)
        t.append("</colgroup>")
        t.append("<thead><tr>")
        for idx, cell in enumerate(header_cells):
            t.append(
                f'<th style="text-align:{align_of(idx)}">{self._inline_md(cell)}</th>'
            )
        t.append("</tr></thead><tbody>")
        for row in rows:
            t.append("<tr>")
            for idx, cell in enumerate(row):
                t.append(
                    f'<td style="text-align:{align_of(idx)}">{self._inline_md(cell)}</td>'
                )
            t.append("</tr>")
        t.append("</tbody></table>")
        return "\n".join(t)

    def _md_to_html_and_toc(self, text):
        """Convert a Markdown subset to HTML and collect a table of contents.

        Handles fenced and indented code, horizontal rules, headings, pipe
        tables, ordered/unordered lists and paragraphs.

        Returns:
            ``(html, toc)`` where *toc* is a list of ``(level, text, slug)``
            for level-2 and level-3 headings. ``text`` is already HTML-escaped
            (tags stripped from the rendered heading).
        """
        lines = (text or "").splitlines()
        # Drop a BOM glued to the first character.
        if lines and lines[0].startswith("\ufeff"):
            lines[0] = lines[0].lstrip("\ufeff")
        n = len(lines)

        html, toc = [], []
        anchors_used = set()
        code = []
        in_code = False
        open_list = None  # "ul", "ol" or None

        def close_lists():
            nonlocal open_list
            if open_list:
                html.append(f"</{open_list}>")
                open_list = None

        def ensure_list(tag):
            nonlocal open_list
            if open_list != tag:
                close_lists()
                html.append(f"<{tag}>")
                open_list = tag

        i = 0
        while i < n:
            line = lines[i].rstrip("\r")

            # Fenced code: toggle on every ``` line.
            if line.strip().startswith("```"):
                if in_code:
                    html.append(self._render_code_block("\n".join(code)))
                    code = []
                    in_code = False
                else:
                    close_lists()
                    in_code = True
                i += 1
                continue
            if in_code:
                code.append(line)
                i += 1
                continue

            # Indented code: consume the whole run and strip one indent level.
            if _INDENTED_CODE_RE.match(line):
                close_lists()
                indented = []
                while i < n and _INDENTED_CODE_RE.match(lines[i]):
                    cur = lines[i]
                    cur = cur[1:] if cur.startswith("\t") else cur[4:]
                    indented.append(cur.rstrip("\r"))
                    i += 1
                html.append(self._render_code_block("\n".join(indented)))
                continue

            if _HR_RE.fullmatch(line):
                close_lists()
                html.append("<hr>")
                i += 1
                continue

            m = _HEADING_RE.match(line)
            if m:
                close_lists()
                lvl = len(m.group(1))
                content = self._inline_md(m.group(2).strip())
                if lvl in (2, 3):
                    plain = re.sub(r"<[^>]+>", "", content)
                    slug = base = self._slug(plain)
                    j = 1
                    while slug in anchors_used:  # de-duplicate anchors
                        j += 1
                        slug = f"{base}-{j}"
                    anchors_used.add(slug)
                    html.append(f'<h{lvl} id="{slug}">{content}</h{lvl}>')
                    toc.append((lvl, plain, slug))
                else:
                    html.append(f"<h{lvl}>{content}</h{lvl}>")
                i += 1
                continue

            # Pipe table: a row with >= 2 cells followed by a separator row.
            if (
                "|" in line
                and len(self._split_table_row(line)) >= 2
                and i + 1 < n
                and self._is_table_sep(lines[i + 1])
            ):
                close_lists()
                header_cells = self._split_table_row(line)
                aligns = self._parse_align(lines[i + 1], len(header_cells))
                i += 2
                rows = []
                while i < n and self._is_table_body_row(lines[i]):
                    rows.append(self._split_table_row(lines[i].strip()))
                    i += 1
                html.append(self._render_table(header_cells, aligns, rows))
                continue

            if _OL_ITEM_RE.match(line):
                ensure_list("ol")
                item = _OL_MARK_RE.sub("", line, count=1).strip()
                html.append(f"<li>{self._inline_md(item)}</li>")
            elif _UL_ITEM_RE.match(line):
                ensure_list("ul")
                item = _UL_MARK_RE.sub("", line, count=1).strip()
                html.append(f"<li>{self._inline_md(item)}</li>")
            else:
                close_lists()
                if line.strip() == "":
                    html.append("<br>")
                else:
                    html.append(f"<p>{self._inline_md(line)}</p>")
            i += 1

        # An unterminated fence still renders what was collected.
        if in_code:
            html.append(self._render_code_block("\n".join(code)))
        close_lists()
        return "\n".join(html), toc

    def _pdf_html(self, title: str, content_md: str, brand_css: str = "") -> str:
        """Build the full printable HTML document for *content_md*.

        The page contains the title, the current date, an optional table of
        contents, the rendered body, and a script that calls
        ``window.print()`` once after load. *brand_css* is inserted verbatim
        into ``<head>``.
        """
        body, toc = self._md_to_html_and_toc(content_md)

        toc_html = ""
        if toc:
            items = []
            for lvl, text, anchor in toc:
                cls = "lvl2" if lvl == 2 else "lvl3"
                # `text` is already HTML-escaped by _md_to_html_and_toc;
                # escaping again would show entities such as "&amp;amp;".
                items.append(f'<li class="{cls}"><a href="#{anchor}">{text}</a></li>')
            toc_html = f"""
<div class="toc">
  <h2>Content</h2>
  <ul>{''.join(items)}</ul>
  <hr>
</div>
"""

        fs_pt = 11.0
        lh = 1.35
        base_limit = 120
        # Smaller fonts fit more columns before a code line needs wrapping.
        scaled_limit = max(90, min(160, round(base_limit * (12.0 / fs_pt))))

        wrap_css = ""
        if self._has_very_long_code_line(content_md, scaled_limit):
            wrap_css = """
ol.codeblock > li { white-space: pre-wrap; }
ol.codeblock > li > code { white-space: inherit; overflow-wrap: break-word; word-break: normal; hyphens: none; }
"""

        return f"""<!doctype html>
<html lang="es"><head>
<meta charset="utf-8">
<title>{self._escape_html(title)}</title>
<style>
@page {{ size: A4; margin: 18mm; }}
:root {{ --text:#111; --muted:#555; --rule:#ddd; --fs:{fs_pt:.0f}pt; --lh:{lh}; --codefs: calc(var(--fs) * 0.82); }}
html,body {{ height:100%; }}
body {{ font-family: -apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,Helvetica,Arial,'Apple Color Emoji','Segoe UI Emoji'; color:var(--text); line-height:var(--lh); font-size:var(--fs); }}
h1,h2,h3,h4,h5,h6 {{ margin:0 0 .5rem 0; line-height:1.25; }}
h1 {{ font-size: calc(var(--fs) * 1.64); }}
h2 {{ font-size: calc(var(--fs) * 1.33); margin-top:.75rem; }}
h3 {{ font-size: calc(var(--fs) * 1.18); margin-top:.5rem; }}
p {{ margin:.25rem 0; }}
ul,ol {{ margin:.25rem 0 .5rem 1.25rem; }}
li {{ margin:.1rem 0; }}
hr {{ border:0; border-top:1px solid var(--rule); margin:.6rem 0; }}
.meta {{ color:var(--muted); margin-bottom:.6rem; font-size: calc(var(--fs) * 0.84); }}
.doc {{ max-width:720px; margin:0 auto; }}
.toc h2 {{ margin-top:.25rem; }}
.toc ul {{ list-style:none; padding-left:0; margin:.25rem 0 .5rem 0; }}
.toc li.lvl2 {{ margin-left:0; }}
.toc li.lvl3 {{ margin-left:1.0rem; }}
.toc a {{ text-decoration:none; color: #0b57d0; }}
table.mdtbl {{ width:100%; border-collapse:collapse; margin:.6rem 0 1rem; font-size: calc(var(--fs) * 0.92); }}
table.mdtbl th, table.mdtbl td {{ border:1px solid var(--rule); padding:6px 8px; vertical-align:top; overflow-wrap:anywhere; }}
table.mdtbl thead th {{ background:#f3f4f6; font-weight:600; }}
table.mdtbl tbody tr:nth-child(even) {{ background:#fbfbfb; }}
table.mdtbl tr, table.mdtbl thead, table.mdtbl tbody {{ page-break-inside:avoid; }}
ol.codeblock {{ list-style:none; margin:.6rem 0 1rem; padding:.6rem .8rem; background:#f6f6f6; border:1px solid var(--rule); border-radius:6px; font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace; font-size: var(--codefs); tab-size:4; -moz-tab-size:4; }}
ol.codeblock > li {{ white-space: pre; line-height:1.5; page-break-inside:avoid; break-inside:avoid; }}
ol.codeblock > li > code {{ background:none; border:0; padding:0; margin:0; font:inherit; font-variant-ligatures:none; hyphens:none; }}
code {{ background:#f6f6f6; padding:.05rem .3rem; border-radius:4px; border:1px solid #eee; font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace; font-size: var(--codefs); font-variant-ligatures:none; }}
{wrap_css}
</style>
{brand_css}
</head>
<body>
<div class="doc">
<h1>{self._escape_html(title)}</h1>
<div class="meta">{self._nowstamp(date_only=True)}</div>
<hr>
{toc_html}
{body}
</div>
<script>
(function () {{
  function once() {{
    if (window.__printed__) return;
    window.__printed__ = true;
    setTimeout(() => {{ try {{ window.print(); }} catch (_) {{}} }}, 250);
  }}
  if (document.readyState === 'complete') once();
  else window.addEventListener('load', once, {{ once: true }});
}})();
</script>
</body></html>"""

    async def _force_download_md(self, __event_call__, filename, md_text: str):
        """Ask the browser to download *md_text* as *filename*.

        Sends an ``execute`` event with JavaScript that clicks a temporary
        ``data:`` link. Returns whatever the event call returns, or
        ``"err:execute"`` if the call raises.
        """
        b64 = base64.b64encode(md_text.encode("utf-8")).decode("ascii")
        # json.dumps produces valid JavaScript string literals for both values.
        js = f"""
(function(){{
  try {{
    const a = document.createElement('a');
    const b64 = {json.dumps(b64)};
    const fname = {json.dumps(filename)};
    a.href = "data:text/markdown;charset=utf-8;base64," + b64;
    a.download = fname;
    document.body.appendChild(a);
    a.click();
    setTimeout(()=>a.remove(),0);
    return "ok";
  }} catch(e) {{
    return "err:"+(e&&e.message?e.message:'unknown');
  }}
}})()"""
        try:
            return await __event_call__({"type": "execute", "data": {"code": js}})
        except Exception:
            return "err:execute"

    async def _open_print_window(self, __event_call__, html_doc: str):
        """Open a new browser window and load *html_doc* into it.

        The script first tries ``document.write`` and falls back to a Blob
        URL. Returns whatever the event call returns, or ``"err:execute"`` if
        the call raises.
        """
        b64 = base64.b64encode(html_doc.encode("utf-8")).decode("ascii")
        js = f"""
(function(){{
  try {{
    const win = window.open("about:blank","_blank");
    if (!win) return "err:popup-blocked";
    try {{ win.opener = null; }} catch(_) {{}}
    const bin = atob({json.dumps(b64)});
    const bytes = new Uint8Array(bin.length);
    for (let i=0;i<bin.length;i++) bytes[i] = bin.charCodeAt(i);
    const html = new TextDecoder('utf-8').decode(bytes);
    let wrote = false;
    try {{
      win.document.open();
      win.document.write(html);
      win.document.close();
      wrote = true;
    }} catch(_){{ }}
    if (!wrote) {{
      try {{
        const blob = new Blob([html], {{type: "text/html;charset=utf-8"}});
        const url = URL.createObjectURL(blob);
        win.location.href = url;
      }} catch (_err2) {{
        return "err:cannot-write";
      }}
    }}
    return "ok";
  }} catch(e) {{
    return "err:"+(e&&e.message?e.message:'unknown');
  }}
}})()"""
        try:
            return await __event_call__({"type": "execute", "data": {"code": js}})
        except Exception:
            return "err:execute"

    def _compute_brand_css(self, preset: str):
        """Return the ``<style>`` block for branding *preset*.

        ``"none"`` disables the side bar; ``"burgundy"`` and ``"gray"`` pick
        their colour; any other value falls back to teal.
        """
        preset = (preset or "").lower()
        if preset == "none":
            return """
<style>
:root { --brand: #0b57d0; --bar-w: 0mm; --bar-pad: 0mm; }
@media screen { body::before { display:none; } body { padding-left: 0; } }
@media print { .doc { border-left: 0; padding-left: 0; } }
</style>
"""
        if preset == "burgundy":
            brand = "#7a1e2e"
        elif preset == "gray":
            brand = "#4a5568"
        else:
            brand = "#00637c"  # teal is the default

        return f"""
<style>
:root {{ --brand: {brand}; --bar-w: 3mm; --bar-pad: 7mm; }}
@media screen {{
  body::before {{ content:""; position: fixed; top:0; left:0; bottom:0; width: var(--bar-w); background: var(--brand); }}
  body {{ padding-left: calc(var(--bar-w) + var(--bar-pad)); }}
}}
@media print {{
  body::before {{ display:none; }}
  body {{ padding-left: 0; }}
  .doc {{ box-sizing: border-box; border-left: var(--bar-w) solid var(--brand); padding-left: calc(var(--bar-w) + var(--bar-pad)); width: 100%; max-width: none; margin: 0; }}
  * {{ -webkit-print-color-adjust: exact; print-color-adjust: exact; }}
}}
.toc a {{ color: var(--brand); }}
</style>
"""

    # --- settings merge: per-user values override the global valves ---
    def _resolve_settings(self, __user__):
        """Return ``(branding, tag_cleaning, custom_tokens)`` for this call.

        Values come from ``__user__["valves"]`` (a dict or an object with
        attributes) when set and truthy, otherwise from ``self.valves``.
        Unknown branding / cleaning values fall back to ``"none"``.
        """
        uv = __user__.get("valves") if isinstance(__user__, dict) else None

        def pick(key, default):
            value = uv.get(key) if isinstance(uv, dict) else getattr(uv, key, None)
            return value or default

        branding = pick("branding", self.valves.branding)
        tag_cleaning = pick("tag_cleaning", self.valves.tag_cleaning)
        custom_tokens = pick("custom_tokens", self.valves.custom_tokens) or ""

        if branding not in ("none", "teal", "burgundy", "gray"):
            branding = "none"
        if tag_cleaning not in ("none", "known", "custom"):
            tag_cleaning = "none"
        return branding, tag_cleaning, custom_tokens

    async def _notify(self, __event_emitter__, level: str, content: str):
        """Send a toast notification; failures are ignored on purpose.

        Notifications are cosmetic, so a missing or failing emitter must never
        abort the export.
        """
        try:
            await __event_emitter__(
                {"type": "notification", "data": {"type": level, "content": content}}
            )
        except Exception:
            pass

    def _last_message_text(self, messages, role: str) -> str:
        """Return the flattened content of the most recent *role* message."""
        last = next((m for m in reversed(messages) if m.get("role") == role), None)
        return self._flat_content((last or {}).get("content"))

    def _write_local_copy(self, path: str, text: str) -> None:
        """Best-effort write of *text* to *path*; I/O errors are ignored."""
        try:
            with open(path, "w", encoding="utf-8") as f:
                f.write(text)
        except OSError:
            # Local copies are optional; the browser download is what matters.
            pass

    # ---------- main ----------
    async def action(
        self, body: dict, __user__=None, __event_emitter__=None, __event_call__=None
    ):
        """Run the export flow: choose source, choose name, emit MD and PDF.

        Prompts the user for the source (1 assistant, 2 user, 3 chat, 4 paste)
        and a base name, writes optional local copies, triggers the Markdown
        download and opens the print window. Returns ``None``.
        """
        try:
            messages = body.get("messages", []) or []

            sel = (
                await self._prompt(
                    __event_call__,
                    "Doc Builder (MD+PDF) — Source (Required)",
                    "Required → 1) assistant, 2) user, 3) chat, 4) paste",
                )
            ).strip()[:1]
            if not sel:
                # Silent cancel: a toast only, nothing is added to the chat.
                await self._notify(
                    __event_emitter__, "warning", "Cancelled: source is required."
                )
                return

            branding, tag_cleaning, custom_tokens = self._resolve_settings(__user__)

            if sel == "1":
                src = self._clean_and_fence(
                    self._last_message_text(messages, "assistant")
                    or "(No recent assistant message.)",
                    tag_cleaning,
                    custom_tokens,
                )
            elif sel == "2":
                src = self._clean_and_fence(
                    self._last_message_text(messages, "user")
                    or "(No recent user message.)",
                    tag_cleaning,
                    custom_tokens,
                )
            elif sel == "3":
                # Whole chat: cleaned message by message.
                src = (
                    self._collect_chat_text_cleaned(
                        messages, tag_cleaning, custom_tokens
                    )
                    or "(Empty conversation or no text.)"
                )
            elif sel == "4":
                pasted = await self._prompt(
                    __event_call__, "Paste content", "Type or paste here…"
                )
                src = self._clean_and_fence(pasted, tag_cleaning, custom_tokens)
            else:
                await self._notify(__event_emitter__, "warning", "Invalid option.")
                return

            # Base name
            suggested = self._default_basename()
            base = (
                await self._prompt(
                    __event_call__, "Base name (without extension)", suggested
                )
            ).strip() or suggested
            md_name = self._sanitize_name(base, "md")
            pdf_name = self._sanitize_name(base, "pdf")
            title = os.path.splitext(os.path.basename(pdf_name))[0]

            # Build documents
            now = self._nowstamp(date_only=True)
            md_text = f"# {title}\n\n*{now}*\n\n---\n\n{src}\n"

            # Optional local copies
            self._write_local_copy(os.path.join(self.out_md, md_name), md_text)
            self._write_local_copy(os.path.join(self.out_pdf, f"{title}.txt"), src)

            # MD (direct download)
            await self._force_download_md(__event_call__, md_name, md_text)
            await self._notify(
                __event_emitter__, "success", f"📥 MD ready: {md_name}"
            )

            # PDF (print window)
            html_doc = self._pdf_html(title, src, self._compute_brand_css(branding))
            res = await self._open_print_window(__event_call__, html_doc)
            if isinstance(res, str) and res.startswith("err"):
                await self._notify(
                    __event_emitter__,
                    "warning",
                    "⚠️ Could not open the print window. Check your popup blocker.",
                )
                return

            await self._notify(
                __event_emitter__, "success", "🖨️ PDF prepared. Use 'Save as PDF'."
            )

        except Exception as e:
            # Top-level boundary: report the failure in the chat. The emitter
            # may be None, in which case there is nowhere to report to.
            if __event_emitter__ is not None:
                await __event_emitter__(
                    {
                        "type": "message",
                        "data": {"content": f"⚠️ Doc Builder error (MD+PDF): {e}"},
                    }
                )

    # --- prompts ---
    async def _prompt(self, __event_call__, title, placeholder=""):
        """Show an input dialog and return the entered string (``""`` on failure)."""
        try:
            r = await __event_call__(
                {
                    "type": "input",
                    "data": {"title": title, "message": "", "placeholder": placeholder},
                }
            )
        except Exception:
            return ""
        d = r.get("data") if isinstance(r, dict) else r
        return d if isinstance(d, str) else ""


"""
----- LICENSE (MIT) -----
MIT License

Copyright (c) 2025 José Antonio Iranzo Bazco

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
----- END LICENSE -----
"""