Source code for webmentions.render._renderer

import logging
import datetime
import json
import os
import re
from pathlib import Path
from typing import Callable, Collection, Union
from urllib.parse import urlparse

from jinja2 import Environment, PackageLoader, Template, select_autoescape
from markupsafe import Markup

from .._model import Webmention

# HTML sanitisation: tags and attributes allowed in content.
_ALLOWED_TAGS = frozenset(
    {
        "a",
        "p",
        "br",
        "span",
        "strong",
        "em",
        "b",
        "i",
        "u",
        "s",
        "del",
        "blockquote",
        "pre",
        "code",
        "ul",
        "ol",
        "li",
    }
)
_ALLOWED_ATTRS = frozenset(
    {
        "href",
        "class",
        "rel",
        "translate",
        "title",
        "lang",
        "dir",
    }
)
_TAG_RE = re.compile(r"<(/?)(\w+)([^>]*)>", re.DOTALL)
_ATTR_RE = re.compile(r'(\w[\w-]*)=["\']([^"\']*)["\']')


def _sanitize_html(html: str) -> Markup:
    """
    Strip disallowed tags and attributes from *html*, returning a
    :class:`Markup` instance safe for rendering.
    """

    def _replace_tag(m: re.Match) -> str:
        slash, tag, attrs_str = m.group(1), m.group(2).lower(), m.group(3)
        if tag not in _ALLOWED_TAGS:
            return ""
        if slash:
            return f"</{tag}>"
        # Filter attributes
        safe_attrs = []
        for am in _ATTR_RE.finditer(attrs_str):
            attr_name = am.group(1).lower()
            if attr_name in _ALLOWED_ATTRS:
                # Extra check: only allow safe href schemes
                if attr_name == "href":
                    val = am.group(2).strip()
                    parsed = urlparse(val)
                    if parsed.scheme.lower() not in ("http", "https", ""):
                        continue
                safe_attrs.append(f'{am.group(1)}="{am.group(2)}"')
        attr_str = (" " + " ".join(safe_attrs)) if safe_attrs else ""
        return f"<{tag}{attr_str}>"

    return Markup(_TAG_RE.sub(_replace_tag, html))


TemplateLike = Union[str, Path, Template]

logger = logging.getLogger(__name__)


class TemplateUtils:
    """
    Collection of Jinja2 template helper functions.
    """

    @staticmethod
    def format_date(d: object) -> str:
        if not d:
            return ""
        if isinstance(d, datetime.datetime):
            return d.strftime("%b %d, %Y")
        if isinstance(d, str):
            return datetime.datetime.fromisoformat(d).strftime("%b %d, %Y")
        return str(d)

    @staticmethod
    def format_datetime(dt: object) -> str:
        if not dt:
            return ""
        if isinstance(dt, datetime.datetime):
            return dt.strftime("%b %d, %Y at %H:%M")
        if isinstance(dt, str):
            return datetime.datetime.fromisoformat(dt).strftime("%b %d, %Y at %H:%M")
        return str(dt)

    @staticmethod
    def as_url(v: object) -> str:
        if isinstance(v, str):
            return v
        if isinstance(v, dict):
            return str(v.get("url") or v.get("value") or "")
        if isinstance(v, (list, tuple)):
            return str(v[0]) if v else ""
        return ""

    @classmethod
    def hostname(cls, url: str) -> str:
        if not url:
            return ""
        return urlparse(cls.as_url(url)).hostname or ""

    @classmethod
    def safe_url(cls, url: object) -> str:
        u = cls.as_url(url).strip()
        if not u:
            return ""

        parsed = urlparse(u)
        if parsed.scheme.lower() not in {"http", "https"}:
            return ""
        if not parsed.netloc:
            return ""

        return u

    @staticmethod
    def fromjson(v: object) -> object:
        return (
            json.loads(v)
            if isinstance(v, str) and v and v.strip() and v.strip()[0] in '[{"'
            else ({} if v is None else v)
        )

    @staticmethod
    def sanitize_html(html: object) -> Markup:
        """Sanitize HTML content, returning a safe :class:`Markup` instance."""
        if not html:
            return Markup("")
        if isinstance(html, str) and html.strip().lower() == "none":
            return Markup("")
        return _sanitize_html(str(html))

    @classmethod
    def to_dict(cls) -> dict[str, Callable]:
        helpers: dict[str, Callable] = {}
        for name in dir(cls):
            if name.startswith("_"):
                continue
            value = getattr(cls, name)
            if callable(value):
                helpers[name] = value
        return helpers


[docs] class WebmentionsRenderer: """ Webmentions renderer. A utility class for rendering Webmentions into HTML through Jinja2 templates. """ def _get_template(self, template: TemplateLike | None, *, default: str) -> Template: if Environment is None: raise RuntimeError("Jinja2 is required for rendering Webmentions") env = Environment( loader=PackageLoader("webmentions", "templates"), autoescape=select_autoescape(enabled_extensions=("html", "xml")), ) template_obj = None if template is None: template_obj = env.get_template(default) elif isinstance(template, Path) or ( isinstance(template, str) and os.path.isfile(template) ): template_path = Path(template) template_obj = env.from_string(template_path.read_text(encoding="utf-8")) elif isinstance(template, str): template_obj = env.from_string(template) elif isinstance(template, Template): template_obj = template if not template_obj: raise ValueError(f"Invalid template: {template}") return template_obj def _get_markup(self, template: TemplateLike | None, *, default: str, **kwargs): template_obj = self._get_template(template, default=default) return Markup(template_obj.render(**kwargs, **TemplateUtils.to_dict()))
[docs] def render_webmention( self, webmention: Webmention, template: TemplateLike | None = None ) -> Markup: return self._get_markup(template, default="webmention.html", mention=webmention)
[docs] def render_webmentions( self, webmentions: Collection[Webmention], template: TemplateLike | None = None ) -> Markup: def _sort_key(wm: Webmention): return ( wm.created_at or wm.published or datetime.datetime.min.replace(tzinfo=datetime.timezone.utc) ) sorted_mentions = sorted( webmentions, key=_sort_key, reverse=True, ) rendered_mentions = [ self.render_webmention(mention) for mention in sorted_mentions ] counts = {"likes": 0, "reposts": 0, "replies": 0, "mentions": 0} for wm in sorted_mentions: mt = getattr(wm, "mention_type", None) if mt is not None: type_val = mt.value if hasattr(mt, "value") else str(mt) if type_val == "like": counts["likes"] += 1 elif type_val == "repost": counts["reposts"] += 1 elif type_val == "reply": counts["replies"] += 1 else: counts["mentions"] += 1 return self._get_markup( template, default="webmentions.html", mentions=rendered_mentions, counts=counts, )