rendercomment.py 4.77 KB
Newer Older
1
#!/usr/bin/env python3
2
3

import sys
4

5
from urllib.parse import quote_plus
6
7
from xml.etree.ElementTree import Element

8
import bleach
9
import markdown
10
import pygit2
11

12
import aurweb.config
13

14
15
from aurweb import db, logging, util
from aurweb.models import PackageComment
16
17

logger = logging.get_logger(__name__)
18

19

20
21
22
23
24
class LinkifyExtension(markdown.extensions.Extension):
    """
    Markdown extension that converts bare URLs into links.

    No explicit markdown link syntax is required. URLs inside code blocks
    are not linkified.
    """

    # Matches http(s) and ftp URLs up to the first character that cannot be
    # part of a URL; trailing punctuation is left out of the match.
    _urlre = (r'(\b(?:https?|ftp):\/\/[\w\/\#~:.?+=&%@!\-;,]+?'
              r'(?=[.:?\-;,]*(?:[^\w\/\#~:.?+=&%@!\-;,]|$)))')

    def extendMarkdown(self, md):
        """Register the bare-URL autolinker on the Markdown instance *md*."""
        linkifier = markdown.inlinepatterns.AutolinkInlineProcessor(
            self._urlre, md)
        # Priority 119 places it right after the default <>-link processor,
        # which is registered at priority 120.
        md.inlinePatterns.register(linkifier, 'linkify', 119)
35
36


37
38
39
40
41
42
43
class FlysprayLinksInlineProcessor(markdown.inlinepatterns.InlineProcessor):
    """
    Link Flyspray task references such as FS#1234 to bugs.archlinux.org.

    The complete match (group 0) becomes the link text, while capture
    group 1 supplies the Flyspray task ID used in the link target.
    """

    def handleMatch(self, m, data):
        """Return the anchor element and the span of text it replaces."""
        task_id = m.group(1)
        anchor = Element('a')
        anchor.set('href', f'https://bugs.archlinux.org/task/{task_id}')
        # AtomicString keeps other inline patterns from processing the text.
        anchor.text = markdown.util.AtomicString(m.group(0))
        return anchor, m.start(0), m.end(0)
50
51
52


class FlysprayLinksExtension(markdown.extensions.Extension):
    """Markdown extension that enables linking of FS#<id> references."""

    def extendMarkdown(self, md):
        """Register the Flyspray inline processor on *md*."""
        flyspray = FlysprayLinksInlineProcessor(r'\bFS#(\d+)\b', md)
        md.inlinePatterns.register(flyspray, 'flyspray-links', 118)
56
57


58
59
60
61
62
63
64
65
66
class GitCommitsInlineProcessor(markdown.inlinepatterns.InlineProcessor):
    """
    Link Git hashes such as f7f5152be5ab to AUR's cgit.

    A hash is only linkified when it refers to an object that exists in the
    repository. Linked hashes are shortened to a shorter non-ambiguous
    prefix. Candidates must consist of at least 7 hexadecimal digits.
    """

    def __init__(self, md, head):
        """Open the repository configured as serve/repo-path for *head*."""
        self._repo = pygit2.Repository(
            aurweb.config.get('serve', 'repo-path'))
        self._head = head
        super().__init__(r'\b([0-9a-f]{7,40})\b', md)

    def handleMatch(self, m, data):
        """Return a commit link for the match, or leave the text untouched."""
        commit_hash = m.group(1)
        if commit_hash not in self._repo:
            # Unknown OID; preserve the original text.
            return None, None, None

        uri_template = aurweb.config.get("options", "commit_uri")
        short_hash = commit_hash[:util.git_search(self._repo, commit_hash)]

        link = Element('a')
        link.set('href', uri_template % (
            quote_plus(self._head),
            quote_plus(short_hash),
        ))
        link.text = markdown.util.AtomicString(short_hash)
        return link, m.start(0), m.end(0)
88
89
90
91
92
93
94
95
96


class GitCommitsExtension(markdown.extensions.Extension):
    """
    Markdown extension that linkifies Git commit hashes.

    *head* is forwarded to GitCommitsInlineProcessor, which uses it when
    building commit URLs.
    """
    _head = None

    def __init__(self, head):
        # super() without arguments runs Extension.__init__. The previous
        # super(markdown.extensions.Extension, self) started the lookup
        # *after* Extension in the MRO and so skipped its initializer.
        super().__init__()
        self._head = head

    def extendMarkdown(self, md):
        """
        Register the git-commits inline processor on *md*.

        If the repository cannot be opened (pygit2.GitError), an error is
        logged and the processor is not registered.
        """
        try:
            processor = GitCommitsInlineProcessor(md, self._head)
        except pygit2.GitError:
            logger.error(f"No git repository found for '{self._head}'.")
        else:
            md.inlinePatterns.register(processor, 'git-commits', 117)
103
104


105
106
107
108
109
110
111
112
113
114
class HeadingTreeprocessor(markdown.treeprocessors.Treeprocessor):
    """
    Demote headings in the parsed document tree.

    h1 becomes h5; h2 through h5 become h6.
    """

    def run(self, doc):
        """Rewrite heading tags in place on every element under *doc*."""
        # iter() walks the whole tree. Iterating over `doc` directly only
        # visits the root's direct children, which would leave headings
        # nested in e.g. blockquotes or list items undemoted.
        for elem in doc.iter():
            if elem.tag == 'h1':
                elem.tag = 'h5'
            elif elem.tag in ('h2', 'h3', 'h4', 'h5'):
                elem.tag = 'h6'


class HeadingExtension(markdown.extensions.Extension):
    """Markdown extension that installs HeadingTreeprocessor."""

    def extendMarkdown(self, md):
        """Register the heading tree processor on *md*."""
        heading_processor = HeadingTreeprocessor(md)
        # Priority doesn't matter since we don't conflict with other
        # processors.
        md.treeprocessors.register(heading_processor, 'heading', 30)
118
119


120
121
122
def save_rendered_comment(comment: PackageComment, html: str) -> None:
    """Store *html* as the comment's RenderedComment inside a db.begin() block."""
    with db.begin():
        comment.RenderedComment = html
123
124


125
126
def update_comment_render_fastapi(comment: PackageComment) -> None:
    """Thin wrapper that delegates directly to update_comment_render()."""
    update_comment_render(comment)
127

128

129
130
131
def update_comment_render(comment: PackageComment) -> None:
    """
    Render a comment's markdown to sanitized HTML and store the result.

    The comment text is rendered with the fenced_code, linkify, Flyspray,
    Git commit and heading extensions, cleaned with bleach against a tag
    allow-list, saved as the comment's rendered form, and the comment is
    then refreshed from the database.
    """
    text = comment.Comments
    pkgbasename = comment.PackageBase.Name

    html = markdown.markdown(text, extensions=[
        'fenced_code',
        LinkifyExtension(),
        FlysprayLinksExtension(),
        GitCommitsExtension(pkgbasename),
        HeadingExtension()
    ])

    # bleach 6.0 changed ALLOWED_TAGS from a list to a frozenset, which
    # cannot be concatenated with a list. list() accepts both forms.
    allowed_tags = list(bleach.sanitizer.ALLOWED_TAGS) + [
        'p', 'pre', 'h4', 'h5', 'h6', 'br', 'hr',
    ]
    html = bleach.clean(html, tags=allowed_tags)

    save_rendered_comment(comment, html)
    db.refresh(comment)
146
147


148
def main():
    """
    Re-render the comment whose ID is given as the first CLI argument.

    The database engine is obtained first, then the first PackageComment
    matching the ID is passed to update_comment_render().
    """
    db.get_engine()
    target_id = int(sys.argv[1])
    matching = db.query(PackageComment).filter(
        PackageComment.ID == target_id)
    update_comment_render(matching.first())
155
156


157
158
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()