rendercomment.py 4.81 KB
Newer Older
1
#!/usr/bin/env python3
2

3
import re
import sys
import urllib.parse
import xml.etree.ElementTree

import bleach
import markdown
import pygit2

import aurweb.config
import aurweb.db

12
13
14
# Path of the Git repository, read from the 'repo-path' key of the 'serve'
# configuration section; it is opened with pygit2 to look up commit hashes.
repo_path = aurweb.config.get('serve', 'repo-path')
# URL template read from the 'commit_uri' key of the 'options' section; it is
# filled in via %-formatting with two values (head name, abbreviated hash).
commit_uri = aurweb.config.get('options', 'commit_uri')

15

16
17
18
19
20
class LinkifyExtension(markdown.extensions.Extension):
    """
    Turn URLs into links, even without explicit markdown.
    Do not linkify URLs in code blocks.
    """

    # Captures http(s) and ftp URLs until the first non URL-ish character.
    # Excludes trailing punctuation.
    _urlre = (r'(\b(?:https?|ftp):\/\/[\w\/\#~:.?+=&%@!\-;,]+?'
              r'(?=[.:?\-;,]*(?:[^\w\/\#~:.?+=&%@!\-;,]|$)))')

    def extendMarkdown(self, md, md_globals=None):
        """
        Register the bare-URL autolink pattern on the Markdown instance.

        md_globals is unused. It is optional because Python-Markdown 3.4+
        calls extendMarkdown(md) with a single argument, while older 3.x
        releases may still pass a second one.
        """
        processor = markdown.inlinepatterns.AutolinkInlineProcessor(
            self._urlre, md)
        # Register it right after the default <>-link processor (priority 120).
        md.inlinePatterns.register(processor, 'linkify', 119)
31
32


33
34
35
36
37
38
39
class FlysprayLinksInlineProcessor(markdown.inlinepatterns.InlineProcessor):
    """
    Turn Flyspray task references like FS#1234 into links to bugs.archlinux.org.

    The pattern's capture group 0 is the text of the link and group 1 is the
    Flyspray task ID.
    """

    def handleMatch(self, m, data):
        """
        Build the <a> element for one matched task reference.

        Returns the element together with the start and end offsets of the
        matched text, as expected from an inline processor.
        """
        # markdown.util.etree was removed in Python-Markdown 3.4; the standard
        # library ElementTree is what it aliased and works with all 3.x.
        el = xml.etree.ElementTree.Element('a')
        el.set('href', f'https://bugs.archlinux.org/task/{m.group(1)}')
        # AtomicString keeps the link text from being processed again by
        # other inline patterns.
        el.text = markdown.util.AtomicString(m.group(0))
        return el, m.start(0), m.end(0)
46
47
48
49


class FlysprayLinksExtension(markdown.extensions.Extension):
    """Markdown extension that links Flyspray task references (FS#1234)."""

    def extendMarkdown(self, md, md_globals=None):
        """
        Register the Flyspray inline processor on the Markdown instance.

        md_globals is unused. It is optional because Python-Markdown 3.4+
        calls extendMarkdown(md) with a single argument.
        """
        processor = FlysprayLinksInlineProcessor(r'\bFS#(\d+)\b', md)
        md.inlinePatterns.register(processor, 'flyspray-links', 118)
52
53


54
55
56
57
58
59
60
61
62
class GitCommitsInlineProcessor(markdown.inlinepatterns.InlineProcessor):
    """
    Turn Git hashes like f7f5152be5ab into links to AUR's cgit.

    Only hashes found in the repository are linkified. Hashes are shortened
    to a prefix that is still found in the repository. Only hashes with at
    least 7 digits are considered.
    """

    # Opened once when the class is defined and shared by all instances.
    _repo = pygit2.Repository(repo_path)

    def __init__(self, md, head):
        """
        Create the processor.

        md is the Markdown instance; head is the name inserted as the first
        value of the commit_uri template (the caller passes the package base
        name).
        """
        self._head = head
        super().__init__(r'\b([0-9a-f]{7,40})\b', md)

    def handleMatch(self, m, data):
        """
        Build the <a> element for one matched hash.

        Returns (element, start, end) for a hash found in the repository, or
        (None, None, None) so that the original text is left untouched.
        """
        oid = m.group(1)
        if oid not in self._repo:
            # Unknown OID; preserve the original text.
            return None, None, None

        # Shorten the displayed hash: start at 12 digits and grow until the
        # prefix is found in the repository. Hashes of 12 digits or fewer are
        # kept as written (slicing past the end returns the whole string).
        prefixlen = 12
        while prefixlen < len(oid):
            if oid[:prefixlen] in self._repo:
                break
            prefixlen += 1

        # markdown.util.etree was removed in Python-Markdown 3.4; the standard
        # library ElementTree is what it aliased and works with all 3.x.
        el = xml.etree.ElementTree.Element('a')
        # Escape the head name so that characters which are special in URLs
        # (e.g. '+', which means a space in a query string) survive intact.
        head = urllib.parse.quote_plus(str(self._head))
        el.set('href', commit_uri % (head, oid[:prefixlen]))
        el.text = markdown.util.AtomicString(oid[:prefixlen])
        return el, m.start(0), m.end(0)
85
86
87
88
89
90
91
92
93
94


class GitCommitsExtension(markdown.extensions.Extension):
    """Markdown extension that links Git commit hashes for one head."""

    _head = None

    def __init__(self, head):
        """Remember the head name that is passed on to the processor."""
        self._head = head
        # Plain super() so that Extension.__init__ actually runs; the former
        # super(markdown.extensions.Extension, self) skipped it.
        super().__init__()

    def extendMarkdown(self, md, md_globals=None):
        """
        Register the commit-hash inline processor on the Markdown instance.

        md_globals is unused. It is optional because Python-Markdown 3.4+
        calls extendMarkdown(md) with a single argument.
        """
        processor = GitCommitsInlineProcessor(md, self._head)
        md.inlinePatterns.register(processor, 'git-commits', 117)
97
98


99
100
101
102
103
104
105
106
107
108
109
class HeadingTreeprocessor(markdown.treeprocessors.Treeprocessor):
    """
    Demote headings so that comments cannot contain large titles.

    h1 becomes h5; h2 through h5 become h6.
    """

    def run(self, doc):
        """Rewrite heading tags in place on the element tree rooted at doc."""
        # iter() walks the whole tree, so headings nested in other block
        # elements (block quotes, list items) are demoted as well, not just
        # direct children of the root. Only tags change, never the tree
        # structure, so modifying elements during iteration is safe.
        for elem in doc.iter():
            if elem.tag == 'h1':
                elem.tag = 'h5'
            elif elem.tag in ('h2', 'h3', 'h4', 'h5'):
                elem.tag = 'h6'


class HeadingExtension(markdown.extensions.Extension):
    """Markdown extension that installs HeadingTreeprocessor."""

    def extendMarkdown(self, md, md_globals=None):
        """
        Register the heading tree processor on the Markdown instance.

        md_globals is unused. It is optional because Python-Markdown 3.4+
        calls extendMarkdown(md) with a single argument.
        """
        # Priority doesn't matter since we don't conflict with other processors.
        md.treeprocessors.register(HeadingTreeprocessor(md), 'heading', 30)
112
113


114
def get_comment(conn, commentid):
    """
    Fetch the raw text of a comment and the name of its package base.

    conn must provide execute(query, params) with '?' placeholders.
    Returns the result of fetchone() on the resulting cursor: a row of
    (Comments, Name), or whatever the cursor yields when nothing matches
    (None for DB-API cursors).
    """
    cur = conn.execute('SELECT PackageComments.Comments, PackageBases.Name '
                       'FROM PackageComments INNER JOIN PackageBases '
                       'ON PackageBases.ID = PackageComments.PackageBaseID '
                       'WHERE PackageComments.ID = ?', [commentid])
    return cur.fetchone()
120
121
122
123
124
125
126
127
128
129
130
131


def save_rendered_comment(conn, commentid, html):
    """
    Store rendered HTML in the RenderedComment column of one comment.

    The statement is only executed here; committing is left to the caller.
    """
    query = 'UPDATE PackageComments SET RenderedComment = ? WHERE ID = ?'
    params = [html, commentid]
    conn.execute(query, params)


def main():
    """
    Render one package comment to sanitized HTML and store the result.

    The comment ID is taken from the first command-line argument. The comment
    text is converted with Markdown plus the extensions defined in this
    module, cleaned with bleach, and written back to the database. Exits
    with an error message if no comment with that ID is found.
    """
    commentid = int(sys.argv[1])

    conn = aurweb.db.Connection()
    try:
        row = get_comment(conn, commentid)
        if row is None:
            # No matching row: fail with a clear message instead of a
            # TypeError from unpacking None below.
            sys.exit(f'rendercomment: comment {commentid} not found')
        text, pkgbase = row

        html = markdown.markdown(text, extensions=[
            'fenced_code',
            LinkifyExtension(),
            FlysprayLinksExtension(),
            GitCommitsExtension(pkgbase),
            HeadingExtension(),
        ])

        # ALLOWED_TAGS is a list in older bleach releases and a frozenset
        # since bleach 6.0; list() makes the concatenation work with both.
        allowed_tags = (list(bleach.sanitizer.ALLOWED_TAGS) +
                        ['p', 'pre', 'h4', 'h5', 'h6', 'br', 'hr'])
        html = bleach.clean(html, tags=allowed_tags)

        save_rendered_comment(conn, commentid, html)
        conn.commit()
    finally:
        # Release the connection on the error paths too.
        conn.close()


# Run the renderer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()