Skip to content

Commit 4ff541e

Browse files
authored
Merge PR #59 - Switch Markdown parser to Mistune for speed
#59
2 parents 456ce90 + 1fbed3d commit 4ff541e

File tree

9 files changed

+247
-35
lines changed

9 files changed

+247
-35
lines changed

.travis.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@ python:
33
- '3.5'
44
cache:
55
- pip
6+
install:
7+
- pip install -r requirements.txt pytest
68
script:
9+
- python -m pytest test_naucse
710
- python -m naucse freeze
811
deploy:
912
provider: script

lessons/beginners/cmdline/index.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
```
1010
{%- endfilter -%}
1111
</div>
12-
{% endfor %}
12+
{%- endfor -%}
1313
</div>
1414
{%- endmacro -%}
1515

lessons/beginners/install-editor/_linux_base.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,14 @@
55
Na Linuxu se {{ editor_name }} instaluje jako ostatní programy:
66

77
Fedora
8-
: `sudo dnf install {{ editor_cmd }}`
8+
: ```console
9+
$ sudo dnf install {{ editor_cmd }}
10+
```
911

1012
Ubuntu
11-
: `sudo apt-get install {{ editor_cmd }}`
13+
: ```console
14+
$ sudo apt-get install {{ editor_cmd }}
15+
```
1216

1317
Používáš-li jiný Linux, předpokládám, že programy instalovat umíš :)
1418

lessons/beginners/install-editor/index.md

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -53,17 +53,18 @@ Obarvování
5353

5454
Pro ilustraci, takhle může v editoru vypadat kousek kódu:
5555

56-
:::python
57-
1 @app.route('/courses/<course:course>/')
58-
2 def course_page(course):
59-
3 try:
60-
4 return render_template(
61-
5 'course.html',
62-
6 course=course,
63-
7 plan=course.sessions,
64-
8 )
65-
9 except TemplateNotFound:
66-
10 abort(404)
56+
```python
57+
1 @app.route('/courses/<course:course>/')
58+
2 def course_page(course):
59+
3 try:
60+
4 return render_template(
61+
5 'course.html',
62+
6 course=course,
63+
7 plan=course.sessions,
64+
8 )
65+
9 except TemplateNotFound:
66+
10 abort(404)
67+
```
6768

6869

6970
## Volba a nastavení editoru

lessons/beginners/variables/index.md

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -155,18 +155,21 @@ pro teď to budou kouzelná zaříkadla:
155155

156156
* Chceš-li načíst **řetězec**, použij:
157157

158-
:::python
159-
promenna = input('Zadej řetězec: ')
158+
```python
159+
promenna = input('Zadej řetězec: ')
160+
```
160161

161162
* Chceš-li načíst **celé číslo**, použij:
162163

163-
:::python
164-
promenna = int(input('Zadej číslo: '))
164+
```python
165+
promenna = int(input('Zadej číslo: '))
166+
```
165167

166168
* Chceš-li načíst **desetinné číslo**, použij:
167169

168-
:::python
169-
promenna = float(input('Zadej číslo: '))
170+
```python
171+
promenna = float(input('Zadej číslo: '))
172+
```
170173

171174
Místo řetězce `'Zadej …'` se dá napsat i jiná výzva.
172175
A výsledek se samozřejmě dá uložit i do jiné proměnné než `promenna`.

naucse/markdown_util.py

Lines changed: 117 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,131 @@
11
from textwrap import dedent
2+
import re
23

3-
from markdown import Markdown
4-
from markdown.extensions.admonition import AdmonitionExtension
5-
from markdown.extensions.codehilite import CodeHiliteExtension
6-
from markdown.extensions.fenced_code import FencedCodeExtension
7-
from markdown.extensions.def_list import DefListExtension
4+
import mistune
85
from jinja2 import Markup
6+
import pygments
7+
import pygments.lexers
8+
import pygments.formatters.html
9+
10+
# Module-level Pygments HTML formatter, built once with cssclass='codehilite'.
pygments_formatter = pygments.formatters.html.HtmlFormatter(cssclass='codehilite')
13+
14+
15+
class BlockGrammar(mistune.BlockGrammar):
    """Block grammar extended with admonition and definition-list patterns."""

    # Admonition: "!!!", optional spaces, a non-whitespace name (group 1),
    # optional spaces, a double-quoted title that may be empty (group 2), a
    # newline, then a body (group 3) made of newlines and of line text that
    # begins with a space.
    # NOTE(review): whitespace inside these patterns is significant -- confirm
    # the literals against the repository before relying on them.
    admonition = re.compile(r'^!!! *(\S+) *"([^"]*)"\n((\n| .*)+)')
    # Definition list: one or more term lines that do not start with a
    # newline, ':' or space (group 1), then (group 3) a line starting with ':'
    # and up to three spaces (group 5), followed by at least one line that is
    # either blank or starts with a space plus the same run of spaces as
    # group 5.
    deflist = re.compile(r'^(([^\n: ][^\n]*\n)+)((:( {0,3})[^\n]*\n)( \5[^\n]*\n|\n)+)')
18+
19+
20+
class BlockLexer(mistune.BlockLexer):
    """Block lexer that additionally tokenizes admonitions and definition lists."""

    grammar_class = BlockGrammar

    # The two custom rules are listed ahead of mistune's stock rules.
    default_rules = ['admonition', 'deflist'] + mistune.BlockLexer.default_rules

    def _parse_between(self, start_token, text, end_type):
        """Append *start_token*, lex the dedented *text*, then append an end token."""
        self.tokens.append(start_token)
        self.parse(dedent(text))
        self.tokens.append({'type': end_type})

    def parse_admonition(self, m):
        """Emit ``admonition_start``/``admonition_end`` around the lexed body.

        The start token carries the admonition name (group 1) and title
        (group 2); the body is group 3 of the match.
        """
        opening = {
            'type': 'admonition_start',
            'name': m.group(1),
            'title': m.group(2),
        }
        self._parse_between(opening, m.group(3), 'admonition_end')

    def parse_deflist(self, m):
        """Emit a term section (group 1) followed by a definition section (group 3)."""
        terms = m.group(1)
        # The first character of group 3 is swapped for a space before
        # dedenting, so the first line lines up with the lines that follow.
        definition = ' ' + m.group(3)[1:]
        self._parse_between(
            {'type': 'deflist_term_start'}, terms, 'deflist_term_end')
        self._parse_between(
            {'type': 'deflist_def_start'}, definition, 'deflist_def_end')
54+
55+
56+
class Renderer(mistune.Renderer):
    """HTML renderer adding admonitions, Pygments code blocks and definition lists."""

    def admonition(self, name, content):
        """Wrap *content* in a ``<div>`` whose classes are ``admonition`` and *name*."""
        template = '<div class="admonition {}">{}</div>'
        return template.format(name, content)

    def block_code(self, code, lang):
        """Render a code block.

        A language other than ``plain`` is highlighted with Pygments; a
        missing, empty or ``plain`` language yields escaped code in the
        same ``codehilite`` wrapper.
        """
        language = lang.strip() if lang is not None else lang
        if language and language != 'plain':
            lexer = pygments.lexers.get_lexer_by_name(language)
            highlighted = pygments.highlight(code, lexer, pygments_formatter)
            return highlighted.strip()
        # No usable language: escape only, no highlighting.
        template = '<div class="codehilite"><pre><code>{}</code></pre></div>'
        return template.format(mistune.escape(code))

    def deflist(self, items):
        """Render ``(kind, text)`` pairs as a ``<dl>``; kind is ``'term'`` or ``'def'``."""
        tag_for = {'term': 'dt', 'def': 'dd'}
        rendered = []
        for kind, text in items:
            tag = tag_for[kind]
            rendered.append('<{tag}>{text}</{tag}>'.format(tag=tag, text=text))
        return '<dl>\n{}</dl>'.format(''.join(rendered))
75+
76+
77+
class Markdown(mistune.Markdown):
    """``mistune.Markdown`` subclass with output handlers for the custom tokens.

    NOTE(review): the handlers are presumably dispatched by mistune from the
    ``admonition_start`` / ``deflist_term_start`` token types -- confirm
    against the mistune version in use.
    """

    def output_admonition(self):
        """Render tokens up to ``admonition_end`` and wrap them via the renderer.

        A non-empty title is emitted first as ``<p class="admonition-title">``.
        """
        name = self.token['name']
        body = self.renderer.placeholder()
        if self.token['title']:
            template = '<p class="admonition-title">{}</p>\n'
            body += template.format(self.token['title'])
        # Pop and render tokens until the matching end token has been popped.
        while self.pop()['type'] != 'admonition_end':
            body += self.tok()
        return self.renderer.admonition(name, body)

    def output_deflist_term(self):
        """Collect consecutive term/definition sections and render them as one list.

        ``items`` is a list of ``[kind, text]`` pairs (kind is ``'term'`` or
        ``'def'``) that is finally passed to ``renderer.deflist``.
        """
        # Start with an empty 'term' entry for the section being opened.
        items = [['term', self.renderer.placeholder()]]
        while True:
            # The end token to wait for depends on the kind of the last item.
            end_token = 'deflist_{}_end'.format(items[-1][0])
            # Render nested tokens into the current item until the section
            # ends or a paragraph token is reached.
            while self.pop()['type'] not in (end_token, 'paragraph'):
                items[-1][1] += self.tok()
            if self.token['type'] == 'paragraph':
                if items[-1][0] == 'term':
                    # A paragraph inside a term section becomes its own term
                    # entry, filled with the token's raw text.
                    items.append(['term', self.renderer.placeholder()])
                    items[-1][1] += self.token['text']
                else:
                    # Paragraphs inside a definition are rendered normally.
                    items[-1][1] += self.output_paragraph()
            elif self.peek()['type'] == 'deflist_term_start':
                # Another term section follows directly: keep the same list.
                self.pop()
                items.append(['term', self.renderer.placeholder()])
            elif self.peek()['type'] == 'deflist_def_start':
                # A definition section follows: open a 'def' entry.
                self.pop()
                items.append(['def', self.renderer.placeholder()])
            else:
                # The next token is not part of a definition list.
                break
        return self.renderer.deflist(items)
109+
9110

10111
# Module-level converter instance wired to the custom block lexer and renderer.
markdown = Markdown(escape=False, block=BlockLexer(), renderer=Renderer())
18116

117+
19118
def convert_markdown(text, *, inline=False):
    """Convert Markdown *text* to HTML and return it as ``Markup``.

    :param text: Markdown source; common leading indentation is removed
        with ``dedent`` before conversion.
    :param inline: if true and the output consists of exactly one ``<p>``
        element, return its contents without the surrounding ``<p>`` tags.
        Any other output is returned unchanged.
    :return: the rendered HTML as ``Markup``.
    """
    # Workaround for https://github.com/lepture/mistune/issues/125
    # NO-BREAK SPACE is swapped for a placeholder character during the
    # conversion and restored at the end.
    NBSP_REPLACER = '\uf8ff'
    text = text.replace('\N{NO-BREAK SPACE}', NBSP_REPLACER)

    text = dedent(text)
    result = Markup(markdown(text))

    if inline:
        # A rendered paragraph is followed by a newline, so the check has to
        # run on the stripped output; otherwise it never matches.
        stripped = result.strip()
        if stripped.startswith('<p>') and stripped.endswith('</p>'):
            inner = stripped[len('<p>'):-len('</p>')]
            # Unwrap a single paragraph only: removing the outer tags of
            # multi-paragraph output would leave unbalanced HTML.
            if '</p>' not in inner:
                result = inner

    # Workaround for https://github.com/lepture/mistune/issues/125
    result = result.replace(NBSP_REPLACER, '\N{NO-BREAK SPACE}')
    return result

naucse/templates.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,11 @@ def __str__(self):
7676

7777
@template_function
def figure(img, alt):
    """Return the markup for a figure: an image linking to its own file.

    :param img: value used for both the link ``href`` and the image ``src``.
    :param alt: value used for the image ``alt`` attribute.
    """
    # Every template line is stripped and the pieces are concatenated, so the
    # resulting markup has no whitespace between the tags.
    template_lines = """
        <span class="figure">
            <a href="{img}">
                <img src="{img}" alt="{alt}">
            </a>
        </span>
    """.splitlines()
    pieces = [line.strip() for line in template_lines]
    t = Markup(''.join(pieces))
    return t.strip().format(img=img, alt=alt)

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ PyYAML
22
flask
33
elsa>=0.1.2
44
frozen-flask
5-
markdown
5+
mistune
66
jinja2
77
werkzeug
88
pygments

test_naucse/test_markdown.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
from textwrap import dedent
2+
3+
from naucse.markdown_util import convert_markdown
4+
5+
6+
def test_markdown_admonition():
7+
src = dedent("""
8+
!!! note ""
9+
Foo *bar*
10+
""")
11+
expected = '<div class="admonition note"><p>Foo <em>bar</em></p>\n</div>'
12+
assert convert_markdown(src) == expected
13+
14+
15+
def test_markdown_admonition_paragraphs():
16+
src = dedent("""
17+
!!! note ""
18+
19+
Foo *fi*
20+
21+
fo
22+
23+
fum
24+
""")
25+
expected = dedent("""
26+
<div class="admonition note"><p>Foo <em>fi</em></p>
27+
<p>fo</p>
28+
<p>fum</p>
29+
</div>
30+
""").strip()
31+
assert convert_markdown(src) == expected
32+
33+
34+
def test_markdown_admonition_name():
35+
src = dedent("""
36+
!!! note "NB!"
37+
38+
foo
39+
""")
40+
expected = dedent("""
41+
<div class="admonition note"><p class="admonition-title">NB!</p>
42+
<p>foo</p>
43+
</div>
44+
""").strip()
45+
assert convert_markdown(src) == expected
46+
47+
48+
def test_markdown_definition_list():
49+
src = dedent("""
50+
Bla Bla
51+
52+
The Term
53+
: Its Definition
54+
55+
More Text
56+
""")
57+
expected = dedent("""
58+
<p>Bla Bla</p>
59+
<dl>
60+
<dt></dt><dt>The Term</dt><dd><p>Its Definition</p>
61+
</dd></dl><p>More Text</p>
62+
""").strip()
63+
assert convert_markdown(src).strip() == expected
64+
65+
66+
def test_markdown_definition_list_advanced():
67+
src = dedent("""
68+
Bla Bla
69+
70+
The Term
71+
: Its Definition
72+
More Definition
73+
74+
Even More
75+
76+
Another Term
77+
: Define this
78+
79+
More Text
80+
""")
81+
expected = dedent("""
82+
<p>Bla Bla</p>
83+
<dl>
84+
<dt></dt><dt>The Term</dt><dd><p>Its Definition
85+
More Definition</p>
86+
<p>Even More</p>
87+
</dd><dt></dt><dt>Another Term</dt><dd><p>Define this</p>
88+
</dd></dl><p>More Text</p>
89+
""").strip()
90+
print(convert_markdown(src))
91+
assert convert_markdown(src).strip() == expected
92+
93+
94+
def test_markdown_keeps_nbsp():
    """A NO-BREAK SPACE in the input must come out of the conversion unchanged."""
    source = 'Some text\N{NO-BREAK SPACE}more text'
    html = convert_markdown(source).strip()
    assert html == '<p>{}</p>'.format(source)

0 commit comments

Comments
 (0)