Bug 731779: Integrate the Add-on SDK loader and API libraries into Firefox (uplifting from addon-sdk a16bbd5772880b578a939eeb65102bca6560d494)
This commit is contained in:
460
addon-sdk/source/python-lib/markdown/blockprocessors.py
Normal file
460
addon-sdk/source/python-lib/markdown/blockprocessors.py
Normal file
@@ -0,0 +1,460 @@
|
||||
"""
|
||||
CORE MARKDOWN BLOCKPARSER
|
||||
=============================================================================
|
||||
|
||||
This parser handles basic parsing of Markdown blocks. It doesn't concern itself
|
||||
with inline elements such as **bold** or *italics*, but rather just catches
|
||||
blocks, lists, quotes, etc.
|
||||
|
||||
The BlockParser is made up of a bunch of BlockProcessors, each handling a
|
||||
different type of block. Extensions may add/replace/remove BlockProcessors
|
||||
as they need to alter how markdown blocks are parsed.
|
||||
|
||||
"""
|
||||
|
||||
import re
|
||||
import markdown
|
||||
|
||||
class BlockProcessor:
    """ Common base for every block processor.

    A processor pairs a ``test`` method, which tells the parser whether a
    block of text belongs to this processor, with a ``run`` method, which the
    parser calls to consume the block once ``test`` has accepted it.
    Subclasses supply their own ``test`` and ``run``; the helper methods
    defined here are available to all of them.

    """

    def __init__(self, parser=None):
        # The parser this processor is attached to (may be None).
        self.parser = parser

    def lastChild(self, parent):
        """ Return the final child of ``parent``, or None if it has none. """
        if not len(parent):
            return None
        return parent[-1]

    def detab(self, text):
        """ Strip one indent from the leading indented lines of ``text``.

        Lines are consumed from the top while they either start with
        ``markdown.TAB_LENGTH`` spaces (that indent is removed) or contain
        only whitespace (they become empty lines). Scanning stops at the
        first line that is neither.

        Returns a 2-tuple of strings: the detabbed leading lines and the
        untouched remaining lines, each joined with newlines.

        """
        indent = ' ' * markdown.TAB_LENGTH
        source_lines = text.split('\n')
        detabbed = []
        for current in source_lines:
            if current.startswith(indent):
                detabbed.append(current[markdown.TAB_LENGTH:])
            elif current.strip():
                # First non-blank line without the indent ends the run.
                break
            else:
                detabbed.append('')
        remainder = source_lines[len(detabbed):]
        return '\n'.join(detabbed), '\n'.join(remainder)

    def looseDetab(self, text, level=1):
        """ Strip ``level`` indents from every line that has them.

        Unlike ``detab``, lines that are not indented far enough are kept
        unchanged and scanning continues through the whole text.

        """
        width = markdown.TAB_LENGTH * level
        prefix = ' ' * markdown.TAB_LENGTH * level
        result = []
        for line in text.split('\n'):
            if line.startswith(prefix):
                line = line[width:]
            result.append(line)
        return '\n'.join(result)

    def test(self, parent, block):
        """ Decide whether ``block`` is of this processor's type.

        Subclasses must override this. The parser walks its processors and
        calls ``test`` on each one; the method must answer with a boolean
        ``True`` or ``False``. How the decision is made is up to the block
        type: anything from ``block.startswith(some_string)`` to a complex
        regular expression. Because the answer may depend on where the block
        sits (e.g. inside a list), the parent element is passed in as well.

        Keywords:

        * ``parent``: The etree element which will be the parent of the block.
        * ``block``: A block of text from the source which has been split at
          blank lines.

        """

    def run(self, parent, blocks):
        """ Consume the current block. Subclasses must override this.

        Called by the parser on the processor whose ``test`` accepted the
        block. The method should parse the lines of the block and attach the
        result to the etree.

        Both arguments are edited in place; there is no mechanism to hand
        back replacement objects. In practice that means adding SubElements
        or text to ``parent`` and removing (``pop``) or adding (``insert``)
        entries in ``blocks``.

        Keywords:

        * ``parent``: The etree element which is the parent of the current
          block.
        * ``blocks``: A list of all remaining blocks of the document.

        """
|
||||
|
||||
|
||||
class ListIndentProcessor(BlockProcessor):
    """ Process children of list items.

    Example:
        * a list item
            process this part

            or this part

    """

    # One or more whole indent units at the very start of a block;
    # ``group(1)`` is the complete leading indent.
    INDENT_RE = re.compile(r'^(([ ]{%s})+)'% markdown.TAB_LENGTH)
    # Tags treated as list items.
    ITEM_TYPES = ['li']
    # Tags treated as list containers.
    LIST_TYPES = ['ul', 'ol']

    def test(self, parent, block):
        """ Accept indented blocks that belong to a list.

        The result is truthy when the block starts with one indent, the
        parser is not already in the ``detabbed`` state, and either
        ``parent`` is a list item or the last child of ``parent`` is a list.

        """
        # NOTE(review): ``parent[-1]`` is truth-tested. With ElementTree-style
        # elements that is presumably false for an element without
        # sub-elements, not only for a missing one -- confirm before
        # replacing it with an ``is not None`` check.
        return block.startswith(' '*markdown.TAB_LENGTH) and \
                not self.parser.state.isstate('detabbed') and \
                (parent.tag in self.ITEM_TYPES or \
                    (len(parent) and parent[-1] and \
                        (parent[-1].tag in self.LIST_TYPES)
                    )
                )

    def run(self, parent, blocks):
        """ Detab the first block and parse it under the right list element.

        Pops the first entry of ``blocks``, removes the indent levels
        reported by ``get_level`` and parses the result while the parser
        state is ``detabbed``. The state is reset before returning.

        """
        block = blocks.pop(0)
        level, sibling = self.get_level(parent, block)
        block = self.looseDetab(block, level)

        self.parser.state.set('detabbed')
        if parent.tag in self.ITEM_TYPES:
            # The parent is already a li. Just parse the child block.
            self.parser.parseBlocks(parent, [block])
        elif sibling.tag in self.ITEM_TYPES:
            # The sibling is a li. Use it as parent.
            self.parser.parseBlocks(sibling, [block])
        elif len(sibling) and sibling[-1].tag in self.ITEM_TYPES:
            # The sibling is a list (``ol`` or ``ul``) which has children.
            # Assume the last child li is the parent of this block.
            if sibling[-1].text:
                # The li already has text: prepend it to the block as its
                # own chunk so both are parsed together, and clear it on
                # the li.
                block = '%s\n\n%s' % (sibling[-1].text, block)
                sibling[-1].text = ''
            self.parser.parseChunk(sibling[-1], block)
        else:
            self.create_item(sibling, block)
        self.parser.state.reset()

    def create_item(self, parent, block):
        """ Create a new li and parse the block with it as the parent. """
        li = markdown.etree.SubElement(parent, 'li')
        self.parser.parseBlocks(li, [block])

    def get_level(self, parent, block):
        """ Get level of indent based on list level.

        Returns a ``(level, parent)`` tuple: the number of list levels
        matched and the element reached while descending through the last
        children of ``parent``.

        """
        # Number of whole indent units at the start of the block. Floor
        # division keeps this an integer even under true division.
        m = self.INDENT_RE.match(block)
        if m:
            indent_level = len(m.group(1)) // markdown.TAB_LENGTH
        else:
            indent_level = 0
        if self.parser.state.isstate('list'):
            # We're in a tightlist - so we already are at correct parent.
            level = 1
        else:
            # We're in a looselist - so we need to find parent.
            level = 0
        # Step through children of tree to find matching indent level.
        while indent_level > level:
            child = self.lastChild(parent)
            # NOTE(review): ``child`` is truth-tested, which for
            # ElementTree-style elements presumably also rejects a child
            # with no sub-elements; ``run`` appears to depend on that, so
            # confirm before changing it.
            if child and (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES):
                if child.tag in self.LIST_TYPES:
                    # Only list containers count as a nesting level.
                    level += 1
                parent = child
            else:
                # No more child levels. If we're short of indent_level,
                # we have a code block. So we stop here.
                break
        return level, parent
|
||||
|
||||
|
||||
class CodeBlockProcessor(BlockProcessor):
    """ Turn indented blocks into ``pre``/``code`` elements. """

    def test(self, parent, block):
        indent = ' ' * markdown.TAB_LENGTH
        return block.startswith(indent)

    def run(self, parent, blocks):
        previous = self.lastChild(parent)
        raw = blocks.pop(0)
        # Separate the indented head of the block from any trailing lines
        # that are not part of the code.
        code_text, leftover = self.detab(raw)
        continues_code = (previous and previous.tag == "pre"
                          and len(previous) and previous[0].tag == "code")
        if continues_code:
            # The preceding element is already a code block. Blank lines do
            # not start a new one, so extend it and put back the line break
            # that was lost when the source was split into blocks.
            code = previous[0]
            code.text = markdown.AtomicString(
                '%s\n%s\n' % (code.text, code_text.rstrip()))
        else:
            # Start a fresh code block.
            pre = markdown.etree.SubElement(parent, 'pre')
            code = markdown.etree.SubElement(pre, 'code')
            code.text = markdown.AtomicString('%s\n' % code_text.rstrip())
        if leftover:
            # Lines following the indented part go back to the front of the
            # queue so another processor can handle them.
            blocks.insert(0, leftover)
|
||||
|
||||
|
||||
class BlockQuoteProcessor(BlockProcessor):
    """ Process blockquotes (lines introduced by ``>``). """

    # A ``>`` marker at the start of any line, preceded by up to three
    # spaces; ``group(2)`` is the text after the marker.
    RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)')

    def test(self, parent, block):
        return self.RE.search(block) is not None

    def run(self, parent, blocks):
        block = blocks.pop(0)
        found = self.RE.search(block)
        if found:
            start = found.start()
            # Whatever precedes the quote is parsed first, on its own.
            self.parser.parseBlocks(parent, [block[:start]])
            # Drop the ``> `` marker from every line of the quote.
            unquoted = []
            for line in block[start:].split('\n'):
                unquoted.append(self.clean(line))
            block = '\n'.join(unquoted)
        previous = self.lastChild(parent)
        if previous and previous.tag == "blockquote":
            # Continue the blockquote that is already there.
            quote = previous
        else:
            # Open a new blockquote element.
            quote = markdown.etree.SubElement(parent, 'blockquote')
        # The quote's contents are parsed with the blockquote as parent.
        self.parser.parseChunk(quote, block)

    def clean(self, line):
        """ Return ``line`` without its leading ``>`` marker. """
        if line.strip() == ">":
            return ""
        marker = self.RE.match(line)
        if marker:
            return marker.group(2)
        return line
|
||||
|
||||
class OListProcessor(BlockProcessor):
    """ Handle numbered (ordered) list blocks. """

    # Element created for a new list.
    TAG = 'ol'
    # First line of a list (``1. item``); ``group(1)`` is the item text.
    RE = re.compile(r'^[ ]{0,3}\d+\.[ ](.*)')
    # Any item line, numbered or bulleted; ``group(3)`` is the item text.
    CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.)|[*+-])[ ](.*)')
    # An item line of either kind indented by four to seven spaces.
    INDENT_RE = re.compile(r'^[ ]{4,7}((\d+\.)|[*+-])[ ].*')

    def test(self, parent, block):
        return self.RE.match(block) is not None

    def run(self, parent, blocks):
        # A single block may hold several items.
        items = self.get_items(blocks.pop(0))
        previous = self.lastChild(parent)
        state = self.parser.state
        if previous and previous.tag in ['ol', 'ul']:
            # The block continues the list that is already there.
            lst = previous
            if len(lst):
                last_item = lst[-1]
                if last_item.text and not len(last_item):
                    # Move the bare text of the previous item into a p.
                    p = markdown.etree.SubElement(last_item, 'p')
                    p.text = last_item.text
                    last_item.text = ''
            # The first item is parsed in the loose-list state so that it
            # gets wrapped in a p as well.
            li = markdown.etree.SubElement(lst, 'li')
            state.set('looselist')
            self.parser.parseBlocks(li, [items.pop(0)])
            state.reset()
        else:
            # No list to continue: open a new one with this class's tag.
            lst = markdown.etree.SubElement(parent, self.TAG)
        state.set('list')
        indent = ' ' * markdown.TAB_LENGTH
        for item in items:
            if item.startswith(indent):
                # Indented item: it belongs to the most recent li.
                target = lst[-1]
            else:
                # Regular item: give it a li of its own.
                target = markdown.etree.SubElement(lst, 'li')
            self.parser.parseBlocks(target, [item])
        state.reset()

    def get_items(self, block):
        """ Split ``block`` into a list of item strings. """
        indent = ' ' * markdown.TAB_LENGTH
        items = []
        for line in block.split('\n'):
            child = self.CHILD_RE.match(line)
            if child:
                # A new top-level item; keep only its text.
                items.append(child.group(3))
                continue
            if self.INDENT_RE.match(line) and not items[-1].startswith(indent):
                # First line of an indented (possibly nested) item.
                items.append(line)
            else:
                # Either a further indented line following an indented item,
                # or a continuation line: extend the previous item.
                items[-1] = '%s\n%s' % (items[-1], line)
        return items
|
||||
|
||||
|
||||
class UListProcessor(OListProcessor):
    """ Handle bulleted (unordered) list blocks. """

    # Element created for a new list.
    TAG = 'ul'
    # First line of a bulleted list: up to three spaces, one of ``*``, ``+``
    # or ``-``, a space, then the item text captured in ``group(1)``.
    RE = re.compile(r'^[ ]{0,3}[*+-][ ](.*)')
|
||||
|
||||
|
||||
class HashHeaderProcessor(BlockProcessor):
    """ Process Hash Headers. """

    # Detect a header at start of any line in block. ``level`` captures the
    # one to six leading hashes, ``header`` the text before any closing
    # hashes.
    RE = re.compile(r'(^|\n)(?P<level>#{1,6})(?P<header>.*?)#*(\n|$)')

    def test(self, parent, block):
        """ Return True if any line of ``block`` starts with a hash header. """
        return bool(self.RE.search(block))

    def run(self, parent, blocks):
        """ Turn the first hash header of the first block into an ``hN``.

        Pops the first entry of ``blocks``. Lines before the header are
        parsed immediately under ``parent``; lines after it are pushed back
        onto the front of ``blocks`` for later parsing.

        """
        block = blocks.pop(0)
        m = self.RE.search(block)
        if m:
            before = block[:m.start()] # All lines before header
            after = block[m.end():]    # All lines after header
            if before:
                # As the header was not the first line of the block and the
                # lines before the header must be parsed first,
                # recursively parse these lines as a block.
                self.parser.parseBlocks(parent, [before])
            # Create header using named groups from RE
            h = markdown.etree.SubElement(parent, 'h%d' % len(m.group('level')))
            h.text = m.group('header').strip()
            if after:
                # Insert remaining lines as first block for future parsing.
                blocks.insert(0, after)
        else:
            # This should never happen, but just in case... The previous
            # code called ``message(CRITICAL, ...)``, but neither name is
            # imported in this module, so reaching this branch raised
            # NameError. Report through the standard logging module instead.
            import logging
            logging.getLogger(__name__).critical("We've got a problem header!")
|
||||
|
||||
|
||||
class SetextHeaderProcessor(BlockProcessor):
    """ Handle headers underlined with ``=`` or ``-`` (Setext style). """

    # A first line followed by a line starting with three or more ``=`` or
    # ``-`` characters. ``test`` anchors this at the start of the block.
    RE = re.compile(r'^.*?\n[=-]{3,}', re.MULTILINE)

    def test(self, parent, block):
        return self.RE.match(block) is not None

    def run(self, parent, blocks):
        lines = blocks.pop(0).split('\n')
        # An ``=`` underline makes a level 1 header, anything else level 2.
        level = 2
        if lines[1].startswith('='):
            level = 1
        header = markdown.etree.SubElement(parent, 'h%d' % level)
        header.text = lines[0].strip()
        trailing = lines[2:]
        if trailing:
            # Lines after the underline are queued for later parsing.
            blocks.insert(0, '\n'.join(trailing))
|
||||
|
||||
|
||||
class HRProcessor(BlockProcessor):
    """ Handle horizontal rules. """

    # Pattern source for one rule: up to three spaces, then at least three
    # of the same ``*``, ``_`` or ``-`` character, optionally space-separated.
    RE = r'[ ]{0,3}(?P<ch>[*_-])[ ]?((?P=ch)[ ]?){2,}[ ]*'
    # Finds a rule on any line of a block.
    SEARCH_RE = re.compile(r'(^|\n)%s(\n|$)' % RE)
    # Matches a single line that is a rule.
    MATCH_RE = re.compile(r'^%s$' % RE)

    def test(self, parent, block):
        return self.SEARCH_RE.search(block) is not None

    def run(self, parent, blocks):
        lines = blocks.pop(0).split('\n')
        # Locate the first rule line; if none matches, every line counts as
        # preceding the rule.
        rule_at = len(lines)
        for position, line in enumerate(lines):
            if self.MATCH_RE.match(line):
                rule_at = position
                break
        leading = lines[:rule_at]
        if leading:
            # Lines above the rule are parsed first so they come before it.
            self.parser.parseBlocks(parent, ['\n'.join(leading)])
        markdown.etree.SubElement(parent, 'hr')
        following = lines[rule_at + 1:]
        if following:
            # Lines below the rule are queued for later parsing.
            blocks.insert(0, '\n'.join(following))
|
||||
|
||||
|
||||
class EmptyBlockProcessor(BlockProcessor):
    """ Process blocks that start with an empty line. """

    # Detect a block that only contains whitespace
    # or only whitespace on the first line.
    RE = re.compile(r'^\s*\n')

    def test(self, parent, block):
        """ Return True if ``block`` begins with a whitespace-only line. """
        return bool(self.RE.match(block))

    def run(self, parent, blocks):
        """ Drop the leading empty line of the first block.

        Pops the first entry of ``blocks`` and pushes the text after the
        matched leading whitespace back onto the front of the list. If the
        last child of ``parent`` is a ``pre`` holding a ``code`` element,
        newlines are appended to that code's text.

        """
        block = blocks.pop(0)
        m = self.RE.match(block)
        if m:
            # Add remaining line to master blocks for later.
            blocks.insert(0, block[m.end():])
            sibling = self.lastChild(parent)
            # NOTE(review): ``sibling[0]`` is truth-tested; with
            # ElementTree-style elements a ``code`` element without
            # sub-elements is presumably falsy, in which case this branch
            # never fires -- confirm the intended condition.
            if sibling and sibling.tag == 'pre' and sibling[0] and \
                    sibling[0].tag == 'code':
                # Last block is a codeblock. Append to preserve whitespace.
                # The original format string used ``/n`` and would have
                # appended literal slashes instead of line breaks.
                sibling[0].text = markdown.AtomicString('%s\n\n\n' % sibling[0].text)
|
||||
|
||||
|
||||
class ParagraphProcessor(BlockProcessor):
    """ Fallback processor: handle a block as paragraph text. """

    def test(self, parent, block):
        # Accepts every block.
        return True

    def run(self, parent, blocks):
        block = blocks.pop(0)
        if not block.strip():
            # Blank blocks are simply discarded.
            return
        if not self.parser.state.isstate('list'):
            # Ordinary case: wrap the text in a new paragraph element.
            paragraph = markdown.etree.SubElement(parent, 'p')
            paragraph.text = block.lstrip()
            return
        # Inside a tight list the text goes straight onto the parent.
        if parent.text:
            parent.text = '%s\n%s' % (parent.text, block)
        else:
            parent.text = block.lstrip()
|
||||
Reference in New Issue
Block a user