a violet pig being the logo of this site

docbook_build.py¶

Download this file

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2009, 2010 Sebastian Wiesner <lunaryorn@googlemail.com>

# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.


import os
import sys
from optparse import OptionParser
from functools import partial

from lxml import etree
from lxml.html import fragment_fromstring
from pygments import lex, highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_by_name
from pygments.token import Token


# version of this script, handed to the option parser in main()
__version__ = '0.1.0'


# Maps pygments token types to the local names of the xslthl elements, which
# xslthl_highlight() creates for them.  xslthl_highlight() walks up the
# parents of a token type until it finds a key of this mapping, so an entry
# also covers all subtypes, which have no entry of their own.  Tokens
# without any matching entry are emitted as plain text.
XSLTHL_TAGS = {
    Token.Keyword: 'keyword',
    Token.String: 'string',
    Token.Number: 'number',
    Token.Comment: 'comment',
    Token.Comment.Special: 'doccomment',
    Token.Comment.Preproc: 'directive',
    Token.Name.Decorator: 'annotation',
    Token.Name.Tag: 'tag',
    Token.Name.Attribute: 'attribute',
    }


def xslthl_highlight(context, language, code, config):
    """
    Highlight the given ``code`` in the given ``language``.  ``context`` is
    the XPath context, in which this function was applied.  ``config`` is
    ignored.

    ``language`` is indexed, its first item is used as name of the pygments
    lexer (lower-cased).  If ``code`` is empty, all text nodes below the
    context node are used instead.

    Return a single ``xslthl`` wrapper element in the xslthl namespace.  Each
    token, whose type (or one of its parent types) is contained in
    ``XSLTHL_TAGS``, becomes a child element of the wrapper, all other tokens
    are kept as plain text between these elements.
    """
    namespace = 'http://xslthl.sf.net'
    XslthlName = partial(etree.QName, namespace)
    # necessary, because lxml somehow fails to correctly pass code (reason
    # is unknown to me)
    if not code:
        code = context.context_node.xpath('.//text()')
    lexer = get_lexer_by_name(language[0].lower())
    root = etree.Element(XslthlName('xslthl'), nsmap={'xslthl': namespace})

    def flush(chunks):
        # attach plain text collected so far: behind the last element, or
        # as leading text of the wrapper, if there is no element yet
        joined = ''.join(chunks)
        if len(root):
            root[-1].tail = joined
        else:
            root.text = joined

    text = []
    for token, value in lex(''.join(code), lexer):
        # walk up the token hierarchy until the token type maps to a xslthl
        # tag
        while token and token not in XSLTHL_TAGS:
            token = token.parent
        if not token:
            # the token doesn't map to any tag, keep it as simple text
            text.append(value)
            continue
        flush(text)
        text = []
        el = etree.SubElement(root, XslthlName(XSLTHL_TAGS[token]))
        el.text = value
    # don't loose trailing text.  The wrapper may still be empty here (empty
    # code or no token mapped to a tag at all), so root[-1] must not be
    # accessed unconditionally
    flush(text)
    return root


def html_highlight(context, language, code, config):
    """
    Highlight the given ``code`` in the given ``language``.  ``context`` is
    the XPath context, in which this function was applied.  ``config`` is
    ignored.

    ``language`` is indexed, its first item is used as name of the pygments
    lexer (lower-cased).  If ``code`` is empty, all text nodes below the
    context node are used instead.

    Return a list containing a single HTML element with the class
    ``pygments_highlight``, which wraps the highlighted code.
    """
    # necessary, because lxml somehow fails to correctly pass code (reason
    # is unknown to me)
    if not code:
        code = context.context_node.xpath('.//text()')
    lexer = get_lexer_by_name(language[0].lower())
    # join all text nodes like xslthl_highlight() does.  Highlighting only
    # the first one would drop everything behind the first inline element
    # of a listing.
    source = ''.join(code)
    html = highlight(source, lexer, HtmlFormatter(nowrap=True))
    # nowrap output has no single root element, so let lxml create one
    highlight_div = fragment_fromstring(html, create_parent=True)
    highlight_div.set('class', 'pygments_highlight')
    return [highlight_div]


def apply_xslt(stylesheet, docbook_document, base_directory=None,
               highlight_function=xslthl_highlight):
    """
    Run the XSLT ``stylesheet`` on ``docbook_document``.  Both arguments are
    expected to be lxml element trees.

    ``base_directory`` is handed to the stylesheet as ``base.dir``
    parameter, with a trailing path separator appended if missing.  If
    empty or ``None``, the current working directory is used.

    ``highlight_function`` is registered as ``highlight`` extension function
    in the ``http://net.sf.xslthl/ConnectorXalan`` namespace.  Defaults to
    :func:`xslthl_highlight`.

    The result of the transformation is discarded, only the error log of
    the transformation is returned.
    """
    output_directory = base_directory or os.getcwd()
    if not output_directory.endswith(os.sep):
        output_directory = output_directory + os.sep

    # make the highlighting implementation available to the stylesheet
    function_namespace = etree.FunctionNamespace(
        'http://net.sf.xslthl/ConnectorXalan')
    function_namespace.prefix = 'xhl'
    function_namespace['highlight'] = highlight_function

    # run the stylesheet.  The parameter name contains a dot, so it can
    # only be passed by unpacking a mapping
    transformer = etree.XSLT(stylesheet)
    transformer(docbook_document,
                **{'base.dir': etree.XSLT.strparam(output_directory)})
    return transformer.error_log


def print_errors(errors):
    """
    Print each entry of ``errors`` (an iterable of lxml error log entries)
    on a line of its own.

    Entries of type ``ERR_OK`` are printed as bare message, all other
    entries are printed with level, filename, line, column and type name.
    """
    plain_template = '{0.message}'
    detailed_template = ('{0.level_name}:{0.filename}:{0.line},{0.column}: '
                         '{0.message} ({0.type_name})')
    for entry in errors:
        succeeded = entry.type == etree.ErrorTypes.ERR_OK
        template = plain_template if succeeded else detailed_template
        print(template.format(entry))


def main():
    """
    Command line entry point.

    Parse the command line, load the DocBook document (resolving XIncludes),
    optionally validate it against a RELAX NG schema and finally transform
    it with the given stylesheet, printing the error log of the
    transformation.

    Return ``1``, if validation was requested and failed, ``None``
    otherwise.
    """
    parser = OptionParser(
        usage='%prog [-b BASE_DIRECTORY] [-v] [--html] XSLT XML',
        description="Builds documentation",
        epilog="""\
Copyright (c) 2009 Sebastian Wiesner,
licensed under terms of MIT/X11 license""",
        version=__version__)
    parser.add_option('-b', '--base-directory', help='Base directory')
    html_help = ('Create html output.  This uses a better '
                 'highlight function, but only works with the HTML '
                 'stylesheets')
    parser.add_option('--html', action='store_true', help=html_help)
    validate_help = ('Validate the docbook document before '
                     'transformation')
    parser.add_option('-v', '--validate', action='store_true',
                      help=validate_help)
    parser.add_option('--rng-schema', metavar='URL',
                      help='The schema to be used for validation.')
    parser.set_defaults(
        base_directory=os.getcwd(),
        rng_schema='http://www.docbook.org/xml/5.0/rng/docbookxi.rng')

    opts, args = parser.parse_args()
    # exactly two positional arguments are required.  parser.error()
    # terminates the script
    if len(args) != 2:
        if len(args) < 2:
            parser.error('missing arguments')
        parser.error('too many arguments')
    xslt_file, xml_file = args

    if opts.html:
        highlight_function = html_highlight
    else:
        highlight_function = xslthl_highlight

    document = etree.parse(xml_file)
    document.xinclude()
    if opts.validate:
        schema = etree.RelaxNG(file=opts.rng_schema)
        is_valid = schema.validate(document)
        if not is_valid:
            print_errors(schema.error_log)
            return 1

    stylesheet = etree.parse(xslt_file)
    transformation_log = apply_xslt(
        stylesheet, document, base_directory=opts.base_directory,
        highlight_function=highlight_function)
    print_errors(transformation_log)


if __name__ == '__main__':
    try:
        # main() returns 1 if validation failed and None otherwise, which
        # is handed to sys.exit() as exit status of the script
        sys.exit(main())
    except KeyboardInterrupt:
        # deliberately ignored, so that interrupting the script doesn't
        # end in a traceback
        pass

Previous topic

Downloads

Next topic

kcfg.rnc

This Page