123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- from __future__ import print_function, unicode_literals
- import codecs
- import os
- import re
- import sys
- from collections import Callable
- from functools import partial
# print() variant that writes to stderr instead of stdout.
SAY = partial(print, file=sys.stderr)

# Directory that ``.. include::`` paths are joined onto. It is set by the
# script entry point and temporarily rebound while an included file is
# being converted.
dirname = ''

# ``.. code-block:: <argument>`` directive: group 1 is the leading
# whitespace, group 2 the directive argument.
RE_CODE_BLOCK = re.compile(r'(\s*).. code-block:: (.+?)\s*$')
# ``.. include:: <path>`` directive: group 1 is the path.
RE_INCLUDE = re.compile(r'\s*.. include:: (.+?)\s*$')
# Inline role such as :ref:`content`: groups are (role name, content).
RE_REFERENCE = re.compile(r':(\w+):`(.+?)`')
# Role content of the form ``text <target>``: groups are (text, target).
# Raw string: ``\<`` is not a valid string escape and newer Python
# versions warn about it; the compiled pattern is unchanged.
RE_NAMED_REF = re.compile(r'(.+?)\<(.+)\>')

# Non-ASCII characters and the ASCII text that replaces them.
UNITABLE = {
    '…': '...',
    '“': '"',
    '”': '"',
}
X = re.compile(re.escape('…'))
# A line made up solely of ``=``, ``~`` or ``-`` characters (a section
# underline). Raw string for the same reason as RE_NAMED_REF.
HEADER = re.compile(r'^[\=\~\-]+$')
# Matches any single key of UNITABLE.
UNIRE = re.compile('|'.join(re.escape(p) for p in UNITABLE),
                   re.UNICODE)

# Prefix for entries in REFS that are not already absolute URLs.
REFBASE = 'http://docs.celeryproject.org/en/latest'
# Known ``:ref:`` labels and where they point.
REFS = {
    'mailing-list':
        'http://groups.google.com/group/celery-users',
    'irc-channel': 'getting-started/resources.html#irc',
    'breakpoint-signal': 'tutorials/debugging.html',
    'internals-guide': 'internals/guide.html',
    'bundles': 'getting-started/introduction.html#bundles',
    'reporting-bugs': 'contributing.html#reporting-bugs',
}

# Link text -> URL for every reference converted so far; the matching
# target definitions are emitted at the end of the output.
pending_refs = {}
def _replace_handler(match, key=UNITABLE.__getitem__):
    """Return the replacement for the text matched by ``match``.

    ``key`` maps the full matched text to its replacement; by default it
    is a lookup in ``UNITABLE``.
    """
    matched_text = match.group(0)
    return key(matched_text)
def include_file(lines, pos, match):
    """Replace the ``.. include::`` directive at ``lines[pos]`` in place.

    The path captured by ``match`` is joined onto the module-level
    ``dirname``, the file is read as UTF-8 and converted with
    ``sphinx_to_rst``, and the resulting text is stored in ``lines[pos]``.

    While the included file is being converted, ``dirname`` is rebound to
    the directory of that file so that includes nested inside it resolve
    relative to it. The previous value is restored afterwards, even when
    the conversion raises.
    """
    global dirname
    orig_filename = match.group(1)
    filename = os.path.join(dirname, orig_filename)
    with codecs.open(filename, encoding='utf-8') as fh:
        old_dirname = dirname
        # Use the resolved path rather than the raw directive argument:
        # otherwise the directory we started from is dropped and nested
        # includes are looked up relative to the working directory.
        dirname = os.path.dirname(filename)
        try:
            lines[pos] = sphinx_to_rst(fh)
        finally:
            dirname = old_dirname
def asciify(lines):
    """Yield ``lines`` with the characters listed in ``UNITABLE`` replaced.

    When a replacement made a line longer and the next line is a section
    underline (matches ``HEADER``), the underline is extended by the same
    number of characters.

    Each yielded line is text that is guaranteed to be pure ASCII.

    Raises:
        UnicodeEncodeError: if a line still contains a non-ASCII
            character after the replacements.
    """
    prev_diff = None
    for line in lines:
        new_line = UNIRE.sub(_replace_handler, line)
        if prev_diff and HEADER.match(new_line):
            # The previous line grew by ``prev_diff`` characters; repeat
            # the underline character that many extra times.
            new_line = ''.join([
                new_line.rstrip(), new_line[0] * prev_diff, '\n'])
        prev_diff = len(new_line) - len(line)
        # Encoding validates that the line is ASCII; decoding again keeps
        # the pipeline in text so the result can be joined with the other
        # (text) pieces of the document on Python 3 as well.
        yield new_line.encode('ascii').decode('ascii')
def replace_code_block(lines, pos, match):
    """Turn the ``.. code-block::`` directive at ``lines[pos]`` into ``::``.

    The directive line is blanked and a literal-block marker is appended
    to the closest preceding line that is not pure whitespace:

    * if that line ends with ``:``, one more ``:`` is appended;
    * otherwise the directive's indentation (``match.group(1)``) followed
      by ``::`` is appended. When the line still carries its trailing
      newline the marker therefore starts a new line at the directive's
      indentation.

    If there is no such preceding line, the marker is written in place of
    the directive itself. ``lines`` is modified in place.
    """
    directive = lines[pos]
    indent = match.group(1)
    lines[pos] = ''

    # Walk backwards, but never past the start of the list: a negative
    # index would wrap around and modify a line at the end of the document.
    for curpos in range(pos - 1, -1, -1):
        if not lines[curpos].isspace():
            break
    else:
        # No earlier text to attach to: keep the marker on this line.
        eol = '\n' if directive.endswith('\n') else ''
        lines[pos] = indent + '::' + eol
        return

    if lines[curpos].endswith(':'):
        lines[curpos] += ':'
    else:
        lines[curpos] += indent + '::'
- def _deref_default(target):
- return r'``{0}``'.format(target)
def _deref_ref(target):
    """Convert the content of a ``:ref:`` role into a hyperlink reference.

    ``target`` is either a bare label or ``text <label>``. When the label
    is listed in ``REFS`` its URL (prefixed with ``REFBASE`` unless it
    already contains ``://``) is recorded in ``pending_refs`` under the
    link text, and a named hyperlink reference to that text is returned.
    Labels missing from ``REFS`` are rendered with ``_deref_default``.
    """
    named = RE_NAMED_REF.match(target)
    if named is None:
        text = target
    else:
        text = named.group(1).strip()
        target = named.group(2).strip()

    if target not in REFS:
        return _deref_default(target)

    url = REFS[target]
    if '://' not in url:
        url = '/'.join([REFBASE, url])
    pending_refs[text] = url
    return r'`{0}`_'.format(text)
# Role name -> converter; roles not listed here use _deref_default.
DEREF = {'ref': _deref_ref}


def _deref(match):
    """Convert one ``RE_REFERENCE`` match using the handler for its role."""
    role = match.group(1)
    content = match.group(2)
    handler = DEREF.get(role, _deref_default)
    return handler(content)
def deref_all(line):
    """Return ``line`` with every inline role replaced via ``_deref``."""
    rewritten, _count = RE_REFERENCE.subn(_deref, line)
    return rewritten
def resolve_ref(name, url):
    """Return the hyperlink target definition that binds ``name`` to ``url``.

    The definition is surrounded by a leading and a trailing newline.
    """
    template = '\n.. _`{0}`: {1}\n'
    return template.format(name, url)
def resolve_pending_refs(lines):
    """Yield every item of ``lines``, then one target per pending reference.

    ``pending_refs`` is only read once ``lines`` has been exhausted.
    """
    for entry in lines:
        yield entry
    for label in pending_refs:
        yield resolve_ref(label, pending_refs[label])
# Directive pattern -> handler called as ``handler(lines, index, match)``
# for every line the pattern matches.
TO_RST_MAP = {
    RE_CODE_BLOCK: replace_code_block,
    RE_INCLUDE: include_file,
}
def _process(lines, encoding='utf-8'):
    """Convert Sphinx-flavoured ``lines`` to plain reStructuredText.

    Every line is run through the handlers in ``TO_RST_MAP`` and then
    through ``deref_all``. With ``encoding == 'ascii'`` the result is also
    passed through ``asciify``.

    Returns an iterable of the converted lines followed by the hyperlink
    targets collected in ``pending_refs``. The input iterable is copied,
    not modified.
    """
    lines = list(lines)  # non-destructive
    for i, line in enumerate(lines):
        for regex, alt in TO_RST_MAP.items():
            # Builtin callable() instead of the collections.Callable
            # alias, which no longer exists on current Python versions.
            if callable(alt):
                match = regex.match(line)
                if match:
                    alt(lines, i, match)
            else:
                lines[i] = regex.sub(alt, line)
            # Keep ``line`` in step with the list so that the next entry
            # of the map sees the result of this one.
            line = lines[i]
        lines[i] = deref_all(lines[i])
    if encoding == 'ascii':
        lines = asciify(lines)
    return resolve_pending_refs(lines)
def sphinx_to_rst(fh, encoding='utf-8'):
    """Convert the lines of ``fh`` and return the result as one string.

    ``encoding`` is handed to ``_process`` unchanged.
    """
    converted = _process(fh, encoding)
    return ''.join(converted)
if __name__ == '__main__':
    # Usage: <script> FILE [--ascii]
    #
    # ``dirname`` is already a module-level name at this point, so it is
    # assigned directly: a ``global`` statement placed after the
    # module-level assignment is a SyntaxError on Python 3.
    dirname = os.path.dirname(sys.argv[1])
    encoding = 'ascii' if '--ascii' in sys.argv else 'utf-8'
    with codecs.open(sys.argv[1], encoding='utf-8') as fh:
        output = sphinx_to_rst(fh, encoding).encode('utf-8')
    # Write the encoded bytes themselves. On Python 3 print() would emit
    # their repr (b'...'), so use the binary layer of stdout when there
    # is one; on Python 2 stdout accepts bytes directly.
    stdout = getattr(sys.stdout, 'buffer', sys.stdout)
    stdout.write(output + b'\n')
|