sphinx-to-rst.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
from __future__ import print_function, unicode_literals

import codecs
import os
import re
import sys
from functools import partial

try:
    from collections.abc import Callable
except ImportError:  # Python 2 has no ``collections.abc``
    from collections import Callable
  10. SAY = partial(print, file=sys.stderr)
  11. dirname = ''
  12. RE_CODE_BLOCK = re.compile(r'(\s*).. code-block:: (.+?)\s*$')
  13. RE_INCLUDE = re.compile(r'\s*.. include:: (.+?)\s*$')
  14. RE_REFERENCE = re.compile(r':(\w+):`(.+?)`')
  15. RE_NAMED_REF = re.compile('(.+?)\<(.+)\>')
  16. UNITABLE = {
  17. '…': '...',
  18. '“': '"',
  19. '”': '"',
  20. }
  21. X = re.compile(re.escape('…'))
  22. HEADER = re.compile('^[\=\~\-]+$')
  23. UNIRE = re.compile('|'.join(re.escape(p) for p in UNITABLE),
  24. re.UNICODE)
  25. REFBASE = 'http://docs.celeryproject.org/en/latest'
  26. REFS = {
  27. 'mailing-list':
  28. 'http://groups.google.com/group/celery-users',
  29. 'irc-channel': 'getting-started/resources.html#irc',
  30. 'breakpoint-signal': 'tutorials/debugging.html',
  31. 'internals-guide': 'internals/guide.html',
  32. 'bundles': 'getting-started/introduction.html#bundles',
  33. 'reporting-bugs': 'contributing.html#reporting-bugs',
  34. }
  35. pending_refs = {}
  36. def _replace_handler(match, key=UNITABLE.__getitem__):
  37. return key(match.group(0))
  38. def include_file(lines, pos, match):
  39. global dirname
  40. orig_filename = match.groups()[0]
  41. filename = os.path.join(dirname, orig_filename)
  42. fh = codecs.open(filename, encoding='utf-8')
  43. try:
  44. old_dirname = dirname
  45. dirname = os.path.dirname(orig_filename)
  46. try:
  47. lines[pos] = sphinx_to_rst(fh)
  48. finally:
  49. dirname = old_dirname
  50. finally:
  51. fh.close()
  52. def asciify(lines):
  53. prev_diff = None
  54. for line in lines:
  55. new_line = UNIRE.sub(_replace_handler, line)
  56. if prev_diff and HEADER.match(new_line):
  57. new_line = ''.join([
  58. new_line.rstrip(), new_line[0] * prev_diff, '\n'])
  59. prev_diff = len(new_line) - len(line)
  60. yield new_line.encode('ascii')
  61. def replace_code_block(lines, pos, match):
  62. lines[pos] = ''
  63. curpos = pos - 1
  64. # Find the first previous line with text to append "::" to it.
  65. while True:
  66. prev_line = lines[curpos]
  67. if not prev_line.isspace():
  68. prev_line_with_text = curpos
  69. break
  70. curpos -= 1
  71. if lines[prev_line_with_text].endswith(':'):
  72. lines[prev_line_with_text] += ':'
  73. else:
  74. lines[prev_line_with_text] += match.group(1) + '::'
  75. def _deref_default(target):
  76. return r'``{0}``'.format(target)
  77. def _deref_ref(target):
  78. m = RE_NAMED_REF.match(target)
  79. if m:
  80. text, target = m.group(1).strip(), m.group(2).strip()
  81. else:
  82. text = target
  83. try:
  84. url = REFS[target]
  85. except KeyError:
  86. return _deref_default(target)
  87. if '://' not in url:
  88. url = '/'.join([REFBASE, url])
  89. pending_refs[text] = url
  90. return r'`{0}`_'.format(text)
  91. DEREF = {'ref': _deref_ref}
  92. def _deref(match):
  93. return DEREF.get(match.group(1), _deref_default)(match.group(2))
  94. def deref_all(line):
  95. return RE_REFERENCE.subn(_deref, line)[0]
  96. def resolve_ref(name, url):
  97. return '\n.. _`{0}`: {1}\n'.format(name, url)
  98. def resolve_pending_refs(lines):
  99. for line in lines:
  100. yield line
  101. for name, url in pending_refs.items():
  102. yield resolve_ref(name, url)
  103. TO_RST_MAP = {RE_CODE_BLOCK: replace_code_block,
  104. RE_INCLUDE: include_file}
  105. def _process(lines, encoding='utf-8'):
  106. lines = list(lines) # non-destructive
  107. for i, line in enumerate(lines):
  108. for regex, alt in TO_RST_MAP.items():
  109. if isinstance(alt, Callable):
  110. match = regex.match(line)
  111. if match:
  112. alt(lines, i, match)
  113. line = lines[i]
  114. else:
  115. lines[i] = regex.sub(alt, line)
  116. lines[i] = deref_all(lines[i])
  117. if encoding == 'ascii':
  118. lines = asciify(lines)
  119. return resolve_pending_refs(lines)
  120. def sphinx_to_rst(fh, encoding='utf-8'):
  121. return ''.join(_process(fh, encoding))
  122. if __name__ == '__main__':
  123. global dirname
  124. dirname = os.path.dirname(sys.argv[1])
  125. encoding = 'ascii' if '--ascii' in sys.argv else 'utf-8'
  126. fh = codecs.open(sys.argv[1], encoding='utf-8')
  127. try:
  128. print(sphinx_to_rst(fh, encoding).encode('utf-8'))
  129. finally:
  130. fh.close()